# Kinesis Data Analytics App


![Kinesis Data Analytics app](img/kinesis-app.png)

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker context for this notebook: session, default S3 bucket, and execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Region resolved by the default boto3 session; every client below is pinned to it.
region = boto3.Session().region_name

# One boto3 client per AWS service.
sm = boto3.Session().client('sagemaker', region_name=region)
firehose = boto3.Session().client('firehose', region_name=region)
kinesis_analytics = boto3.Session().client('kinesisanalytics', region_name=region)


In [None]:
# Restore `firehose_name` from the IPython store (presumably saved by an earlier notebook via %store).
%store -r firehose_name

In [None]:
# Show the restored delivery stream name so a missing or stale value is easy to spot.
print(firehose_name)

In [None]:
# Restore `firehose_arn` from the IPython store; it is used as the input ResourceARN in create_application.
%store -r firehose_arn

In [None]:
# Show the restored Firehose ARN so a missing or stale value is easy to spot.
print(firehose_arn)

In [None]:
# Restore `iam_role_kinesis_arn` from the IPython store; it is used as the input RoleARN in create_application.
%store -r iam_role_kinesis_arn

In [None]:
# Show the restored IAM role ARN so a missing or stale value is easy to spot.
print(iam_role_kinesis_arn)

# Set up Kinesis Data Analytics SQL App

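The application runs the SQL below, which emits the average `star_rating` for each 30-second window of the `firehose_001` in-application stream. It is adapted from the `AVG` example in the following guide: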
* https://docs.aws.amazon.com/kinesisanalytics/latest/sqlref/sql-reference-avg.html
    
```
CREATE 
OR REPLACE STREAM "AVG_STAR_RATING_SQL_STREAM" (avg_star_rating DOUBLE);

CREATE 
OR REPLACE PUMP "AVG_STAR_RATING_STREAM_PUMP" 

AS 
INSERT INTO "AVG_STAR_RATING_SQL_STREAM" 
    SELECT STREAM 
        AVG(CAST("star_rating" AS DOUBLE)) AS avg_star_rating
    FROM 
        "firehose_001"
    GROUP BY
        STEP("firehose_001".ROWTIME BY INTERVAL '30' SECOND);
```

In [None]:
# Name under which the Kinesis Data Analytics application is created, described, and started in the cells below.
kinesis_data_analytics_app_name = 'dsoaws-kinesis-data-analytics-sql-app'

In [None]:
# In-application stream name referenced by the application's SQL. It matches the 'firehose'
# NamePrefix passed to create_application plus a '_001' suffix (presumably how the service
# names the first in-application stream - verify against the describe_application output).
in_app_stream_name = 'firehose_001'
print(in_app_stream_name)

In [None]:
# SQL run by the application: for every 30-second step of ROWTIME, insert the average
# star_rating read from the "firehose_001" in-application stream into
# "AVG_STAR_RATING_SQL_STREAM".
# A triple-quoted literal is used so the single quotes around '30' need no escaping; inside a
# single-quoted literal they terminate the string early and the cell fails with a SyntaxError.
application_code = '''
    CREATE OR REPLACE STREAM "AVG_STAR_RATING_SQL_STREAM" (
        avg_star_rating DOUBLE);
    CREATE OR REPLACE PUMP "AVG_STAR_RATING_STREAM_PUMP"
        AS INSERT INTO "AVG_STAR_RATING_SQL_STREAM"
    SELECT STREAM AVG(CAST("star_rating" AS DOUBLE)) AS avg_star_rating
    FROM "firehose_001"
    GROUP BY
    STEP("firehose_001".ROWTIME BY INTERVAL '30' SECOND);
'''

# Create the application with a single Firehose input. Optional settings (InputParallelism,
# RecordEncoding, Outputs, CloudWatchLoggingOptions) are not configured here.
response = kinesis_analytics.create_application(
    ApplicationName=kinesis_data_analytics_app_name,
    Inputs=[
        {
            # Prefix for the in-application stream name(s); the SQL above reads "firehose_001".
            'NamePrefix': 'firehose',
            'KinesisFirehoseInput': {
                'ResourceARN': firehose_arn,
                'RoleARN': iam_role_kinesis_arn,
            },
            'InputSchema': {
                # Records are newline-terminated rows with tab-separated columns.
                'RecordFormat': {
                    'RecordFormatType': 'CSV',
                    'MappingParameters': {
                        'CSVMappingParameters': {
                            'RecordRowDelimiter': '\n',
                            'RecordColumnDelimiter': '\t',
                        }
                    },
                },
                'RecordColumns': [
                    {
                        'Name': 'star_rating',
                        'Mapping': 'star_rating',
                        'SqlType': 'INTEGER',
                    },
                    {
                        'Name': 'review_body',
                        'Mapping': 'review_body',
                        'SqlType': 'VARCHAR(8192)',
                    },
                ],
            },
        },
    ],
    ApplicationCode=application_code,
)

In [None]:
# create_application only returns an 'ApplicationSummary', so the input details must be fetched
# with describe_application before the in-application stream name can be read. Doing the lookup
# here keeps the cell working on a fresh top-to-bottom run.
response = kinesis_analytics.describe_application(ApplicationName=kinesis_data_analytics_app_name)
in_app_stream_name = response['ApplicationDetail']['InputDescriptions'][0]['InAppStreamNames'][0]
print(in_app_stream_name)

In [None]:
# Fetch the application's full description; later cells read the input id from it.
response = kinesis_analytics.describe_application(ApplicationName=kinesis_data_analytics_app_name)
print(response)

In [None]:
# Id of the application's first (and only configured) input, needed by start_application.
first_input_description = response['ApplicationDetail']['InputDescriptions'][0]
input_id = first_input_description['InputId']
print(input_id)

In [None]:
# Start the application, telling it to read the input from the 'NOW' starting position.
input_configurations = [
    {
        'Id': input_id,
        'InputStartingPositionConfiguration': {'InputStartingPosition': 'NOW'},
    }
]
response = kinesis_analytics.start_application(
    ApplicationName=kinesis_data_analytics_app_name,
    InputConfigurations=input_configurations,
)

In [None]:
# Show the raw response returned by start_application.
print(response)

In [None]:
# `display` and `HTML` are imported from the public IPython.display module; importing them
# from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Render a link to the application's SQL editor in the AWS console.
# target="_blank" (with the underscore) opens a new tab; a bare "blank" is just a window name.
console_url = 'https://console.aws.amazon.com/kinesisanalytics/home?region={}#/wizard/editor?applicationName={}'.format(region, kinesis_data_analytics_app_name)
display(HTML('<b>Review <a target="_blank" href="{}"> Kinesis Data Analytics App</a></b>'.format(console_url)))


In [None]:
# Refresh the application description now that start_application has been requested.
response = kinesis_analytics.describe_application(ApplicationName=kinesis_data_analytics_app_name)

In [None]:
import time

# Poll describe_application until the application reports RUNNING. The wait is bounded so that
# an application which never reaches RUNNING (for example because the start failed) raises a
# clear error instead of hanging the notebook forever.
max_wait_seconds = 600
poll_interval_seconds = 5
deadline = time.monotonic() + max_wait_seconds

while True:
    # The status is fetched inside the loop so this cell does not rely on a stale `response`.
    response = kinesis_analytics.describe_application(
        ApplicationName=kinesis_data_analytics_app_name)
    app_status = response['ApplicationDetail']['ApplicationStatus']
    if app_status == 'RUNNING':
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            'Application {} did not reach RUNNING within {} seconds (last status: {})'.format(
                kinesis_data_analytics_app_name, max_wait_seconds, app_status))
    time.sleep(poll_interval_seconds)

print('Application status {}'.format(app_status))

# Store Variables for Next Notebooks

In [None]:
# Persist the application name in the IPython store so later notebooks can restore it with `%store -r`.
%store kinesis_data_analytics_app_name

In [None]:
# List every variable currently held in the IPython store.
%store

In [None]:
%%javascript
// Save a checkpoint of the notebook first so no edits are lost.
// NOTE(review): relies on the global `Jupyter` object - presumably only available in the classic Notebook UI; confirm for JupyterLab.
Jupyter.notebook.save_checkpoint();
// Then delete this notebook's session (presumably shutting down its kernel - confirm).
Jupyter.notebook.session.delete();