# Kinesis Data Analytics App


![](img/kinesis-app.png)

In [1]:
import boto3
import sagemaker
import pandas as pd
import json

sess   = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

sts = boto3.Session().client(service_name='sts', region_name=region)
account_id = sts.get_caller_identity()['Account']

sm = boto3.Session().client(service_name='sagemaker', region_name=region)
firehose = boto3.Session().client(service_name='firehose', region_name=region)
kinesis_analytics = boto3.Session().client(service_name='kinesisanalytics', region_name=region)

In [2]:
%store -r firehose_name

In [3]:
print(firehose_name)

dsoaws-kinesis-data-firehose


In [4]:
%store -r firehose_arn

In [5]:
print(firehose_arn)

arn:aws:firehose:us-east-1:806570384721:deliverystream/dsoaws-kinesis-data-firehose


In [6]:
%store -r iam_role_kinesis_arn

In [7]:
print(iam_role_kinesis_arn)

arn:aws:iam::806570384721:role/DSOAWS_Kinesis


In [8]:
%store -r stream_arn

In [9]:
print(stream_arn)

arn:aws:kinesis:us-east-1:806570384721:stream/dsoaws-kinesis-data-stream


In [10]:
%store -r lambda_fn_arn

In [11]:
print(lambda_fn_arn)

arn:aws:lambda:us-east-1:806570384721:function:DeliverKinesisAnalyticsToCloudWatch2


In [12]:
%store -r iam_role_lambda_arn

In [13]:
print(iam_role_lambda_arn)

arn:aws:iam::806570384721:role/DSOAWS_Lambda


# Create Kinesis Data Analytics App (SQL)

Below is the SQL derived from the following guide: 
* https://docs.aws.amazon.com/kinesisanalytics/latest/sqlref/sql-reference-avg.html
    
```
CREATE 
OR REPLACE STREAM "AVG_STAR_RATING_SQL_STREAM" (avg_star_rating DOUBLE);

CREATE 
OR REPLACE PUMP "AVG_STAR_RATING_STREAM_PUMP" 

AS 
INSERT INTO "AVG_STAR_RATING_SQL_STREAM" 
    SELECT STREAM 
        AVG(CAST("star_rating" AS DOUBLE)) AS avg_star_rating
    FROM 
        "firehose_001"
    GROUP BY
        STEP("firehose_001".ROWTIME BY INTERVAL '30' SECOND);
```

# Specify the Kinesis Analytics Application Name

In [14]:
kinesis_data_analytics_app_name = 'dsoaws-kinesis-data-analytics-sql-app3'

In [15]:
in_app_stream_name = 'firehose_001' # Default

# Create Application

In [34]:
response = kinesis_analytics.create_application(
    ApplicationName=kinesis_data_analytics_app_name,
    Inputs=[
        {
            'NamePrefix': 'firehose',
            'KinesisFirehoseInput': {
                'ResourceARN': '{}'.format(firehose_arn),
                'RoleARN': '{}'.format(iam_role_kinesis_arn)
            },
            'InputSchema': {
                'RecordFormat': {
                    'RecordFormatType': 'CSV',
                    'MappingParameters': {
                        'CSVMappingParameters': {
                            'RecordRowDelimiter': '\n',
                            'RecordColumnDelimiter': '\t'
                        }
                    }
                },
                'RecordColumns': [
                    {
                        'Name': 'star_rating',
                        'Mapping': 'star_rating',
                        'SqlType': 'INTEGER'
                    },
                    {
                        'Name': 'review_body',
                        'Mapping': 'review_body',
                        'SqlType': 'VARCHAR(65535)'
                    }
                ]
            }
        },
    ],
    Outputs=[
        {
            'Name': 'AVG_STAR_RATING_STREAM',
            
            'KinesisStreamsOutput': {
                'ResourceARN': '{}'.format(stream_arn),
                'RoleARN': '{}'.format(iam_role_kinesis_arn)
            },

            'DestinationSchema': {
                'RecordFormatType': 'CSV'
            }
        },
        {
            'Name': 'AVG_STAR_RATING_SQL_STREAM',
            'LambdaOutput': {
                'ResourceARN': '{}'.format(lambda_fn_arn),
                'RoleARN': '{}'.format(iam_role_lambda_arn)
            },
            'DestinationSchema': {
                'RecordFormatType': 'CSV'
            }
        }
    ],
    ApplicationCode=' \
        CREATE OR REPLACE STREAM "AVG_STAR_RATING_SQL_STREAM" ( \
            avg_star_rating DOUBLE); \
        CREATE OR REPLACE PUMP "AVG_STAR_RATING_STREAM_PUMP" \
            AS INSERT INTO "AVG_STAR_RATING_SQL_STREAM" \
        SELECT STREAM AVG(CAST("star_rating" AS DOUBLE)) AS avg_star_rating \
        FROM "{}" \
        GROUP BY \
        STEP("{}".ROWTIME BY INTERVAL \'5\' SECOND);'.format(in_app_stream_name, in_app_stream_name)
        # MAKE SURE YOU \' escape the RANGE INTERVAL ^^ ABOVE ^^ 
        #  or you will see weird errors
)

In [35]:
response = kinesis_analytics.describe_application(ApplicationName=kinesis_data_analytics_app_name)
print(json.dumps(response, indent=4, sort_keys=True, default=str))

{
    "ApplicationDetail": {
        "ApplicationARN": "arn:aws:kinesisanalytics:us-east-1:806570384721:application/dsoaws-kinesis-data-analytics-sql-app3",
        "ApplicationCode": "CREATE OR REPLACE STREAM \"AVG_STAR_RATING_SQL_STREAM\" (             avg_star_rating DOUBLE);         CREATE OR REPLACE PUMP \"AVG_STAR_RATING_STREAM_PUMP\"             AS INSERT INTO \"AVG_STAR_RATING_SQL_STREAM\"         SELECT STREAM AVG(CAST(\"star_rating\" AS DOUBLE)) AS avg_star_rating         FROM \"firehose_001\"         GROUP BY         STEP(\"firehose_001\".ROWTIME BY INTERVAL '5' SECOND);\n",
        "ApplicationName": "dsoaws-kinesis-data-analytics-sql-app3",
        "ApplicationStatus": "READY",
        "ApplicationVersionId": 1,
        "CreateTimestamp": "2020-09-02 17:04:48+00:00",
        "InputDescriptions": [
            {
                "InAppStreamNames": [
                    "firehose_001"
                ],
                "InputId": "1.1",
                "InputParallelism": {


In [36]:
input_id = response['ApplicationDetail']['InputDescriptions'][0]['InputId']
print(input_id)

1.1


# Start the Kinesis Data Analytics App (SQL)
_Please be patient.  This may take a few minutes._

In [37]:
response = kinesis_analytics.start_application(
    ApplicationName=kinesis_data_analytics_app_name,
    InputConfigurations=[
        {
            'Id': input_id,
            'InputStartingPositionConfiguration': {
                'InputStartingPosition': 'NOW'
            }
        }
    ]
)

In [38]:
print(json.dumps(response, indent=4, sort_keys=True, default=str))

{
    "ResponseMetadata": {
        "HTTPHeaders": {
            "content-length": "2",
            "content-type": "application/x-amz-json-1.1",
            "date": "Wed, 02 Sep 2020 17:05:43 GMT",
            "x-amzn-requestid": "bb715fd8-2bd5-4b9a-8962-36259d97c5ec"
        },
        "HTTPStatusCode": 200,
        "RequestId": "bb715fd8-2bd5-4b9a-8962-36259d97c5ec",
        "RetryAttempts": 0
    }
}


In [39]:
from IPython.core.display import display, HTML
        
display(HTML('<b>Review <a target="blank" href="https://console.aws.amazon.com/kinesisanalytics/home?region={}#/wizard/editor?applicationName={}"> Kinesis Data Analytics App</a></b>'.format(region, kinesis_data_analytics_app_name)))


In [40]:
response = kinesis_analytics.describe_application(ApplicationName=kinesis_data_analytics_app_name)

In [41]:
import time

app_status = response['ApplicationDetail']['ApplicationStatus']

while app_status != 'RUNNING':
    time.sleep(5)
    response = kinesis_analytics.describe_application(
        ApplicationName=kinesis_data_analytics_app_name)
    app_status = response['ApplicationDetail']['ApplicationStatus']
    print('Application status {}'.format(app_status))

print('Application status {}'.format(app_status))

Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status STARTING
Application status RUNNING
Application status RUNNING


# Store Variables for Next Notebooks

In [42]:
%store kinesis_data_analytics_app_name

Stored 'kinesis_data_analytics_app_name' (str)


In [43]:
%store

Stored variables and their in-db values:
autopilot_endpoint_name                     -> 'automl-dm-ep-28-19-07-00'
firehose_arn                                -> 'arn:aws:firehose:us-east-1:806570384721:deliverys
firehose_name                               -> 'dsoaws-kinesis-data-firehose'
header_train_s3_uri                         -> 's3://sagemaker-us-east-1-806570384721/data/amazon
iam_kinesis_role_name                       -> 'DSOAWS_Kinesis'
iam_lambda_role_name                        -> 'DSOAWS_Lambda'
iam_role_kinesis_arn                        -> 'arn:aws:iam::806570384721:role/DSOAWS_Kinesis'
iam_role_lambda_arn                         -> 'arn:aws:iam::806570384721:role/DSOAWS_Lambda'
kinesis_data_analytics_app_name             -> 'dsoaws-kinesis-data-analytics-sql-app3'
lambda_fn_arn                               -> 'arn:aws:lambda:us-east-1:806570384721:function:De
lambda_fn_name                              -> 'DeliverKinesisAnalyticsToCloudWatch2'
noheader_train_s3_uri  

In [None]:
%%javascript
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();