# Kinesis Data Analytics

![](img/kinesis_data_analytics_docs.png)

In [2]:
import boto3
import sagemaker
import pandas as pd

sess   = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

kinesis = boto3.Session().client(service_name='kinesis', region_name=region)
analytics = boto3.client('kinesisanalyticsv2')
sts = boto3.Session().client(service_name='sts', region_name=region)

# Download Dataset

In [None]:
!aws s3 cp 's3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz' ./data/

In [None]:
import csv
import pandas as pd

df = pd.read_csv('./data/amazon_reviews_us_Digital_Software_v1_00.tsv.gz', 
                 delimiter='\t', 
                 quoting=csv.QUOTE_NONE,
                 compression='gzip')
df.shape

In [None]:
df.head(5)

In [None]:
partition_key = '123'
#reviews_tsv = '5\tThis is a 5 star review\n1\tThis is a 1 star review\n'

In [None]:
df_star_rating_and_review_body = df[['star_rating', 'review_body']][:100]
df_star_rating_and_review_body.shape

In [None]:
df_star_rating_and_review_body.head()

In [None]:
reviews_tsv = df_star_rating_and_review_body.to_csv(sep='\t',
                                                    header=None,
                                                    index=False)

In [None]:
reviews_tsv

In [10]:
%store -r iam_role_kinesis_arn

In [11]:
print(iam_role_kinesis_arn)

arn:aws:iam::979770387749:role/DSOAWS_Kinesis


In [4]:
%store -r data_stream_arn

In [5]:
print(data_stream_arn)

arn:aws:kinesis:ap-southeast-2:979770387749:stream/dsoaws-data-stream


In [12]:
%store -r firehouse_delivery_stream_arn

In [13]:
print(firehouse_delivery_stream_arn)

arn:aws:firehose:ap-southeast-2:979770387749:deliverystream/dsoaws-firehose-stream


# TODO:  Data Analytics
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/kinesisanalyticsv2.html#KinesisAnalyticsV2.Client.create_application

In [27]:
response = analytics.create_application(
    ApplicationName='DSOAWS_Kinesis_Data_Analytics_Application',
#    ApplicationDescription='string',
    RuntimeEnvironment='SQL-1_0',
#    RuntimeEnvironment='SQL-1_0'|'FLINK-1_6'|'FLINK-1_8',
    ServiceExecutionRole=iam_role_kinesis_arn,
    ApplicationConfiguration={
        'SqlApplicationConfiguration': {
            'Inputs': [
                {
                    'NamePrefix': 'DSOAWS_Kinesis_Data_Analytics_Application_Output',
                    'InputProcessingConfiguration': {
                        'InputLambdaProcessor': {
                            'ResourceARN': 'string'
                        }
                    },
                    'KinesisStreamsInput': {
                        'ResourceARN': data_stream_arn
                    },
#                    'KinesisFirehoseInput': {
#                        'ResourceARN': 'string'
#                    },
                    'InputParallelism': {
                        'Count': 1
                    },
                    'InputSchema': {
                        'RecordFormat': {
                            'RecordFormatType': 'CSV',
                            'MappingParameters': {
#                                 'JSONMappingParameters': {
#                                     'RecordRowPath': 'string'
#                                 },
                                'CSVMappingParameters': {
                                    'RecordRowDelimiter': '\n',
                                    'RecordColumnDelimiter': '\t'
                                }
                            }
                        },
                        'RecordEncoding': 'UTF-8',
                        'RecordColumns': [
                            {
                                'Name': 'star_rating',
                                'SqlType': 'VARCHAR'
                            },
                            {
                                'Name': 'review_body',
                                'SqlType': 'VARCHAR'
                            },                            
                        ]
                    }
                },
            ],
            'Outputs': [
                {
                    'Name': 'DSOAWS_Kinesis_Data_Analytics_Application_Output',
#                     'KinesisStreamsOutput': {
#                         'ResourceARN': 'string'
#                     },
                    'KinesisFirehoseOutput': {
                        'ResourceARN': firehouse_delivery_stream_arn
                    },
#                     'LambdaOutput': {
#                         'ResourceARN': 'string'
#                     },
                    'DestinationSchema': {
                        'RecordFormatType': 'CSV'
                    }
                },
            ],
#             'ReferenceDataSources': [
#                 {
#                     'TableName': 'string',
#                     'S3ReferenceDataSource': {
#                         'BucketARN': 'string',
#                         'FileKey': 'string'
#                     },
#                     'ReferenceSchema': {
#                         'RecordFormat': {
#                             'RecordFormatType': 'JSON'|'CSV',
#                             'MappingParameters': {
#                                 'JSONMappingParameters': {
#                                     'RecordRowPath': 'string'
#                                 },
#                                 'CSVMappingParameters': {
#                                     'RecordRowDelimiter': 'string',
#                                     'RecordColumnDelimiter': 'string'
#                                 }
#                             }
#                         },
#                         'RecordEncoding': 'string',
#                         'RecordColumns': [
#                             {
#                                 'Name': 'string',
#                                 'Mapping': 'string',
#                                 'SqlType': 'string'
#                             },
#                         ]
#                     }
#                 },
#             ]            
        },
        'ApplicationCodeConfiguration' : {
            'CodeContentType': 'PLAINTEXT'
        }
    }
)

ClientError: An error occurred (ValidationException) when calling the CreateApplication operation: 3 validation errors detected: Value 'DSOAWS_Kinesis_Data_Analytics_Application_Output' at 'applicationConfiguration.sqlApplicationConfiguration.outputs.1.member.name' failed to satisfy constraint: Member must have length less than or equal to 32; Value 'string' at 'applicationConfiguration.sqlApplicationConfiguration.inputs.1.member.inputProcessingConfiguration.inputLambdaProcessor.resourceARN' failed to satisfy constraint: Member must satisfy regular expression pattern: arn:.*; Value 'DSOAWS_Kinesis_Data_Analytics_Application_Output' at 'applicationConfiguration.sqlApplicationConfiguration.inputs.1.member.namePrefix' failed to satisfy constraint: Member must have length less than or equal to 32