# Put Reviews On Kinesis Data Firehose

In [None]:
import boto3
import sagemaker
import pandas as pd
import json

# A single boto3 session supplies the region and every service client below.
boto_session = boto3.Session()
region = boto_session.region_name

# SageMaker session, its default S3 bucket, and the notebook's execution role.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Service clients, all pinned to the session's region.
sm = boto_session.client(service_name='sagemaker', region_name=region)
firehose = boto_session.client(service_name='firehose', region_name=region)
kinesis_analytics = boto_session.client(service_name='kinesisanalytics', region_name=region)


In [None]:
# Restore variables from IPython's %store database (presumably saved by an
# earlier setup notebook -- verify) and print each one so a missing or stale
# value is noticed before it is used below.
%store -r firehose_name

In [None]:
print(firehose_name)

In [None]:
# ARN of the delivery stream named above.
%store -r firehose_arn

In [None]:
print(firehose_arn)

In [None]:
# IAM role ARN stored for the Kinesis resources.
%store -r iam_role_kinesis_arn

In [None]:
print(iam_role_kinesis_arn)

In [None]:
# Application name used for the Kinesis Data Analytics console link below.
%store -r kinesis_data_analytics_app_name

In [None]:
print(kinesis_data_analytics_app_name)

In [None]:
# Lambda function name used for the Lambda console and log links below.
%store -r lambda_fn_name

In [None]:
print(lambda_fn_name)

In [None]:
# Ask Firehose for its Direct PUT delivery streams and pretty-print the reply.
# default=str stringifies any value json cannot serialise on its own.
firehoses = firehose.list_delivery_streams(DeliveryStreamType='DirectPut')
firehoses_pretty = json.dumps(firehoses, default=str, sort_keys=True, indent=4)
print(firehoses_pretty)

# Download Dataset

In [None]:
# Copy the gzipped Digital Software reviews TSV from S3 into the local ./data/ directory.
!aws s3 cp 's3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz' ./data/

In [None]:
import csv
import pandas as pd

# Load the gzipped, tab-separated reviews file. QUOTE_NONE makes the parser
# treat quote characters as ordinary text instead of field delimiters.
reviews_path = './data/amazon_reviews_us_Digital_Software_v1_00.tsv.gz'
df = pd.read_csv(
    reviews_path,
    sep='\t',
    compression='gzip',
    quoting=csv.QUOTE_NONE,
)
df.shape

In [None]:
df.head(5)

In [None]:
# Preview the exact TSV line that will be streamed: the first review only,
# restricted to the four streamed columns, with no header row and no index.
preview_columns = ['review_id', 'star_rating', 'product_category', 'review_body']
df_star_rating_and_review_body = df[preview_columns].head(1)

df_star_rating_and_review_body.to_csv(sep='\t', header=None, index=False)

# Simulate Application Writing Records to the Stream

In [None]:
# Fetch the delivery stream's description to confirm it exists before writing to it.
firehose_response = firehose.describe_delivery_stream(
    DeliveryStreamName=firehose_name
)

# Pretty-print the same way as the delivery-stream listing above; default=str
# stringifies values json cannot serialise natively (e.g. datetimes).
print(json.dumps(firehose_response, indent=4, sort_keys=True, default=str))

# Put Records onto Firehose

In [None]:
# Number of reviews packed into each Firehose record.
step = 1

# Project the streamed columns once instead of rebuilding the same
# four-column frame on every iteration.
review_columns = ['review_id', 'star_rating', 'product_category', 'review_body']
df_reviews = df[review_columns]

# Stream at most 10,000 reviews, but never index past the end of the dataset:
# an out-of-range slice is empty and would be sent as an empty record.
num_records = min(10000, len(df_reviews))

for start_idx in range(0, num_records, step):
    end_idx = start_idx + step

    df_star_rating_and_review_body = df_reviews[start_idx:end_idx]

    # One TSV line per review, without header or index, matching the preview above.
    reviews_tsv = df_star_rating_and_review_body.to_csv(sep='\t',
                                                        header=None,
                                                        index=False)

    response = firehose.put_record(
        Record={
            'Data': reviews_tsv.encode('utf-8')
        },
        DeliveryStreamName=firehose_name
    )

In [None]:
import time

# Stride of the send loop below; with 1, 10,000 anomalous records are sent.
anomaly_step = 1

# The current epoch second serves as the synthetic review_id.
timestamp = int(time.time())

# A single out-of-range review (star_rating of 100) used as the anomaly.
# It carries the same four columns, in the same order, as the regular records
# put on the stream, so both kinds of record share one layout.
anomaly_columns = ['review_id', 'star_rating', 'product_category', 'review_body']
df_anomalies = pd.DataFrame([
    {'review_id': str(timestamp),
     'star_rating': 100,
     'product_category': 'Digital_Software',
     'review_body': 'blahblah'},
], columns=anomaly_columns)

# The record is identical on every iteration, so serialise and encode it once.
# (Slicing the one-row frame by the loop index, as before, produced an empty
# payload for every iteration after the first.)
reviews_tsv_anomalies = df_anomalies.to_csv(sep='\t',
                                            header=None,
                                            index=False)
anomaly_payload = reviews_tsv_anomalies.encode('utf-8')

for _ in range(0, 10000, anomaly_step):
    response = firehose.put_record(
        Record={
            'Data': anomaly_payload
        },
        DeliveryStreamName=firehose_name
    )

# Analyze Stream with Kinesis Data Analytics

![](./img/no_rows_in_source_kinesis_firehose_stream.png)

## _Re-Run ^^ Above ^^ Cell If You See `No rows in source stream`_

# Explore Kinesis Data Analytics App

In [None]:
# IPython.display is the public home of display/HTML; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Console link to the Kinesis Data Analytics application's SQL editor.
# target="_blank" opens a new tab ("blank" would only name a reusable window).
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/kinesisanalytics/home?region={}#/wizard/editor?applicationName={}"> Kinesis Data Analytics App</a></b>'.format(region, kinesis_data_analytics_app_name)))


# Explore Kinesis Firehose

In [None]:
# IPython.display is the public home of display/HTML; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Console link to the delivery stream's monitoring tab.
# target="_blank" opens a new tab ("blank" would only name a reusable window).
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/firehose/home?region={}#/details/{}/monitoring"> Firehose</a></b>'.format(region, firehose_name)))


# Explore Custom CloudWatch Metrics

In [None]:
# IPython.display is the public home of display/HTML; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Console link to the AVGStarRating custom metric graph. The region appears
# twice in the URL: once in the query string and once inside the graph spec.
# The second one used to be hard-coded to us-east-1, which pointed the graph at
# the wrong region whenever the notebook ran elsewhere; both now use `region`.
display(HTML("""<b>Review <a target="_blank" href="https://console.aws.amazon.com/cloudwatch/home?region={}#metricsV2:graph=~(metrics~(~(~'kinesis*2fanalytics*2fAVGStarRating~'AVGStarRating~'Product*20Category~'All))~view~'timeSeries~stacked~false~start~'-PT5M~end~'P0D~region~'{}~liveData~true~stat~'Average~period~1~title~'Avg*20Star*20Rating);query=~'*7bkinesis*2fanalytics*2fAVGStarRating*2c*22Product*20Category*22*7d">CloudWatch Metrics</a></b>""".format(region, region)))


# Explore Lambda Code

In [None]:
# Imported here as well so this cell runs on its own, like the other link cells.
from IPython.display import display, HTML

# Console link to the Lambda function. The function name comes from
# lambda_fn_name (it used to be hard-coded in the URL while the variable was
# passed to format() and ignored), matching the Lambda logs link below.
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/lambda/home?region={}#/functions/{}">Lambda</a></b>'.format(region, lambda_fn_name)))


# Explore Lambda Logs

In [None]:
# IPython.display is the public home of display/HTML; importing them from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Console link to the function's CloudWatch log group (/aws/lambda/<function name>).
# target="_blank" opens a new tab ("blank" would only name a reusable window).
display(HTML('<b>Review <a target="_blank" href="https://console.aws.amazon.com/cloudwatch/home?region={}#logStream:group=%252Faws%252Flambda%252F{}">Lambda Logs</a></b>'.format(region, lambda_fn_name)))


In [None]:
# %%javascript
# Jupyter.notebook.save_checkpoint();
# Jupyter.notebook.session.delete();