# Put Reviews On Kinesis Data Firehose

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session plus the default bucket and execution role it resolves.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Build a single boto3 session and derive the region and every client from it,
# rather than constructing a fresh session for each call.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client(service_name='sagemaker', region_name=region)
firehose = boto_session.client(service_name='firehose', region_name=region)
kinesis_analytics = boto_session.client(service_name='kinesisanalytics', region_name=region)


In [None]:
# Restore `firehose_name` from IPython's %store storage.
# NOTE(review): presumably saved by an earlier notebook in this series — confirm.
%store -r firehose_name

In [None]:
# Show the restored value.
print(firehose_name)

In [None]:
# Restore `firehose_arn` from IPython's %store storage.
%store -r firehose_arn

In [None]:
# Show the restored value.
print(firehose_arn)

In [None]:
# Restore `iam_role_kinesis_arn` from IPython's %store storage.
%store -r iam_role_kinesis_arn

In [None]:
# Show the restored value.
print(iam_role_kinesis_arn)

In [None]:
# Restore `kinesis_data_analytics_app_name` from IPython's %store storage.
%store -r kinesis_data_analytics_app_name

In [None]:
# Show the restored value.
print(kinesis_data_analytics_app_name)

In [None]:
# List the delivery streams whose type is 'DirectPut' and print the raw API response.
firehoses = firehose.list_delivery_streams(DeliveryStreamType='DirectPut')
print(firehoses)

# Download Dataset

In [None]:
# Copy the gzipped Digital Software reviews TSV from S3 into the local ./data/ directory.
!aws s3 cp 's3://amazon-reviews-pds/tsv/amazon_reviews_us_Digital_Software_v1_00.tsv.gz' ./data/

In [None]:
import csv
import pandas as pd

# Parse the gzipped, tab-separated reviews file into a DataFrame.
# QUOTE_NONE tells the parser to treat quote characters as ordinary text.
df = pd.read_csv(
    './data/amazon_reviews_us_Digital_Software_v1_00.tsv.gz',
    sep='\t',
    compression='gzip',
    quoting=csv.QUOTE_NONE,
)
df.shape

In [None]:
# Preview the first five rows of the loaded dataset.
df.head(5)

In [None]:
# Keep only the rating and review text, limited to the first 100 rows.
columns_to_keep = ['star_rating', 'review_body']
df_star_rating_and_review_body = df[columns_to_keep].head(100)
df_star_rating_and_review_body.shape

In [None]:
# Preview the first rows of the two-column subset.
df_star_rating_and_review_body.head()

In [None]:
# Serialize the subset to one tab-separated string (no path given, so to_csv
# returns the text). The header row and index column are both omitted.
# `header=False` is the documented way to drop the header; `header=None` only
# worked because None is falsy.
reviews_tsv = df_star_rating_and_review_body.to_csv(
    sep='\t',
    header=False,
    index=False,
)

In [None]:
# Display the serialized TSV string.
reviews_tsv

# Simulate Application Writing Records to the Stream

In [None]:
# Fetch the description of the target delivery stream and print the raw response.
firehose_response = firehose.describe_delivery_stream(DeliveryStreamName=firehose_name)
print(firehose_response)

# Put Records onto Firehose

In [None]:
# Encode the TSV payload once; the same bytes are sent on every iteration.
reviews_tsv_bytes = reviews_tsv.encode('utf-8')

# Put the same reviews payload onto the delivery stream 50 times.
# The loop variable stays `i` because `_` is IPython's last-output variable.
for i in range(50):
    response = firehose.put_record(
        DeliveryStreamName=firehose_name,
        Record={'Data': reviews_tsv_bytes},
    )

# Analyze Stream with Kinesis Data Analytics

_Re-run the "Put Records onto Firehose" cell above if you see `No rows in source stream`._

![](./img/no_rows_in_source_kinesis_firehose_stream.png)

In [None]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Render a console link to the Kinesis Data Analytics application editor.
# target="_blank" is the HTML keyword for "open in a new tab"; the previous
# value "blank" named a browsing context instead.
display(
    HTML(
        '<b>Review <a target="_blank" href="https://console.aws.amazon.com/kinesisanalytics/home?region={}#/wizard/editor?applicationName={}"> Kinesis Data Analytics App</a></b>'.format(
            region, kinesis_data_analytics_app_name
        )
    )
)


In [None]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Render a console link to the delivery stream's monitoring page.
# target="_blank" is the HTML keyword for "open in a new tab"; the previous
# value "blank" named a browsing context instead.
display(
    HTML(
        '<b>Review <a target="_blank" href="https://console.aws.amazon.com/firehose/home?region={}#/details/{}/monitoring"> Firehose</a></b>'.format(
            region, firehose_name
        )
    )
)


In [None]:
%%javascript
// Calls save_checkpoint() on the notebook, then delete() on its session.
// NOTE(review): presumably this shuts the kernel down to release resources — confirm.
// NOTE(review): depends on the `Jupyter` global object; verify it exists in the
// front end this notebook is run in.
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();