# Amazon Kinesis Data Firehose

* https://github.com/aws-samples/aws-ml-data-lake-workshop
* https://aws.amazon.com/blogs/big-data/snakes-in-the-stream-feeding-and-eating-amazon-kinesis-streams-with-python/

![Amazon Kinesis Data Firehose delivering records to Amazon S3](img/kinesis_firehose_s3_docs.png)


In [7]:
import boto3
import sagemaker
import pandas as pd
import json

# Shared handles used by the rest of the notebook.
sess = sagemaker.Session()
bucket = sess.default_bucket()  # later used as the Firehose S3 destination bucket
role = sagemaker.get_execution_role()

# Build a single boto3 session and reuse it for the region lookup and for both
# clients, instead of constructing a fresh Session() for each of them.
boto_session = boto3.Session()
region = boto_session.region_name

sm = boto_session.client(service_name='sagemaker', region_name=region)
firehose = boto_session.client(service_name='firehose', region_name=region)

In [8]:
# Restore `firehose_name` from the IPython %store database
# (presumably saved by an earlier notebook; the storing cell is not shown here).
%store -r firehose_name

In [9]:
# Show the restored delivery stream name so a failed %store -r is noticed early.
print(firehose_name)

dsoaws-kinesis-data-firehose


In [10]:
# Restore `iam_role_kinesis_arn` from the IPython %store database
# (presumably saved by an earlier notebook; the storing cell is not shown here).
%store -r iam_role_kinesis_arn

In [11]:
# Show the restored IAM role ARN; it is passed as the delivery stream's RoleARN below.
print(iam_role_kinesis_arn)

arn:aws:iam::806570384721:role/DSOAWS_Kinesis


# Create a Kinesis Data Firehose Delivery Stream
_This may take 1-2 minutes.  Please be patient._

In [12]:
from botocore.exceptions import ClientError

# Create a DirectPut delivery stream that writes to the SageMaker default bucket.
# Re-running the cell is safe: an already-existing stream is reported and
# treated as success. Any other client error is printed and then re-raised so
# the notebook does not continue into the polling cell without a stream.
try:
    response = firehose.create_delivery_stream(
        DeliveryStreamName=firehose_name,
        DeliveryStreamType='DirectPut',
        # NOTE(review): boto3 documents S3DestinationConfiguration as deprecated
        # in favor of ExtendedS3DestinationConfiguration -- consider migrating.
        S3DestinationConfiguration={
            'RoleARN': iam_role_kinesis_arn,
            'BucketARN': 'arn:aws:s3:::{}'.format(bucket),
            # NOTE(review): the prefix has no trailing '/', so delivered object
            # keys presumably start with 'kinesis-data-firehose' fused directly
            # to the date path -- confirm whether 'kinesis-data-firehose/' was
            # intended.
            'Prefix': 'kinesis-data-firehose',
        }
    )
except ClientError as e:
    if e.response['Error']['Code'] == 'ResourceInUseException':
        print('Delivery stream {} already exists.'.format(firehose_name))
    else:
        print('Unexpected error: %s' % e)
        raise
else:
    print('Delivery stream {} successfully created.'.format(firehose_name))
    print(json.dumps(response, indent=4, sort_keys=True, default=str))
    

Delivery stream dsoaws-kinesis-data-firehose already exists.


In [13]:
import time

# Poll the delivery stream until it reports ACTIVE.
# - Stops immediately on a state that can never become ACTIVE, rather than
#   looping forever.
# - Gives up after MAX_WAIT_SECONDS so a stuck stream does not hang the kernel.
# - Sleeps only between polls, so an already-ACTIVE stream returns at once.
MAX_WAIT_SECONDS = 600
POLL_INTERVAL_SECONDS = 5
TERMINAL_STATUSES = ('CREATING_FAILED', 'DELETING', 'DELETING_FAILED')

deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    # `r`, `description` and `status` stay module-level: later cells read them.
    r = firehose.describe_delivery_stream(DeliveryStreamName=firehose_name)
    description = r.get('DeliveryStreamDescription')
    status = description.get('DeliveryStreamStatus')

    if status == 'ACTIVE':
        break
    if status in TERMINAL_STATUSES:
        raise RuntimeError(
            'Delivery stream {} is in status {} and will not become ACTIVE.'.format(
                firehose_name, status))
    if time.monotonic() >= deadline:
        raise TimeoutError(
            'Delivery stream {} still in status {} after {} seconds.'.format(
                firehose_name, status, MAX_WAIT_SECONDS))

    time.sleep(POLL_INTERVAL_SECONDS)

print('Delivery Stream {} is active'.format(firehose_name))

Delivery Stream dsoaws-kinesis-data-firehose is active


In [16]:
# Fetch a fresh description and derive everything from it. The status must be
# read from this response, not from a `description` variable left over from an
# earlier cell.
r = firehose.describe_delivery_stream(DeliveryStreamName=firehose_name)
description = r.get('DeliveryStreamDescription')

status = description.get('DeliveryStreamStatus')
print(status)

print()

# default=str makes non-JSON values in the response (e.g. timestamps) printable.
print(json.dumps(description, indent=4, sort_keys=True, default=str))

ACTIVE

{
    "CreateTimestamp": "2020-09-02 14:04:32.593000+00:00",
    "DeliveryStreamARN": "arn:aws:firehose:us-east-1:806570384721:deliverystream/dsoaws-kinesis-data-firehose",
    "DeliveryStreamEncryptionConfiguration": {
        "Status": "DISABLED"
    },
    "DeliveryStreamName": "dsoaws-kinesis-data-firehose",
    "DeliveryStreamStatus": "ACTIVE",
    "DeliveryStreamType": "DirectPut",
    "Destinations": [
        {
            "DestinationId": "destinationId-000000000001",
            "ExtendedS3DestinationDescription": {
                "BucketARN": "arn:aws:s3:::sagemaker-us-east-1-806570384721",
                "BufferingHints": {
                    "IntervalInSeconds": 300,
                    "SizeInMBs": 5
                },
                "CloudWatchLoggingOptions": {
                    "Enabled": false
                },
                "CompressionFormat": "UNCOMPRESSED",
                "EncryptionConfiguration": {
                    "NoEncryptionConfig": "NoE

In [17]:
# Pull the delivery stream ARN out of the most recent describe response.
stream_description = r['DeliveryStreamDescription']
firehose_arn = stream_description['DeliveryStreamARN']
print(firehose_arn)

arn:aws:firehose:us-east-1:806570384721:deliverystream/dsoaws-kinesis-data-firehose


# Store Variables for the Next Notebooks

In [18]:
# Persist `firehose_arn` in the IPython %store database so the next notebooks
# can load it with `%store -r firehose_arn`.
%store firehose_arn

Stored 'firehose_arn' (str)


In [19]:
# With no arguments, %store lists every stored variable and its saved value.
%store

Stored variables and their in-db values:
autopilot_endpoint_name                     -> 'automl-dm-ep-28-19-07-00'
firehose_arn                                -> 'arn:aws:firehose:us-east-1:806570384721:deliverys
firehose_name                               -> 'dsoaws-kinesis-data-firehose'
header_train_s3_uri                         -> 's3://sagemaker-us-east-1-806570384721/data/amazon
iam_kinesis_role_name                       -> 'DSOAWS_Kinesis'
iam_role_kinesis_arn                        -> 'arn:aws:iam::806570384721:role/DSOAWS_Kinesis'
kinesis_data_analytics_app_name             -> 'dsoaws-kinesis-data-analytics-sql-app-2'
lambda_fn_name                              -> 'DeliverKinesisAnalyticsToCloudWatch'
noheader_train_s3_uri                       -> 's3://sagemaker-us-east-1-806570384721/data/amazon
partition_key                               -> 'CAFEPERSON'
stream_arn                                  -> 'arn:aws:kinesis:us-east-1:806570384721:stream/dso
stream_name           

In [None]:
%%javascript
// Save a notebook checkpoint, then delete the notebook's session.
// NOTE(review): presumably this shuts the kernel down to release resources once
// the notebook has finished -- confirm.
// NOTE(review): relies on the global `Jupyter` object; looks like a classic
// Notebook front-end API -- verify it exists in the front end being used.
Jupyter.notebook.save_checkpoint();
Jupyter.notebook.session.delete();