In [1]:
import os
import io
import sys
import json
import boto3
import argparse
import datetime as dt
import pandas as pd
import numpy as np
import time

In [2]:
# Notebook configuration: every later cell reads these three names.
datafile = 'churn.txt'                         # CSV file read by pandas when generating records
region = 'us-east-1'                           # AWS region passed to the Kinesis client
kinesis_stream_name = 'customer_churn_stream'  # stream that is polled, written to and read from

In [3]:
!aws kinesis create-stream --stream-name customer_churn_stream --shard-count 1 --region us-east-1

In [4]:
def generate_kinesis_record(datafile):
    """Load a CSV file and return its rows as a list of JSON-compatible dicts.

    Parameters
    ----------
    datafile : str or file-like
        Anything accepted by ``pandas.read_csv``; the header row is inferred.

    Returns
    -------
    list of dict
        One dict per CSV row, keyed by column name. The rows are round-tripped
        through pandas' JSON serializer, so the values are plain Python types
        that ``json.dumps`` can handle.
    """
    frame = pd.read_csv(datafile, header='infer')
    # Serialize with pandas first, then parse back into plain Python objects.
    serialized_rows = frame.to_json(orient='records')
    return json.loads(serialized_rows)

In [5]:
# Create a boto3 session with no explicit arguments, then a Kinesis client
# bound to the configured region.
session = boto3.Session()
kinesis_client = session.client('kinesis', region_name=region)

In [6]:
# Block until the stream reports ACTIVE, polling describe_stream at a fixed
# interval. The wait is bounded: if the stream never becomes ACTIVE (for example
# creation failed or the stream is being deleted) a TimeoutError is raised
# instead of looping forever.
max_wait_seconds = 300
poll_interval_seconds = 5

stream_status = 'Inactive'
wait_deadline = time.monotonic() + max_wait_seconds

while stream_status != 'Active':
    # `response` is kept at cell scope because a later cell reads the shard list from it.
    response = kinesis_client.describe_stream(StreamName=kinesis_stream_name)
    reported_status = response['StreamDescription']['StreamStatus']
    if reported_status == 'ACTIVE':
        stream_status = 'Active'
    elif time.monotonic() >= wait_deadline:
        raise TimeoutError(
            f"Stream {kinesis_stream_name!r} did not become ACTIVE within "
            f"{max_wait_seconds} seconds (last status: {reported_status})"
        )
    else:
        # Not ready yet: pause before polling again.
        time.sleep(poll_interval_seconds)

print('Kinesis data stream status is:', stream_status)

Kinesis data stream status is: Active


In [7]:
# Send every row of the data file to the stream, one record per request so the
# arrival order follows the file order.
#
# put_records reports rejected records through FailedRecordCount in the response
# body rather than by raising, so each response is checked and the record is
# retried with a short backoff instead of being silently dropped.
max_put_attempts = 5

for row in generate_kinesis_record(datafile):
    data = json.dumps(row)
    stream_record = [{'Data': bytes(data, 'utf-8'), 'PartitionKey': 'partition_key'}]
    for attempt in range(max_put_attempts):
        # Use a separate name so the describe_stream `response` from the
        # polling cell is not overwritten (a later cell still reads it).
        put_response = kinesis_client.put_records(StreamName=kinesis_stream_name, Records=stream_record)
        if put_response['FailedRecordCount'] == 0:
            break
        # Exponential backoff, capped at 5 seconds.
        time.sleep(min(0.2 * 2 ** attempt, 5))
    else:
        failure = put_response['Records'][0]
        raise RuntimeError(
            f"Record rejected by Kinesis after {max_put_attempts} attempts: "
            f"{failure.get('ErrorCode')} - {failure.get('ErrorMessage')}"
        )

In [9]:
# Take the first shard listed in the describe_stream response captured by the
# polling cell, and request an iterator positioned at TRIM_HORIZON for it.
first_shard = response['StreamDescription']['Shards'][0]
shard_id = first_shard['ShardId']
shard_iterator = kinesis_client.get_shard_iterator(
    StreamName=kinesis_stream_name,
    ShardId=shard_id,
    ShardIteratorType='TRIM_HORIZON',
)
shard_iterator

{'ShardIterator': 'AAAAAAAAAAHYsFQASD8cIgvEbBSVnWrNsnVOrvUeML2UQn2Od8b5ZSw/lCqkTHeCSS4ml/Xk7VY7qdL1tSXTJ5CdOJAL65++BOoKDm0Oc/iHdwvwT6Bl9jyxd9+TxiafDzOjwQoIy4YBLS73hG1RtdKDcffVA0vpQxMi7XKw00LTzj0Dv8yqv6LwtJy+7gd4t4B3DHgCLxLCGRwEBDa/cojdFkwqHAr2n652xbdxGXtNYvHnQUQpcUHWVASd1l+bOP+9wvxRWFw=',
 'ResponseMetadata': {'RequestId': 'f4637a4f-8f66-b8b0-adec-9a390dec36e5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f4637a4f-8f66-b8b0-adec-9a390dec36e5',
   'x-amz-id-2': 'oBvQC0A4pVVn6V257QYwErIPFiwIMTqQNU/YSDhhQktIChjn6i9pfbycDmvfygz6RI07HzaLyMLCogbZUtCaOPeiwT7/LEnX',
   'date': 'Fri, 13 Aug 2021 02:51:34 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '288'},
  'RetryAttempts': 0}}

In [10]:
# Pull the iterator token out of the get_shard_iterator response and fetch
# at most two records with it, then display them.
my_shard_iterator = shard_iterator['ShardIterator']

record_response = kinesis_client.get_records(
    ShardIterator=my_shard_iterator,
    Limit=2,
)
record_response['Records']

[{'SequenceNumber': '49621040786984148778304013989830376064145779323184349186',
  'ApproximateArrivalTimestamp': datetime.datetime(2021, 8, 13, 2, 50, 44, 192000, tzinfo=tzlocal()),
  'Data': b'{"State": "KS", "Account Length": 128, "Area Code": 415, "Phone": "382-4657", "Int\'l Plan": "no", "VMail Plan": "yes", "VMail Message": 25, "Day Mins": 265.1, "Day Calls": 110, "Day Charge": 45.07, "Eve Mins": 197.4, "Eve Calls": 99, "Eve Charge": 16.78, "Night Mins": 244.7, "Night Calls": 91, "Night Charge": 11.01, "Intl Mins": 10.0, "Intl Calls": 3, "Intl Charge": 2.7, "CustServ Calls": 1, "Churn?": "False."}',
  'PartitionKey': 'partition_key'},
 {'SequenceNumber': '49621040786984148778304013989831584989965393952359055362',
  'ApproximateArrivalTimestamp': datetime.datetime(2021, 8, 13, 2, 50, 44, 204000, tzinfo=tzlocal()),
  'Data': b'{"State": "OH", "Account Length": 107, "Area Code": 415, "Phone": "371-7191", "Int\'l Plan": "no", "VMail Plan": "yes", "VMail Message": 26, "Day Mins": 161.6