In [0]:
# Milestone 9 Task 4
# Read data from Kinesis Streams in Databricks
"""
Step 1:
Create a new Notebook in Databricks and read in your credentials authentication_credentials.csv file to retrieve the Access Key and Secret Access Key. Follow the same process for this, as you have followed for your batch data. 

Step 2:
Run your preferred method to ingest data into Kinesis Data Streams. In the Kinesis console, check your data streams are receiving the data. 

Step 3:
Read the data from the three streams you have created in your Databricks Notebook.
"""
# URL processing (import the submodule explicitly: plain `import urllib` does not load urllib.parse)
import urllib.parse

# pyspark functions
from pyspark.sql.functions import *
from pyspark.sql.types import (
    ArrayType,
    DoubleType,
    FloatType,
    IntegerType,
    StringType,
    StructField,
    StructType,
    TimestampType,
)

# Specify file type to be csv
#file_type = "csv"
# Indicates file has first row as the header
#first_row_is_header = "true"
# Indicates file has comma as the delimiter
#delimiter = ","
# Read the CSV file to spark dataframe
"""aws_keys_df = spark.read.format(file_type)\
.option("header", first_row_is_header)\
.option("sep", delimiter)\
.load("/FileStore/tables/authentication_credentials.csv")
"""
# UPDATE JAN 24 - credential file has since changed: the keys are now read from a Delta table
delta_table_path = "dbfs:/user/hive/warehouse/authentication_credentials"
aws_keys_df = spark.read.format("delta").load(delta_table_path)

# Get the AWS access key and secret key from the spark dataframe.
# Both values are fetched with one Spark action so they always come from the same row,
# and only a single row is brought back to the driver.
credentials_row = aws_keys_df.select('Access key ID', 'Secret access key').first()
if credentials_row is None:
    # first() returns None for an empty dataframe; fail with a message that names the table
    raise ValueError(f"No AWS credentials found in Delta table at {delta_table_path}")
ACCESS_KEY = credentials_row['Access key ID']
SECRET_KEY = credentials_row['Secret access key']
# Encode the secret key (safe="" means every reserved character, including "/", is percent-encoded)
ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")

In [0]:
# Reading Pin data in Databricks: open the pin Kinesis stream and preview its raw records
df_pin = (
    spark.readStream
    .format('kinesis')
    .option('streamName', "streaming-0eb84f80c29b-pin")
    .option('initialPosition', 'earliest')
    .option('region', 'us-east-1')
    .option('awsAccessKey', ACCESS_KEY)
    .option('awsSecretKey', SECRET_KEY)
    .load()
)
display(df_pin)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
pin,eyJpbmRleCI6NDk3LCJ1bmlxdWVfaWQiOiI1MTFhMDU4NC02YmM0LTRjZjAtODk2My05ZGRhZGVmZWM5YmMiLCJ0aXRsZSI6IjMwIFBpY3MgT2YgQSBGYW1pbHkgVGhhdCBVc2VzIENoYWxrIEFydCBUbyBHbyBPbiBBZHZlbnQ= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111313790338719007931536253124642,2024-01-03T13:21:44.830+0000
pin,eyJpbmRleCI6MTAxMjAsInVuaXF1ZV9pZCI6IjlkMThhZjg0LTdlODYtNDJkMS04NDE1LTYzY2YxZjhkZDQ1ZSIsInRpdGxlIjoiV2hlcmUgdG8gU3RheSBpbiBMb25kb24gKyBUaGUgQmVzdCBIb3RlbHMgaW4gTG9uZG9uOiA= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111314469755029631353132437995554,2024-01-03T13:21:45.478+0000
pin,eyJpbmRleCI6MTg3LCJ1bmlxdWVfaWQiOiJiZTgzYjQ3ZS0yNzI1LTRmZDctOWIwOC0yMmZlN2ZmNjc5OTAiLCJ0aXRsZSI6IldhdGVyY29sb3IgUGFpbnRpbmdzIGZyb20gRXRzeSAtIFRoZSBIb25leWNvbWIgSG9tZSIsImQ= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111316174340435287980406212657186,2024-01-03T13:21:47.104+0000
pin,eyJpbmRleCI6MjczOSwidW5pcXVlX2lkIjoiMTAwNmJlNjUtMGVhZS00NTM3LWFkYTEtZjY4YzM3YjM3ZWMwIiwidGl0bGUiOiIyNSsgRWFzeSBUaGFua3NnaXZpbmcgQ3JhZnRzIGZvciBLaWRzIiwiZGVzY3JpcHRpb24iOiI= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111318053011158969114212425531426,2024-01-03T13:21:48.742+0000
pin,eyJpbmRleCI6NzE4NiwidW5pcXVlX2lkIjoiZTNiYWRmZTYtYWNmYy00Y2M1LTk2YzUtZjhiYTYyMDgzMGI2IiwidGl0bGUiOiJSZWQgSG90IENoaWxpIFBlcHBlcnMgLSBTY2FyIFRpc3N1ZSAtIFdvbWVuJ3MgVC1zaGlydCA= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111318700995398282555518787518498,2024-01-03T13:21:49.361+0000
pin,eyJpbmRleCI6NzE5OSwidW5pcXVlX2lkIjoiOWZkMjlhNmMtODUzZi00NGJhLWFlODctNzZiYzkwOWNkZjJlIiwidGl0bGUiOiJTw6lsZWN0aW9uIEJvdHRpbmVzIEhvbW1lIEF1dG9tbmUgLyBIaXZlciAyMDIxIHwgR3VpZGU= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111320555487605571396810225745954,2024-01-03T13:21:51.004+0000
pin,eyJpbmRleCI6Mzc5OCwidW5pcXVlX2lkIjoiMzU2NmRlOTMtMWFkYy00Yzg4LThmNDgtNGU4MDczY2QyOTQyIiwidGl0bGUiOiJEZWNsaW5pbmcgYnkgRGVncmVlcyA6IEhpZ2hlciBFZHVjYXRpb24gYXQgUmlzayAoUGFwZXI= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111322240730198114189948485632034,2024-01-03T13:21:52.649+0000
pin,eyJpbmRleCI6MTA2MjIsInVuaXF1ZV9pZCI6IjJiZWEzNDg1LWM5MzEtNDZiNC04YzY4LTkwNjhiMmFkOWI3OSIsInRpdGxlIjoiUGFuaGFyZCdzIENyYWIgTWF5IEp1c3QgQmUgVGhlIEZ1dHVyZSBPZiBBcm1vcmVkIFNjb3U= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111322960041060784894376155283490,2024-01-03T13:21:53.293+0000
pin,eyJpbmRleCI6NDkxNSwidW5pcXVlX2lkIjoiMjc3YTE4MWItMWM3Zi00ZWQ4LWIxYzUtNGQ0OTU1NWFlMGUxIiwidGl0bGUiOiJXaGF0IGlzIHRoZSBkaWZmZXJlbmNlIGJldHdlZW4gbWFjcmFtZSBjb3JkLCBtYWNyYW1lIHI= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111323591100338623730805351907362,2024-01-03T13:21:53.949+0000
pin,eyJpbmRleCI6NTc0OSwidW5pcXVlX2lkIjoiZTYzMzI1NTUtNTczZC00ZGEyLTk0ODItMGQzY2Y3N2Q3YjQ0IiwidGl0bGUiOiI2IFN0ZXAgUGxhbiB0byBQYXkgT2ZmIERlYnQgYW5kIFNhdmUgTW9uZXkgLSBMaWZlIGFuZCA= (truncated),streaming-0eb84f80c29b-pin,shardId-000000000002,49646997465727834911146111325290850041001899493708267554,2024-01-03T13:21:55.579+0000


In [0]:
# Reading geo data in Databricks: open the geo Kinesis stream and preview its raw records
df_geo = (
    spark.readStream
    .format('kinesis')
    .option('streamName', "streaming-0eb84f80c29b-geo")
    .option('initialPosition', 'earliest')
    .option('region', 'us-east-1')
    .option('awsAccessKey', ACCESS_KEY)
    .option('awsSecretKey', SECRET_KEY)
    .load()
)
display(df_geo)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
geo,eyJpbmQiOjQ5NywidGltZXN0YW1wIjoiMjAyMS0xMC0yMiAxMjo1OTowNiIsImxhdGl0dWRlIjotODguODI5OCwibG9uZ2l0dWRlIjotMTcwLjE4OCwiY291bnRyeSI6IkFsYmFuaWEifQ==,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278471193167463595209252239573042,2024-01-03T14:00:12.356+0000
geo,eyJpbmQiOjEwMTIwLCJ0aW1lc3RhbXAiOiIyMDIyLTAzLTEwIDA3OjAyOjMyIiwibGF0aXR1ZGUiOi01NS4xNTYyLCJsb25naXR1ZGUiOi0xMjIuODU0LCJjb3VudHJ5IjoiSXNsZSBvZiBNYW4ifQ==,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278474410119069589737692291137586,2024-01-03T14:00:13.835+0000
geo,eyJpbmQiOjE4NywidGltZXN0YW1wIjoiMjAxOC0wNi0yMCAxMDozMjo0OCIsImxhdGl0dWRlIjotNTUuMTczMiwibG9uZ2l0dWRlIjotMTc1Ljg2NCwiY291bnRyeSI6IkRvbWluaWNhIn0=,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278479296597232472068953892454450,2024-01-03T14:00:16.176+0000
geo,eyJpbmQiOjI3MzksInRpbWVzdGFtcCI6IjIwMTctMTItMjAgMDg6NTk6MTEiLCJsYXRpdHVkZSI6LTg1LjQyOTgsImxvbmdpdHVkZSI6LTQ5LjUwOTksImNvdW50cnkiOiJJbmRpYSJ9,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278484601363728941061978661584946,2024-01-03T14:00:18.677+0000
geo,eyJpbmQiOjcxODYsInRpbWVzdGFtcCI6IjIwMTgtMDQtMDggMDk6MDg6NDAiLCJsYXRpdHVkZSI6LTg2LjQwNjMsImxvbmdpdHVkZSI6LTEzNi42NTcsImNvdW50cnkiOiJBcnViYSJ9,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278487512457102573089100073533490,2024-01-03T14:00:20.051+0000
geo,eyJpbmQiOjcxOTksInRpbWVzdGFtcCI6IjIwMTktMDctMDQgMTk6NDQ6NTkiLCJsYXRpdHVkZSI6LTE1Ljg4NTIsImxvbmdpdHVkZSI6NTcuMTg5LCJjb3VudHJ5IjoiRWd5cHQifQ==,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278492886132370760115919081439282,2024-01-03T14:00:22.545+0000
geo,eyJpbmQiOjM3OTgsInRpbWVzdGFtcCI6IjIwMTgtMDQtMTMgMDI6NTc6NTQiLCJsYXRpdHVkZSI6LTg4LjU0NzgsImxvbmdpdHVkZSI6LTE3NC45NzEsImNvdW50cnkiOiJBZmdoYW5pc3RhbiJ9,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278497875369228309690660532781106,2024-01-03T14:00:24.909+0000
geo,eyJpbmQiOjEwNjIyLCJ0aW1lc3RhbXAiOiIyMDIxLTA2LTEzIDA2OjUxOjEzIiwibGF0aXR1ZGUiOi01Mi43ODYxLCJsb25naXR1ZGUiOjY2LjAyNDksImNvdW50cnkiOiJIdW5nYXJ5In0=,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278500791298305220176298643554354,2024-01-03T14:00:26.254+0000
geo,eyJpbmQiOjQ5MTUsInRpbWVzdGFtcCI6IjIwMTgtMDctMDggMTQ6NDQ6MzgiLCJsYXRpdHVkZSI6LTM1LjE5NSwibG9uZ2l0dWRlIjowLjM3MzY0MywiY291bnRyeSI6Ikx1eGVtYm91cmcifQ==,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278503847462777205958921020244018,2024-01-03T14:00:27.607+0000
geo,eyJpbmQiOjU3NDksInRpbWVzdGFtcCI6IjIwMjAtMDQtMDkgMTc6MDQ6MDgiLCJsYXRpdHVkZSI6LTMzLjY4OTIsImxvbmdpdHVkZSI6LTcxLjMxODksImNvdW50cnkiOiJJbmRvbmVzaWEifQ==,streaming-0eb84f80c29b-geo,shardId-000000000003,49646988456249075449968278509066395540482313274385236018,2024-01-03T14:00:29.955+0000


In [0]:
# Reading user data in Databricks: open the user Kinesis stream and preview its raw records
df_user = (
    spark.readStream
    .format('kinesis')
    .option('streamName', "streaming-0eb84f80c29b-user")
    .option('initialPosition', 'earliest')
    .option('region', 'us-east-1')
    .option('awsAccessKey', ACCESS_KEY)
    .option('awsSecretKey', SECRET_KEY)
    .load()
)
display(df_user)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
user,eyJpbmQiOjQ5NywiZmlyc3RfbmFtZSI6IkFkYW0iLCJsYXN0X25hbWUiOiJBY29zdGEiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjEgMjE6MjY6NDUifQ==,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293025091199863368942090996154418,2024-01-03T14:00:12.789+0000
user,eyJpbmQiOjEwMTIwLCJmaXJzdF9uYW1lIjoiSmFjb2IiLCJsYXN0X25hbWUiOiJCYWlyZCIsImFnZSI6MjQsImRhdGVfam9pbmVkIjoiMjAxNi0wMi0yNSAwODo1Mjo0NSJ9,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293026222754430528235067240611890,2024-01-03T14:00:14.175+0000
user,eyJpbmQiOjE4NywiZmlyc3RfbmFtZSI6IkxhdXJhIiwibGFzdF9uYW1lIjoiS25pZ2h0IiwiYWdlIjoyMSwiZGF0ZV9qb2luZWQiOiIyMDE2LTEyLTAyIDAzOjM0OjA2In0=,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293027842715028811838298785841202,2024-01-03T14:00:16.539+0000
user,eyJpbmQiOjI3MzksImZpcnN0X25hbWUiOiJCcmFuZGkiLCJsYXN0X25hbWUiOiJDZXJ2YW50ZXMiLCJhZ2UiOjM1LCJkYXRlX2pvaW5lZCI6IjIwMTYtMDUtMzEgMDE6NDA6NDQifQ==,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293029463884552915056228225253426,2024-01-03T14:00:19.042+0000
user,eyJpbmQiOjcxODYsImZpcnN0X25hbWUiOiJBbHZpbiIsImxhc3RfbmFtZSI6IkFkYW1zIiwiYWdlIjoyMCwiZGF0ZV9qb2luZWQiOiIyMDE2LTAxLTAxIDEzOjUwOjQwIn0=,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293030594230194254734575295004722,2024-01-03T14:00:20.436+0000
user,eyJpbmQiOjcxOTksImZpcnN0X25hbWUiOiJLZXZpbiIsImxhc3RfbmFtZSI6IkdyYW50IiwiYWdlIjozMywiZGF0ZV9qb2luZWQiOiIyMDE2LTA4LTA5IDA4OjIyOjQ5In0=,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293032580495315881570446776205362,2024-01-03T14:00:22.921+0000
user,eyJpbmQiOjM3OTgsImZpcnN0X25hbWUiOiJBbGV4YW5kcmlhIiwibGFzdF9uYW1lIjoiQWx2YXJhZG8iLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMjMgMDQ6MTM6MjMifQ==,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293034780740307580195750899875890,2024-01-03T14:00:25.272+0000
user,eyJpbmQiOjEwNjIyLCJmaXJzdF9uYW1lIjoiS2VuZHJhIiwibGFzdF9uYW1lIjoiTWlsbGVyIiwiYWdlIjoyNCwiZGF0ZV9qb2luZWQiOiIyMDE1LTEwLTIxIDExOjIyOjUzIn0=,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293035797446921876098886827769906,2024-01-03T14:00:26.590+0000
user,eyJpbmQiOjQ5MTUsImZpcnN0X25hbWUiOiJNYXJrIiwibGFzdF9uYW1lIjoiRWxsaW90dCIsImFnZSI6NDYsImRhdGVfam9pbmVkIjoiMjAxNy0wNy0xNSAwMzo1Njo1NyJ9,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293036955597857066913842354716722,2024-01-03T14:00:27.966+0000
user,eyJpbmQiOjU3NDksImZpcnN0X25hbWUiOiJKb2VsIiwibGFzdF9uYW1lIjoiT2JyaWVuIiwiYWdlIjoyOSwiZGF0ZV9qb2luZWQiOiIyMDE2LTA5LTEyIDEyOjMzOjMzIn0=,streaming-0eb84f80c29b-user,shardId-000000000003,49646999109939477653604293038594901268464351140695244850,2024-01-03T14:00:30.285+0000


In [0]:
# creating functions that we will reuse

def get_stream(stream_name: str, region: str = 'us-east-1', initial_position: str = 'earliest'):
    '''Uses spark.readStream to open a Kinesis stream and returns it as a dataframe.

    Args:
        stream_name: value passed as the reader's 'streamName' option.
        region: value passed as the reader's 'region' option (defaults to 'us-east-1').
        initial_position: value passed as the reader's 'initialPosition' option
            (defaults to 'earliest').

    Returns:
        The dataframe returned by the reader's load() call.

    Relies on the notebook-level names spark, ACCESS_KEY and SECRET_KEY.
    '''
    return (
        spark.readStream
        .format('kinesis')
        .option('streamName', stream_name)
        .option('initialPosition', initial_position)
        .option('region', region)
        .option('awsAccessKey', ACCESS_KEY)
        .option('awsSecretKey', SECRET_KEY)
        .load()
    )

def deserialize_stream(stream, schema):
    '''Parses the JSON payload of a stream dataframe into typed columns.

    The "data" column is cast to a string, parsed with from_json against `schema`,
    and the parsed struct's fields are returned as the columns of a new dataframe.
    '''
    payload_as_text = stream.selectExpr("CAST(data as STRING)")
    parsed_payload = payload_as_text.withColumn("data", from_json(col("data"), schema))
    # "data.*" expands the parsed struct so each field becomes its own column
    return parsed_payload.select(col("data.*"))
    
def add_nulls_to_dataframe_column(dataframe, column, value_to_replace):
    '''Replaces entries of `column` that match `value_to_replace` with null.

    `value_to_replace` is applied with Column.like, so it is a SQL LIKE pattern;
    entries that do not match keep their current value.
    '''
    current_value = col(column)
    is_placeholder = current_value.like(value_to_replace)
    nulled_column = when(is_placeholder, None).otherwise(current_value)
    return dataframe.withColumn(column, nulled_column)

# deserialising data: schemas describing the JSON payload of each stream
# each schema is built from (field name, field type) pairs, in payload order
pin_schema = StructType([
    StructField(field_name, field_type)
    for field_name, field_type in (
        ("index", IntegerType()),
        ("unique_id", StringType()),
        ("title", StringType()),
        ("description", StringType()),
        ("poster_name", StringType()),
        ("follower_count", StringType()),
        ("tag_list", StringType()),
        ("is_image_or_video", StringType()),
        ("image_src", StringType()),
        ("downloaded", IntegerType()),
        ("save_location", StringType()),
        ("category", StringType()),
    )
])
geo_schema = StructType([
    StructField(field_name, field_type)
    for field_name, field_type in (
        ("ind", IntegerType()),
        ("timestamp", TimestampType()),
        ("latitude", FloatType()),
        ("longitude", FloatType()),
        ("country", StringType()),
    )
])
user_schema = StructType([
    StructField(field_name, field_type)
    for field_name, field_type in (
        ("ind", IntegerType()),
        ("first_name", StringType()),
        ("last_name", StringType()),
        ("age", StringType()),
        ("date_joined", TimestampType()),
    )
])

# open the three Kinesis streams (pin, geo, user - in that order)
pin_stream, geo_stream, user_stream = (
    get_stream(kinesis_stream_name)
    for kinesis_stream_name in (
        'streaming-0eb84f80c29b-pin',
        'streaming-0eb84f80c29b-geo',
        'streaming-0eb84f80c29b-user',
    )
)

# parse each stream's JSON payload with its matching schema
df_pin, df_geo, df_user = (
    deserialize_stream(raw_stream, payload_schema)
    for raw_stream, payload_schema in (
        (pin_stream, pin_schema),
        (geo_stream, geo_schema),
        (user_stream, user_schema),
    )
)

In [0]:
# clean the data
# placeholder entries that carry no real information, keyed by the column they appear in;
# each value is handed to add_nulls_to_dataframe_column and matching entries become null
columns_and_values_for_null = {
    "description": "No description available%",
    "follower_count": "User Info Error",
    "image_src": "Image src error.",
    "poster_name": "User Info Error",
    "tag_list": "N,o, ,T,a,g,s, ,A,v,a,i,l,a,b,l,e",
    "title": "No Title Data Available"
}
# final column layout of the cleaned pin dataframe
new_pin_column_order = [
    "ind",
    "unique_id",
    "title",
    "description",
    "follower_count",
    "poster_name",
    "tag_list",
    "is_image_or_video",
    "image_src",
    "save_location",
    "category"
]

# null out the placeholders, one column at a time
for column_name, null_pattern in columns_and_values_for_null.items():
    df_pin = add_nulls_to_dataframe_column(df_pin, column_name, null_pattern)

df_pin = (
    df_pin
    # expand the "k" and "M" suffixes so every follower_count entry is written out in digits
    .withColumn("follower_count", regexp_replace("follower_count", "k", "000"))
    .withColumn("follower_count", regexp_replace("follower_count", "M", "000000"))
    # cast follower_count column to integer type
    .withColumn("follower_count", col("follower_count").cast('int'))
    # keep only the path part of save_location
    .withColumn("save_location", regexp_replace("save_location", "Local save in ", ""))
    # rename the index column to ind
    .withColumnRenamed("index", "ind")
    # re-structure columns
    .select(new_pin_column_order)
)

# clean df_geo
# define function for returning list containing two values
def combine_lat_and_long(latitude, longitude):
    '''Returns the two values as a two-element list, latitude first.'''
    coordinates = [latitude]
    coordinates.append(longitude)
    return coordinates
# combine the latitude and longitude columns into one array<double> column.
# Spark's built-in array() is used instead of a Python UDF so the work stays inside
# Spark's own engine; the casts to double keep the element type the UDF used to declare.
df_geo = df_geo.withColumn(
    "coordinates",
    array(col("latitude").cast("double"), col("longitude").cast("double")),
)
# drop the latitude and longitude columns
cols_to_drop = ("latitude", "longitude")
df_geo = df_geo.drop(*cols_to_drop)
# make sure the timestamp column has timestamp type
# (geo_schema already declares it as TimestampType, so this leaves parsed values unchanged)
df_geo = df_geo.withColumn("timestamp", to_timestamp("timestamp"))
# change column order
new_geo_column_order = [
    "ind",
    "country",
    "coordinates",
    "timestamp",
]
df_geo = df_geo.select(new_geo_column_order)

# clean df_user

# columns replaced by the combined user_name column
cols_to_drop = ("first_name", "last_name")
# final column layout of the cleaned user dataframe
new_user_column_order = [
    "ind",
    "user_name",
    "age",
    "date_joined",
]
df_user = (
    df_user
    # create new column for full name (first and last name joined by a space)
    .withColumn("user_name", concat_ws(" ", "first_name", "last_name"))
    # drop the first_name and last_name columns
    .drop(*cols_to_drop)
    # make sure date_joined has timestamp type
    .withColumn("date_joined", to_timestamp("date_joined"))
    # change column order
    .select(new_user_column_order)
)


In [0]:
# display the pin dataframe (only df_pin is shown in this cell)
display(df_pin)



ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category
497,511a0584-6bc4-4cf0-8963-9ddadefec9bc,30 Pics Of A Family That Uses Chalk Art To Go On Adventures During The Lockdown,A family in Atlanta use bright chalk crayons to create wondrous settings for the family’s children to play in.,2000000.0,Bored Panda,"Chalk Photography,Chalk Pictures,Sidewalk Chalk Art,Chalkboard Art,Art Plastique,Easy Drawings,Art For Kids,Art Projects,Street Art",image,https://i.pinimg.com/originals/e6/ff/9e/e6ff9e9235742b6f37c550ac73bd3c83.jpg,/data/art,art
10120,9d18af84-7e86-42d1-8415-63cf1f8dd45e,Where to Stay in London + The Best Hotels in London: Area by Area,Travelling to London and wondering where to stay? Don't miss this insider's guide to the best areas to stay in London and the best hotels in London. Cool things to do and places…,376.0,London x London - Cool Things To Do + London Travel,"London Places,London Hotels,London England Hotels,London England Travel,London Restaurants,Cool Places To Visit,Places To Travel,Places To Go,Travel Destinations",image,https://i.pinimg.com/originals/53/bb/29/53bb29993450a1fb5223bcaf524d9deb.jpg,/data/travel,travel
187,be83b47e-2725-4fd7-9b08-22fe7ff67990,Watercolor Paintings from Etsy - The Honeycomb Home,We picked our favorite watercolor paintings from Etsy and listed them all here. These eleven paintings would brighten up any space!,48000.0,The Honeycomb Home,"Watercolor Art Diy,Watercolor Art Lessons,Watercolor Pictures,Watercolor Landscape Paintings,Landscape Prints,Nature Paintings,Landscape Art,Water Color Painting Landscape,Landscapes To Paint",image,https://i.pinimg.com/originals/95/4d/97/954d97097205bcc1bb872889819928eb.png,/data/art,art
2739,1006be65-0eae-4537-ada1-f68c37b37ec0,25+ Easy Thanksgiving Crafts for Kids,These quick and easy Thanksgiving crafts for kids can be made in under 30 minutes using items that you probably already have around the house!,167000.0,"Happiness is Homemade | Crafts, Printables, Party Ideas, & More!","Free Thanksgiving Printables,Thanksgiving Crafts For Kids,Holiday Crafts,Fun Crafts,Fall Crafts For Toddlers,Thanksgiving Turkey,Fall Toddler Crafts,Thanksgiving Decorations,Simple Crafts",image,https://i.pinimg.com/originals/f2/33/3e/f2333e4887b36ff61e1cfd3c2fd4180e.jpg,/data/diy-and-crafts,diy-and-crafts
7186,e3badfe6-acfc-4cc5-96c5-f8ba620830b6,Red Hot Chili Peppers - Scar Tissue - Women's T-shirt - Heather Dark Grey / S,"Women's t-shirt. Design inspired by the song ""Scar Tissue"" by Californian band Red Hot Chili Peppers. This song was the first single from the album Californication (1999) and al…",27.0,Mala Rock | Rock T-shirts,"John Frusciante,Hottest Chili Pepper,Rock T Shirts,Timeless Classic,Fabric Weights,Heather Grey,Album,T Shirts For Women,Band",image,https://i.pinimg.com/originals/49/95/c7/4995c7746c77bdd3a484d8d608c67f59.jpg,/data/mens-fashion,mens-fashion
7199,9fd29a6c-853f-44ba-ae87-76bc909cdf2e,Sélection Bottines Homme Automne / Hiver 2021 | Guide Complet,"Guide d'achat et conseils. Quelles Bottes et Chaussures Montantes porter cet Automne Hiver 2021 ? Notre sélection de bottines Chelsea Boots, Desert Boots, Chukka pour homme.",10000.0,Rienasemettre.fr,"Outfit Hombre Casual,Black Outfit Men,Black Jeans Men,Winter Outfit For Men,Jeans For Men,Man Outfit,Outfit Work,Black Outfits,Jeans Fit",image,https://i.pinimg.com/originals/31/28/f7/3128f7eb3a67e16141905bb5f89712bd.jpg,/data/mens-fashion,mens-fashion
3798,3566de93-1adc-4c88-8f48-4e8073cd2942,Declining by Degrees : Higher Education at Risk (Paperback),"A provocative look at higher education as featured in The New York Times Two decades ago A Nation at Risk sounded a national alarm on K-12 education. Now, an equally urgent alar…",2000000.0,Walmart,,image,https://i.pinimg.com/originals/90/1b/e2/901be2bc9c4beca25dc9db05844beff7.jpg,/data/education,education
10622,2bea3485-c931-46b4-8c68-9068b2ad9b79,Panhard's Crab May Just Be The Future Of Armored Scout Vehicles,France's military vehicle (and once upon a time carmaker) Panhard has set out to change the way the weapons industry looks at a scout vehicles with their new three person armore…,3000.0,Joseph Coronado,"Army Vehicles,Armored Vehicles,Armored Truck,Bug Out Vehicle,Derby Cars,Armored Fighting Vehicle,Military Weapons,Military Equipment,Gi Joe",image,https://i.pinimg.com/originals/fc/73/98/fc739836485eaf5b340c125100f2b442.jpg,/data/vehicles,vehicles
4915,277a181b-1c7f-4ed8-b1c5-4d49555ae0e1,"What is the difference between macrame cord, macrame rope, and macrame","The differences, and different uses, for macrame string, macrame rope, and macrame cord.",5000.0,Niroma Studio,"Wedding Chuppah,Wedding Ceremony Backdrop,Boho Wedding,Wedding Backdrops,Casual Wedding,Wedding Reception,Wedding Centerpieces,Wedding Decorations,Macrame Wall Hanging Patterns",image,https://i.pinimg.com/originals/4a/21/a7/4a21a7d676a039658ca798bc7e095b36.png,/data/event-planning,event-planning
5749,e6332555-573d-4da2-9482-0d3cf77d7b44,6 Step Plan to Pay Off Debt and Save Money - Life and a Budget,"You may be wondering, ""How do I pay off debt and save money?"" Well, this strategy will help become debt free while saving at the same time.",24000.0,Latoya | Life and a Budget,"Budgeting Finances,Budgeting Tips,Budgeting System,Monthly Expenses,Financial Tips,Financial Planning,Retirement Planning,Financial Literacy,Financial Peace",image,https://i.pinimg.com/originals/bd/4f/58/bd4f5816d0569bec2fd1afc2161e4bb6.jpg,/data/finance,finance


In [0]:
# writing stream to table: pin
# appends the pin stream to a Delta table, using the checkpoint location set below
(
    df_pin.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/tmp/kinesis/0eb84f80c29b_pin_table_checkpoints/")
    .table("0eb84f80c29b_pin_table")
)
     

In [0]:
# writing stream to table: geo
# appends the geo stream to a Delta table, using the checkpoint location set below
(
    df_geo.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/tmp/kinesis/0eb84f80c29b_geo_table_checkpoints/")
    .table("0eb84f80c29b_geo_table")
)

In [0]:
# writing stream to table: user
# appends the user stream to a Delta table, using the checkpoint location set below
(
    df_user.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/tmp/kinesis/0eb84f80c29b_user_table_checkpoints/")
    .table("0eb84f80c29b_user_table")
)

In [0]:
# to view the tables, go to Catalog and search for the table name