In [0]:
import urllib
import urllib.parse

from pyspark.sql.types import *
from pyspark.sql.functions import *

# Location of the Delta table that holds the AWS credentials
delta_table_path = "dbfs:/user/hive/warehouse/authentication_credentials"

# Load that table into a Spark DataFrame, passing the source format as a keyword
aws_keys_df = spark.read.load(path=delta_table_path, format="delta")

In [0]:
# Fetch both AWS credential columns in a single Spark job so the access key and
# the secret key are read from the same row, and only one row is brought back
# to the driver.
_credentials_row = aws_keys_df.select('Access key ID', 'Secret access key').first()
if _credentials_row is None:
    # first() returns None for an empty DataFrame; fail with a clear message
    # instead of an IndexError on an empty list.
    raise ValueError("aws_keys_df is empty: no AWS credentials available to read")

ACCESS_KEY = _credentials_row['Access key ID']
SECRET_KEY = _credentials_row['Secret access key']
# URL-encode the secret key; safe="" means "/" is percent-encoded as well
ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")

In [0]:
%sql
-- Turn off the Delta format check (spark.databricks.delta.formatCheck.enabled).
-- The cell output echoes the key together with the value that was set.
-- NOTE(review): presumably required so the reads in this notebook are not
-- rejected by the format check -- confirm it is still needed.
SET spark.databricks.delta.formatCheck.enabled=false

key,value
spark.databricks.delta.formatCheck.enabled,False


In [0]:
# Streaming reader on the 'streaming-0e284c63dbbf-pin' Kinesis stream,
# authenticated with the AWS keys loaded earlier in the notebook.
df_pin = (
    spark.readStream
    .format('kinesis')
    .options(
        streamName='streaming-0e284c63dbbf-pin',
        initialPosition='earliest',
        region='us-east-1',
        awsAccessKey=ACCESS_KEY,
        awsSecretKey=SECRET_KEY,
    )
    .load()
)

In [0]:
# Streaming reader on the 'streaming-0e284c63dbbf-geo' Kinesis stream,
# authenticated with the AWS keys loaded earlier in the notebook.
df_geo = (
    spark.readStream
    .format('kinesis')
    .options(
        streamName='streaming-0e284c63dbbf-geo',
        initialPosition='earliest',
        region='us-east-1',
        awsAccessKey=ACCESS_KEY,
        awsSecretKey=SECRET_KEY,
    )
    .load()
)

In [0]:
# Streaming reader on the 'streaming-0e284c63dbbf-user' Kinesis stream,
# authenticated with the AWS keys loaded earlier in the notebook.
df_user = (
    spark.readStream
    .format('kinesis')
    .options(
        streamName='streaming-0e284c63dbbf-user',
        initialPosition='earliest',
        region='us-east-1',
        awsAccessKey=ACCESS_KEY,
        awsSecretKey=SECRET_KEY,
    )
    .load()
)

In [0]:
# Render the records read from the pin Kinesis stream. Each row carries the
# Kinesis metadata columns plus the record payload in the `data` column.
display(df_pin)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
7a5391a2-4788-43fc-9640-01b06af97cab,eyJpbmQiOjc1MjgsImZpcnN0X25hbWUiOiJBYmlnYWlsIiwibGFzdF9uYW1lIjoiQWxpIiwiYWdlIjoyMCwiZGF0ZV9qb2luZWQiOiIyMDE1LTEwLTI0VDExOjIzOjUxIn0=,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996513412760753093216238043138,2024-03-27T19:28:06.768+0000
b520913a-75ba-4d90-821b-f2507bb4c990,eyJpbmQiOjI4NjMsImZpcnN0X25hbWUiOiJEeWxhbiIsImxhc3RfbmFtZSI6IkhvbG1lcyIsImFnZSI6MzIsImRhdGVfam9pbmVkIjoiMjAxNi0xMC0yM1QxNDowNjo1MSJ9,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996514621686572707982851702786,2024-03-27T19:28:09.336+0000
e837d7ab-7928-4c72-83a0-cb2d5eefb0fb,eyJpbmQiOjU3MzAsImZpcnN0X25hbWUiOiJSYWNoZWwiLCJsYXN0X25hbWUiOiJEYXZpcyIsImFnZSI6MzYsImRhdGVfam9pbmVkIjoiMjAxNS0xMi0wOFQyMDowMjo0MyJ9,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996515830612392322818184839170,2024-03-27T19:28:11.863+0000
96680ff0-229d-497b-9b23-68fc3716e1f4,eyJpbmQiOjQzMTUsImZpcnN0X25hbWUiOiJNaWNoZWxsZSIsImxhc3RfbmFtZSI6IlByaW5jZSIsImFnZSI6MzYsImRhdGVfam9pbmVkIjoiMjAxNS0xMi0yMFQxNjozODoxMyJ9,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996520666315670781953358954498,2024-03-27T19:28:20.274+0000
2fe80db9-5a4e-4cbb-b28c-425667c47898,eyJpbmQiOjEzMTMsImZpcnN0X25hbWUiOiJCcml0dGFueSIsImxhc3RfbmFtZSI6IkpvbmVzIiwiYWdlIjozMiwiZGF0ZV9qb2luZWQiOiIyMDE2LTA0LTAyVDAzOjUxOjIzIn0=,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996527919870588474401331609602,2024-03-27T19:29:29.059+0000
806fdb44-a983-4a8e-8ede-71aa5f9d758b,eyJpbmQiOjEwNzk0LCJmaXJzdF9uYW1lIjoiVGhvbWFzIiwibGFzdF9uYW1lIjoiVHVybmVyIiwiYWdlIjozNCwiZGF0ZV9qb2luZWQiOiIyMDE2LTEyLTIyVDAwOjAyOjAyIn0=,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996530337722227704003278405634,2024-03-27T19:29:33.115+0000
c2feb3e4-c318-477d-9822-fe674061ca8e,eyJpbmQiOjUwNjksImZpcnN0X25hbWUiOiJBbWFuZGEiLCJsYXN0X25hbWUiOiJCYWxsIiwiYWdlIjoyNSwiZGF0ZV9qb2luZWQiOiIyMDE2LTAxLTEzVDE3OjM2OjMwIn0=,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996532755573866933399066771458,2024-03-27T19:29:35.982+0000
cb8768b0-66cc-4697-b6d6-0478db57fb40,eyJpbmQiOjYxNDUsImZpcnN0X25hbWUiOiJFbWlseSIsImxhc3RfbmFtZSI6Ikhhd2tpbnMiLCJhZ2UiOjMwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTItMjVUMjA6MjQ6MzcifQ==,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996540009128784622067468206082,2024-03-27T19:29:48.951+0000
145439ce-8c56-40c2-82e3-13c51f2c0539,eyJpbmQiOjMxNTYsImZpcnN0X25hbWUiOiJBbmRyZXciLCJsYXN0X25hbWUiOiJCYWtlciIsImFnZSI6MjIsImRhdGVfam9pbmVkIjoiMjAxNS0xMi0yMVQwODowNjo1NCJ9,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996546053757882695831817027586,2024-03-27T19:29:57.803+0000
8e6e8f9e-ed4b-4891-8f90-200fffe0486c,eyJpbmQiOjg2NTMsImZpcnN0X25hbWUiOiJUYW1teSIsImxhc3RfbmFtZSI6IkRhdmlzIiwiYWdlIjoyNCwiZGF0ZV9qb2luZWQiOiIyMDE2LTAzLTA5VDEwOjQ4OjE0In0=,streaming-0e284c63dbbf-pin,shardId-000000000000,49650559117342078823387241996554516238619999335551598594,2024-03-27T19:30:13.469+0000


In [0]:
# Render the records read from the user Kinesis stream. Each row carries the
# Kinesis metadata columns plus the record payload in the `data` column.
display(df_user)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
29fd503b-4031-4cac-aa7b-1ed75aa951bf,eyJpbmRleCI6NTczMCwidW5pcXVlX2lkIjoiMWUxZjBjOGItOWZjZi00NjBiLTkxNTQtYzc3NTgyNzIwNmViIiwidGl0bGUiOiJJc2xhbmQgT2FzaXMgQ291cG9uIE9yZ2FuaXplciIsImRlc2NyaXB0aW9uIjoiRGVzY3JpcHQ= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662344557461859061654267887618,2024-03-27T19:28:13.009+0000
21c58da0-26f7-4975-b98c-aeb3b7c7c106,eyJpbmRleCI6ODMwNCwidW5pcXVlX2lkIjoiNWI2ZDA5MTMtMjVlNC00M2FiLTgzOWQtODVkNTUxNmY3OGE0IiwidGl0bGUiOiJUaGUgIzEgUmVhc29uIFlvdeKAmXJlIE5vdCBIaXMgUHJpb3JpdHkgQW55bW9yZSAtIE1hdHQ= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662345766387678676489601024002,2024-03-27T19:28:15.493+0000
a36e4778-9a5d-4f9b-a470-8b2256a1a309,eyJpbmRleCI6NTczMCwidW5pcXVlX2lkIjoiMWUxZjBjOGItOWZjZi00NjBiLTkxNTQtYzc3NTgyNzIwNmViIiwidGl0bGUiOiJJc2xhbmQgT2FzaXMgQ291cG9uIE9yZ2FuaXplciIsImRlc2NyaXB0aW9uIjoiRGVzY3JpcHQ= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662353019942596368937573679106,2024-03-27T19:29:24.640+0000
99d4e34c-09cf-49ca-9b2f-2527a7c647df,eyJpbmRleCI6MjkyMywidW5pcXVlX2lkIjoiNTJmYTNhZjUtMjRhNC00Y2NiLThmMTctOWMzZWIxMjMyN2VlIiwidGl0bGUiOiJVRk8gUGFwZXIgUGxhdGUgQ3JhZnQiLCJkZXNjcmlwdGlvbiI6IkEgZnVuIHNwYWNlIGFjdGk= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662362691349153287001763479554,2024-03-27T19:29:38.098+0000
b8bb2cb1-0c96-4dd9-8e20-acd322bc2988,eyJpbmRleCI6NjA2MywidW5pcXVlX2lkIjoiNjA2OTM3MjctNDkyNy00YmQ2LWE4YzUtMDk2YTM5MmQ2M2U2IiwidGl0bGUiOiI0MSBHb3JnZW91cyBGYWxsIERlY29yIElkZWFzIEZvciBZb3VyIEhvbWUgLSBDaGF5bG9yICY= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662365109200792516466271322114,2024-03-27T19:29:41.934+0000
49b02954-1c3a-4d2d-a25a-016d0c1dbe7f,eyJpbmRleCI6NjE0NSwidW5pcXVlX2lkIjoiODJlMTNhMDctZGI5OS00M2EzLWIxYzAtODlhNGI3NTgyMWRhIiwidGl0bGUiOiJIT0xJREFZIE1BTlRMRSBERUNPUiAtIEBBTUFaT04gJiBAVEFSR0VUIEZJTkRTIiwiZGVzY3I= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662368735978251360903551254530,2024-03-27T19:29:49.972+0000
6a20cdf8-324b-4789-a2e4-06a8fd7c51d6,eyJpbmRleCI6ODg4NywidW5pcXVlX2lkIjoiNWRmOWY2ZTUtMDdmNS00Y2U4LWE4MmUtOTY1ODZiYmMwNWQ4IiwidGl0bGUiOiIyNSBVbHRyYSBTZXh5IEJhY2sgVGF0dG9vIElkZWFzIEZvciBHaXJscyIsImRlc2NyaXB0aW8= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662384452013906352938248306690,2024-03-27T19:30:16.600+0000
a8674886-e185-405a-a674-5b34b26ad66a,eyJpbmRleCI6MTAxNCwidW5pcXVlX2lkIjoiOWFjOTQzMDctYmIyNy00MTBlLWI1NTQtMGEwYWI2YzE5ODQ3IiwidGl0bGUiOiJIYWlyc3R5bGVzICYgQmVhdXR5IiwiZGVzY3JpcHRpb24iOiJVbnRpdGxlZCIsInBvc3Rlcl8= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662386869865545582540195102722,2024-03-27T19:30:21.463+0000
51513610-7634-4de3-8ad4-b5a0e95bafe8,eyJpbmRleCI6MTA2NjMsInVuaXF1ZV9pZCI6ImQ1MjA4YzE2LTgyYWUtNGZkMS04N2YwLTM2ZTVlZjc4OTQxMiIsInRpdGxlIjoiMjErIENsYXNzaWMgQ2FyIFBpY3R1cmUgb2YgdGhlIDE5NTBzIC0gVmludGFnZXRvcGlhIiw= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662400168049561344904225882114,2024-03-27T19:30:42.252+0000
f500d2b4-4403-474e-9e29-b1770e7fd120,eyJpbmRleCI6MjQ4MiwidW5pcXVlX2lkIjoiMDg2MDRmMjAtZmExNy00YjlhLTk5NDktNzgxNzE3ZWNhNmNkIiwidGl0bGUiOiJGT1JOVCBQT1JDSCBDSFJJU1RNQVMgREVDT1JBVElORyBJREVBUyIsImRlc2NyaXB0aW9uIjo= (truncated),streaming-0e284c63dbbf-user,shardId-000000000000,49650559162657193066801472662409839456118262968415682562,2024-03-27T19:30:57.388+0000


In [0]:
# Render the records read from the geo Kinesis stream. Each row carries the
# Kinesis metadata columns plus the record payload in the `data` column.
display(df_geo)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
7442d5a3-b896-4ab9-af5e-f7c6cd7c6620,eyJpbmQiOjI4NjMsInRpbWVzdGFtcCI6IjIwMjAtMDQtMjdUMTM6MzQ6MTYiLCJsYXRpdHVkZSI6LTUuMzQ0NDUsImxvbmdpdHVkZSI6LTE3Ny45MjQsImNvdW50cnkiOiJBcm1lbmlhIn0=,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928718001807401792878011219970,2024-03-27T19:28:09.710+0000
065ea390-83d8-44e1-b1a8-d61f4baff0e9,eyJpbmQiOjEzMTMsInRpbWVzdGFtcCI6IjIwMTgtMDYtMjZUMDI6Mzk6MjUiLCJsYXRpdHVkZSI6NzcuMDQ0NywibG9uZ2l0dWRlIjo2MS45MTE5LCJjb3VudHJ5IjoiTWFsZGl2ZXMifQ==,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928722837510680251875746381826,2024-03-27T19:28:18.141+0000
d4d55759-386e-450e-9fb4-95e5801b5505,eyJpbmQiOjgzMDQsInRpbWVzdGFtcCI6IjIwMTktMDktMTNUMDQ6NTA6MjkiLCJsYXRpdHVkZSI6LTI4Ljg4NTIsImxvbmdpdHVkZSI6LTE2NC44NywiY291bnRyeSI6IkZyZW5jaCBHdWlhbmEifQ==,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928728882139778329763263807490,2024-03-27T19:29:26.694+0000
73a7eb10-3510-4e95-a84b-aad63bec2b8a,eyJpbmQiOjg3MzEsInRpbWVzdGFtcCI6IjIwMjAtMDctMTdUMDQ6Mzk6MDkiLCJsYXRpdHVkZSI6LTgzLjEwNCwibG9uZ2l0dWRlIjotMTcxLjMwMiwiY291bnRyeSI6IkFydWJhIn0=,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928730091065597944461157990402,2024-03-27T19:29:28.056+0000
5616feed-7c8d-46e2-9206-c83ba524861e,eyJpbmQiOjMwODksInRpbWVzdGFtcCI6IjIwMTgtMDItMjhUMDU6MzE6MjkiLCJsYXRpdHVkZSI6LTg5Ljk3ODcsImxvbmdpdHVkZSI6LTE3My4yOTMsImNvdW50cnkiOiJBbGJhbmlhIn0=,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928738553546335247690014654466,2024-03-27T19:29:39.108+0000
06c7e8d3-6751-4975-b731-43dd657999e5,eyJpbmQiOjk4NzUsInRpbWVzdGFtcCI6IjIwMjAtMDMtMjBUMTM6MDM6MTgiLCJsYXRpdHVkZSI6LTc0LjMzODIsImxvbmdpdHVkZSI6LTExMC40ODQsImNvdW50cnkiOiJCYXJiYWRvcyJ9,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928747016027072551056310272002,2024-03-27T19:29:54.160+0000
d7fab744-33ea-4c6e-9634-8bf554fcd24e,eyJpbmQiOjI0MTgsInRpbWVzdGFtcCI6IjIwMjItMDUtMjdUMTE6MzA6NTkiLCJsYXRpdHVkZSI6LTg4LjQ2NDIsImxvbmdpdHVkZSI6LTE3MS4wNjEsImNvdW50cnkiOiJBbnRhcmN0aWNhICh0aGUgdGVycml0b3J5IFNvdXQ= (truncated),streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928748224952892165891643408386,2024-03-27T19:29:56.742+0000
603f70ff-528a-473b-a96d-5ff4fad81477,eyJpbmQiOjk2NzIsInRpbWVzdGFtcCI6IjIwMTktMTEtMDNUMTM6MTU6NTIiLCJsYXRpdHVkZSI6NzkuNDY1OCwibG9uZ2l0dWRlIjotNjkuNDEzMywiY291bnRyeSI6IkluZGlhIn0=,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928756687433629469257939025922,2024-03-27T19:30:11.417+0000
c27953c7-d5c3-4bbb-a40b-9d409fc87d5c,eyJpbmQiOjg4ODcsInRpbWVzdGFtcCI6IjIwMjEtMDktMTlUMDU6Mjc6NDMiLCJsYXRpdHVkZSI6LTI4LjAxMzcsImxvbmdpdHVkZSI6LTE2MC43MDgsImNvdW50cnkiOiJCb3Rzd2FuYSJ9,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928759105285268698928605298690,2024-03-27T19:30:16.226+0000
29cd5a6d-1f32-4975-a869-6be4ebb1af30,eyJpbmQiOjcxNjYsInRpbWVzdGFtcCI6IjIwMjItMDctMjVUMDM6MDc6MzciLCJsYXRpdHVkZSI6LTg2LjQwNjMsImxvbmdpdHVkZSI6LTEzNi42NTcsImNvdW50cnkiOiJBcnViYSJ9,streaming-0e284c63dbbf-geo,shardId-000000000000,49650559150168775755624322928765149914366772761673596930,2024-03-27T19:30:26.429+0000


In [0]:
# For pin data
pin_schema = StructType([
    StructField("ind", IntegerType(), True),
    StructField("unique_id", StringType(), True),
    StructField("title", StringType(), True),
    StructField("description", StringType(), True),
    StructField("follower_count", IntegerType(), True),
    StructField("poster_name", StringType(), True),
    StructField("tag_list", StringType(), True),
    StructField("is_image_or_video", StringType(), True),
    StructField("image_src", StringType(), True),
    StructField("save_location", StringType(), True),
    StructField("category", StringType(), True)
])

# "ind", "unique_id", "title", "description", "follower_count", "poster_name", "tag_list", "is_image_or_video", "image_src", "save_location", "category")

# Takes JSON data stored in a data column, parses it according to the pin_schema schema
df_pin = df_pin.selectExpr("CAST(data AS STRING)") \
                    .withColumn('data', from_json('data', pin_schema)) \
                    .select("data.*")
# # # Displaying the DataFrames
display(df_pin)




In [0]:
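# TODO: write the parsed stream to a Delta table. Template only -- `df` is not
# defined in this notebook and <TABLE_NAME> is a placeholder; replace both
# before uncommenting.
# df.writeStream \
#   .format("delta") \
#   .outputMode("append") \
#   .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
#   .table("<TABLE_NAME>")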