In [0]:
# Import pyspark functions
from pyspark.sql.functions import *
# Import url processing
import urllib

In [0]:
# Define the path to the Delta table
delta_table_path =  "dbfs:/user/hive/warehouse/authentication_credentials"
# Reading the Delta table to a Spark DataFrame
aws_keys_df =  spark.read.format("delta").load(delta_table_path)

In [0]:
# Get the AWS access key and secret key from the spark dataframe
ACCESS_KEY = aws_keys_df.select('Access key ID').collect()[0]['Access key ID']
SECRET_KEY = aws_keys_df.select('Secret access key').collect()[0]['Secret access key']
# Encode the secrete key
ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")

In [0]:
%sql
-- Disable format checks during the reading of Delta tables
SET spark.databricks.delta.formatCheck.enabled=false

key,value
spark.databricks.delta.formatCheck.enabled,False


In [0]:
df_pin = spark \
.readStream \
.format('kinesis') \
.option('streamName','streaming-0affe012670f-pin') \
.option('initialPosition','earliest') \
.option('region','us-east-1') \
.option('awsAccessKey', ACCESS_KEY) \
.option('awsSecretKey', SECRET_KEY) \
.load()

In [0]:
df_geo = spark \
.readStream \
.format('kinesis') \
.option('streamName','streaming-0affe012670f-geo') \
.option('initialPosition','earliest') \
.option('region','us-east-1') \
.option('awsAccessKey', ACCESS_KEY) \
.option('awsSecretKey', SECRET_KEY) \
.load()

In [0]:
df_user = spark \
.readStream \
.format('kinesis') \
.option('streamName','streaming-0affe012670f-user') \
.option('initialPosition','earliest') \
.option('region','us-east-1') \
.option('awsAccessKey', ACCESS_KEY) \
.option('awsSecretKey', SECRET_KEY) \
.load()

In [0]:
display(df_geo)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
partition-1,eyJpbmQiOjEwOTk5LCJ0aW1lc3RhbXAiOiIyMDE5LTA0LTE5VDA3OjQ0OjA4IiwibGF0aXR1ZGUiOi01Mi4yMTcyLCJsb25naXR1ZGUiOjIxLjY3MDQsImNvdW50cnkiOiJTYWludCBNYXJ0aW4ifQ==,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000350955645169612804684062677532706,2024-08-06T17:35:49.781+0000
partition-1,eyJpbmQiOjY2MzAsInRpbWVzdGFtcCI6IjIwMjEtMTItMTJUMTc6NTQ6MTMiLCJsYXRpdHVkZSI6LTUwLjc4MTQsImxvbmdpdHVkZSI6LTExMS45NTQsImNvdW50cnkiOiJCb3V2ZXQgSXNsYW5kIChCb3V2ZXRveWEpIn0=,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000350972906212465262360243766034466,2024-08-06T17:36:02.126+0000
partition-1,eyJpbmQiOjE0OTgsInRpbWVzdGFtcCI6IjIwMjEtMTAtMjlUMDY6NDE6MjgiLCJsYXRpdHVkZSI6LTg1LjI5NjUsImxvbmdpdHVkZSI6LTEwMS43MiwiY291bnRyeSI6IkdyZWVubGFuZCJ9,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000351552117079752466596279177183266,2024-08-06T17:41:35.236+0000
partition-1,eyJpbmQiOjY0MTYsInRpbWVzdGFtcCI6IjIwMjAtMTAtMThUMDM6MTU6MjgiLCJsYXRpdHVkZSI6LTg5LjYzLCJsb25naXR1ZGUiOi0xNzkuMDIyLCJjb3VudHJ5IjoiQXJnZW50aW5hIn0=,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000351603662049923375542256550281250,2024-08-06T17:42:01.701+0000
partition-1,eyJpbmQiOjQ3NjcsInRpbWVzdGFtcCI6IjIwMjItMDMtMTVUMTQ6Mjk6NDciLCJsYXRpdHVkZSI6LTYzLjY3ODUsImxvbmdpdHVkZSI6LTI5LjI5NDMsImNvdW50cnkiOiJIdW5nYXJ5In0=,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000351610286963414863710408818032674,2024-08-06T17:42:05.342+0000
partition-1,eyJpbmQiOjkyMTMsInRpbWVzdGFtcCI6IjIwMjEtMDctMDRUMDg6Mjg6NDgiLCJsYXRpdHVkZSI6LTg5LjUxNzMsImxvbmdpdHVkZSI6LTE3OS42ODksImNvdW50cnkiOiJBbGdlcmlhIn0=,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000362247312138416706042009072173090,2024-08-06T19:22:05.393+0000
partition-1,eyJpbmQiOjg1NTksInRpbWVzdGFtcCI6IjIwMTktMDQtMTlUMDM6NTY6MTEiLCJsYXRpdHVkZSI6LTg4LjUyNTUsImxvbmdpdHVkZSI6LTE2MS42NDQsImNvdW50cnkiOiJBbWVyaWNhbiBTYW1vYSJ9,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000362258007505142836666661295095842,2024-08-06T19:22:10.293+0000
partition-1,eyJpbmQiOjQwNjcsInRpbWVzdGFtcCI6IjIwMjAtMDMtMDFUMTQ6MzQ6MjAiLCJsYXRpdHVkZSI6LTg1LjA2NDcsImxvbmdpdHVkZSI6LTE0Ni44NzksImNvdW50cnkiOiJCYWhhbWFzIn0=,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000362266316452301048013185209073698,2024-08-06T19:22:14.339+0000
partition-1,eyJpbmQiOjM3MywidGltZXN0YW1wIjoiMjAyMi0wMi0wM1QwMjozOTozMCIsImxhdGl0dWRlIjozMS4xOTMzLCJsb25naXR1ZGUiOjEzNC45MTMsImNvdW50cnkiOiJVbml0ZWQgU3RhdGVzIE1pbm9yIE91dGx5aW5nIElzbGE= (truncated),streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000362392898239169617380293637832738,2024-08-06T19:23:22.127+0000
partition-1,eyJpbmQiOjc4MjEsInRpbWVzdGFtcCI6IjIwMjAtMDUtMTNUMTQ6NTY6MDQiLCJsYXRpdHVkZSI6NjEuNTU0NSwibG9uZ2l0dWRlIjo5MC45MzE0LCJjb3VudHJ5IjoiQ2hpbGUifQ==,streaming-0affe012670f-geo,shardId-000000000002,49654616295885580145000362400380281067212320530772262946,2024-08-06T19:23:26.856+0000


In [0]:
display(df_pin)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
partition-1,eyJpbmRleCI6NTY1NiwidW5pcXVlX2lkIjoiOTc3ZDM0YzctYzM0Ny00ZGQzLTkzZTAtNzY3NDI4ZjZkMzU5IiwidGl0bGUiOiIyMCBQYXNzaXZlIEluY29tZSBGaW5hbmNlIEludmVzdGluZyBJZGVhcyBhbmQgSG93IFRvIEI= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037638202651255479956880763478081570,2024-08-06T17:07:32.786+0000
partition-1,eyJpbmRleCI6MjQxOSwidW5pcXVlX2lkIjoiOThjY2VjMjgtZjYzYS00OTdmLTg4MjQtMjZlMzI5YjIxYTEyIiwidGl0bGUiOiJUaGUgQmVzdCBDaHJpc3RtYXMgUGFydHkgR2FtZXMgRm9yIFRoZSBXaG9sZSBGYW1pbHkgLSA= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037639813658549143494060132779687970,2024-08-06T17:14:33.857+0000
partition-1,eyJpbmRleCI6OTI0MiwidW5pcXVlX2lkIjoiOWMxODZlY2ItYWU2Yi00YmNiLWEzMjAtNDEyZTIyM2VkNDc4IiwidGl0bGUiOiJUb3AgODUgQmVzdCBGcmllbmQgVGF0dG9vIElkZWFzIC0gWzIwMjEgSW5zcGlyYXRpb24gR3U= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037639870641267651029607175908491298,2024-08-06T17:14:46.891+0000
partition-1,eyJpbmRleCI6ODIzOSwidW5pcXVlX2lkIjoiMTk2YTFjNjUtNjczMi00ZTQwLWJlMjItNTBhMTg2YzQyZGZlIiwidGl0bGUiOiIxMCBJbnNwaXJhdGlvbmFsIFF1b3RlcyBPZiBUaGUgRGF5ICgyMjQpIiwiZGVzY3JpcHRpb24= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037640714458191558025024287390302242,2024-08-06T17:18:11.114+0000
partition-1,eyJpbmRleCI6OTE5MiwidW5pcXVlX2lkIjoiZGIxN2Q3YzgtZDg5OS00MWI4LWEzNzctZTU0ZTNiYzBlMjQ5IiwidGl0bGUiOiIzMCBHb3JnZW91cyBBbmQgQW1hemluZyBGaW5nZXIgVGF0dG9vIElkZWFzIC0gV29tZW4gRmE= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037641464136061891629268045538525218,2024-08-06T17:21:43.085+0000
partition-1,eyJpbmRleCI6MzcwMiwidW5pcXVlX2lkIjoiYTA0YWFmNmQtYjM0Yy00NTcwLTg5Y2ItNmIwNTEzOWM5YzM4IiwidGl0bGUiOiJTaWxlbnQgRXBpZGVtaWMgOiBQcm9ibGVtcyBpbiBQdWJsaWMgRWR1Y2F0aW9uIChQYXBlcmI= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037641486242479429102377546532519970,2024-08-06T17:21:50.209+0000
partition-1,eyJpbmRleCI6NDIxMCwidW5pcXVlX2lkIjoiOTE3ODI5MTMtNGRhNi00YzlmLTk4MzQtOWQ3MDY0ZDhiOTg3IiwidGl0bGUiOiJIb3cgdG8gVXNlIFZpc3VhbHMgdG8gSGVscCB3aXRoIEV4ZWN1dGl2ZSBGdW5jdGlvbmluZyI= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037641496955980042527221498937081890,2024-08-06T17:21:53.436+0000
partition-1,eyJpbmRleCI6MTA0MSwidW5pcXVlX2lkIjoiZmZhZjMwNzktZjViNC00NzFkLWJlNjktZTY1NWFkYjMyMWJlIiwidGl0bGUiOiJSb2JvdCBDaGFsbGVuZ2UgU2NyZWVuIiwiZGVzY3JpcHRpb24iOiJUaGVzZSBhcmUgbXkgdG8= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037642169659125589628793833853550626,2024-08-06T17:24:47.485+0000
partition-1,eyJpbmRleCI6MjkzNCwidW5pcXVlX2lkIjoiMTQ3NGQ0MzAtOTU2Yy00ZGRkLWI5N2MtNmFmNDc0NjQyZDlmIiwidGl0bGUiOiJESVkgQmxlc3MgWW91IE1hc29uIEphciBUaXNzdWUgRGlzcGVuc2VyIOKAlCBEYXkgdG8gRGE= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037642496682022276123294387008438306,2024-08-06T17:26:19.370+0000
partition-1,eyJpbmRleCI6NDM3NCwidW5pcXVlX2lkIjoiYjU0ZTllNWUtNjlkYS00MGVkLWJjZmUtODA5NDM5YTMxZjM0IiwidGl0bGUiOiI1MSBHb29kIEV2ZW50IFBsYW5uaW5nIFNsb2dhbnMgYW5kIFRhZ2xpbmVzIiwiZGVzY3JpcHQ= (truncated),streaming-0affe012670f-pin,shardId-000000000002,49654577026235751110037642964114399355939288830894407714,2024-08-06T17:28:36.388+0000


In [0]:
display(df_user)

partitionKey,data,stream,shardId,sequenceNumber,approximateArrivalTimestamp
partition-1,eyJpbmQiOjE0OTgsImZpcnN0X25hbWUiOiJCaWFuY2EiLCJsYXN0X25hbWUiOiJKb2huc29uIiwiYWdlIjoyOSwiZGF0ZV9qb2luZWQiOiIyMDE2LTAxLTI5VDEwOjQ4OjE2In0=,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326054199789066502998244490775363618,2024-08-06T17:41:35.611+0000
partition-1,eyJpbmQiOjY0MTYsImZpcnN0X25hbWUiOiJBbGJlcnQiLCJsYXN0X25hbWUiOiJBbGxpc29uIiwiYWdlIjoyMCwiZGF0ZV9qb2luZWQiOiIyMDE1LTEwLTIxVDIyOjI3OjI3In0=,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326054221722607648266463325456433186,2024-08-06T17:42:02.065+0000
partition-1,eyJpbmQiOjQ3NjcsImZpcnN0X25hbWUiOiJNYWRlbGluZSIsImxhc3RfbmFtZSI6IkJyb3duIiwiYWdlIjo0MywiZGF0ZV9qb2luZWQiOiIyMDE2LTExLTExVDE1OjUzOjE1In0=,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326054225560947125542911230026448930,2024-08-06T17:42:05.720+0000
partition-1,eyJpbmQiOjkyMTMsImZpcnN0X25hbWUiOiJBYXJvbiIsImxhc3RfbmFtZSI6IkFiYm90dCIsImFnZSI6MjAsImRhdGVfam9pbmVkIjoiMjAxNS0xMC0yM1QxNjowODo0MSJ9,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326061196618914989050284755302481954,2024-08-06T19:22:05.962+0000
partition-1,eyJpbmQiOjg1NTksImZpcnN0X25hbWUiOiJBbGV4YW5kZXIiLCJsYXN0X25hbWUiOiJCYWlsZXkiLCJhZ2UiOjIwLCJkYXRlX2pvaW5lZCI6IjIwMTUtMTAtMzBUMTE6Mzc6MzIifQ==,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326061200682114668775053823806799906,2024-08-06T19:22:10.663+0000
partition-1,eyJpbmQiOjQwNjcsImZpcnN0X25hbWUiOiJBbmdlbGEiLCJsYXN0X25hbWUiOiJCZWNrZXIiLCJhZ2UiOjI2LCJkYXRlX2pvaW5lZCI6IjIwMTUtMTEtMjNUMTg6NTg6MTQifQ==,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326061204134806809594434952926068770,2024-08-06T19:22:14.716+0000
partition-1,eyJpbmQiOjM3MywiZmlyc3RfbmFtZSI6IkNocmlzdG9waGVyIiwibGFzdF9uYW1lIjoiU21pdGgiLCJhZ2UiOjQ4LCJkYXRlX2pvaW5lZCI6IjIwMTctMDUtMjFUMDI6NDQ6MDIifQ==,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326061256343477255471815164711403554,2024-08-06T19:23:22.510+0000
partition-1,eyJpbmQiOjc4MjEsImZpcnN0X25hbWUiOiJKZWZmIiwibGFzdF9uYW1lIjoiSm9uZXMiLCJhZ2UiOjU3LCJkYXRlX2pvaW5lZCI6IjIwMTctMDctMTFUMDA6MDc6MTUifQ==,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326061260112907961030229275042644002,2024-08-06T19:23:27.501+0000
partition-1,eyJpbmQiOjEwODgsImZpcnN0X25hbWUiOiJOaW5hIiwibGFzdF9uYW1lIjoiU21pdGgiLCJhZ2UiOjUxLCJkYXRlX2pvaW5lZCI6IjIwMTctMDYtMjlUMDg6MjI6NDkifQ==,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326063211967165058355235866584023074,2024-08-06T19:42:14.365+0000
partition-1,eyJpbmQiOjYzMDIsImZpcnN0X25hbWUiOiJKYXNvbiIsImxhc3RfbmFtZSI6IkJyb29rcyIsImFnZSI6MjgsImRhdGVfam9pbmVkIjoiMjAxNS0xMi0yMVQwMDozMzo1MSJ9,streaming-0affe012670f-user,shardId-000000000002,49654585493739500011326063609546599355018363544688132130,2024-08-06T19:49:44.830+0000


In [0]:
df_geo = df_geo.selectExpr("CAST(data as STRING)")
display(df_geo)

data
"{""ind"":10999,""timestamp"":""2019-04-19T07:44:08"",""latitude"":-52.2172,""longitude"":21.6704,""country"":""Saint Martin""}"
"{""ind"":6630,""timestamp"":""2021-12-12T17:54:13"",""latitude"":-50.7814,""longitude"":-111.954,""country"":""Bouvet Island (Bouvetoya)""}"
"{""ind"":1498,""timestamp"":""2021-10-29T06:41:28"",""latitude"":-85.2965,""longitude"":-101.72,""country"":""Greenland""}"
"{""ind"":6416,""timestamp"":""2020-10-18T03:15:28"",""latitude"":-89.63,""longitude"":-179.022,""country"":""Argentina""}"
"{""ind"":4767,""timestamp"":""2022-03-15T14:29:47"",""latitude"":-63.6785,""longitude"":-29.2943,""country"":""Hungary""}"
"{""ind"":9213,""timestamp"":""2021-07-04T08:28:48"",""latitude"":-89.5173,""longitude"":-179.689,""country"":""Algeria""}"
"{""ind"":8559,""timestamp"":""2019-04-19T03:56:11"",""latitude"":-88.5255,""longitude"":-161.644,""country"":""American Samoa""}"
"{""ind"":4067,""timestamp"":""2020-03-01T14:34:20"",""latitude"":-85.0647,""longitude"":-146.879,""country"":""Bahamas""}"
"{""ind"":373,""timestamp"":""2022-02-03T02:39:30"",""latitude"":31.1933,""longitude"":134.913,""country"":""United States Minor Outlying Islands""}"
"{""ind"":7821,""timestamp"":""2020-05-13T14:56:04"",""latitude"":61.5545,""longitude"":90.9314,""country"":""Chile""}"


In [0]:
df_pin = df_pin.selectExpr("CAST(data as STRING)")
display(df_pin)

data
"{""index"":5656,""unique_id"":""977d34c7-c347-4dd3-93e0-767428f6d359"",""title"":""20 Passive Income Finance Investing Ideas and How To Begin Investing Money for Dividends"",""description"":""Want safe investment ideas? Try the best dividend stocks, top dividend stocks, and highest paying dividends stocks for passive income. It's one of great ways to invest money and… "",""poster_name"":""Dividends Diversify: Money Matters So Build Wealth & Be Rich"",""follower_count"":""28k"",""tag_list"":""Ways To Earn Money,Way To Make Money,Make Money Online,Investment Portfolio,Investment Advice,Retirement Money,Safe Investments,Dividend Investing,Dividend Stocks"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/a3/d0/d9/a3d0d96da9e5552746d2b9e8bb9df4e8.jpg"",""downloaded"":1,""save_location"":""Local save in /data/finance"",""category"":""finance""}"
"{""index"":2419,""unique_id"":""98ccec28-f63a-497f-8824-26e329b21a12"",""title"":""The Best Christmas Party Games For The Whole Family - I Heart Arts n Crafts"",""description"":""Add some fun to your Christmas party with these simple festive games that the whole family is going to love!"",""poster_name"":""Jackie | I Heart Arts N Crafts"",""follower_count"":""89k"",""tag_list"":""Fun Christmas Party Ideas,Christmas Birthday Party,Christmas Crafts For Kids,Holiday Crafts,Ideas Party,Christmas Party Games For Kids,Christmas Games For Preschoolers,Classroom Christmas Decor,Family Christmas Activities"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/4c/39/77/4c39779fe174c4da8cb7e923fe7749af.jpg"",""downloaded"":1,""save_location"":""Local save in /data/christmas"",""category"":""christmas""}"
"{""index"":9242,""unique_id"":""9c186ecb-ae6b-4bcb-a320-412e223ed478"",""title"":""Top 85 Best Friend Tattoo Ideas - [2021 Inspiration Guide]"",""description"":""Best friend tattoos are a great way to commemorate an important experience or relationship. Click here for the top 85 best ink ideas and BFF tattoo designs!"",""poster_name"":""Next Luxury"",""follower_count"":""800k"",""tag_list"":""Bff Tattoos,Dope Tattoos,Mini Tattoos,Modern Tattoos,Simplistic Tattoos,Pretty Tattoos,Unique Tattoos,Small Tattoos,Tattoo Couples"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/d4/d4/0b/d4d40b7abe568651d09639bee9a8ff33.jpg"",""downloaded"":1,""save_location"":""Local save in /data/tattoos"",""category"":""tattoos""}"
"{""index"":8239,""unique_id"":""196a1c65-6732-4e40-be22-50a186c42dfe"",""title"":""10 Inspirational Quotes Of The Day (224)"",""description"":""photo credit: Pinterest"",""poster_name"":""Lifehack"",""follower_count"":""1M"",""tag_list"":""Thank You Quotes,Cute Quotes,Words Quotes,Wise Words,Quotes To Live By,Sayings,You Rock Quotes,Trust Quotes,Motivational Quotes"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/1c/20/42/1c204295a5882b4c3d4ae1bb193efc52.png"",""downloaded"":1,""save_location"":""Local save in /data/quotes"",""category"":""quotes""}"
"{""index"":9192,""unique_id"":""db17d7c8-d899-41b8-a377-e54e3bc0e249"",""title"":""30 Gorgeous And Amazing Finger Tattoo Ideas - Women Fashion Lifestyle Blog Shinecoco.com"",""description"":""What do you think about finger tattoos? Good-looking they will help you to show the world an idea you have in mind without putting a huge picture on your skin. We love how simpl… "",""poster_name"":""Shinecoco"",""follower_count"":""101k"",""tag_list"":""Finger Tattoo Designs,Cool Finger Tattoos,Finger Tattoo For Women,Hand Tattoos For Women,Cool Tattoos,Simple Finger Tattoo,Sexy Tattoos,Tattoos For Fingers,Womens Finger Tattoos"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/9a/81/80/9a8180eeecfd891f4c26caf49eceba8a.jpg"",""downloaded"":1,""save_location"":""Local save in /data/tattoos"",""category"":""tattoos""}"
"{""index"":3702,""unique_id"":""a04aaf6d-b34c-4570-89cb-6b05139c9c38"",""title"":""Silent Epidemic : Problems in Public Education (Paperback)"",""description"":""Silent Epidemic: Problems in Public Education"",""poster_name"":""Walmart"",""follower_count"":""2M"",""tag_list"":""Edd,Public,Education,Walmart,Products,Onderwijs,Learning,Gadget"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/40/7f/95/407f955191f4cb03a5864a84b41f988e.jpg"",""downloaded"":1,""save_location"":""Local save in /data/education"",""category"":""education""}"
"{""index"":4210,""unique_id"":""91782913-4da6-4c9f-9834-9d7064d8b987"",""title"":""How to Use Visuals to Help with Executive Functioning"",""description"":""Do you students with autism struggle with executive functioning skills, such as initiation, planning, and emotional control? Read more about how I use visuals with my student wi… "",""poster_name"":""The Autism Vault"",""follower_count"":""5k"",""tag_list"":""Autism Education,Preschool Special Education,Gifted Education,Autism Classroom,Education Quotes,Education Logo,Social Skills Activities,Teaching Social Skills,Shape Activities"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/08/ba/72/08ba72687bf6ecdc98a4835fd93f5f9d.jpg"",""downloaded"":1,""save_location"":""Local save in /data/education"",""category"":""education""}"
"{""index"":1041,""unique_id"":""ffaf3079-f5b4-471d-be69-e655adb321be"",""title"":""Robot Challenge Screen"",""description"":""These are my top Korean beauty products on Amazon! I love them because they are affordable and effective. Get your K-beauty on!"",""poster_name"":""Melody | LA Vida Color | Beauty, Health & Wellness Blog"",""follower_count"":""2k"",""tag_list"":""Skin Care Regimen,Skin Care Tips,Kosmetik Online Shop,Beauty Hacks For Teens,Beauty Ideas,Skin Care Routine For 20s,Skin Tag,Image Skincare,Skin Care"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/40/62/3d/40623d335fc89628b6b9322da268af92.jpg"",""downloaded"":1,""save_location"":""Local save in /data/beauty"",""category"":""beauty""}"
"{""index"":2934,""unique_id"":""1474d430-956c-4ddd-b97c-6af474642d9f"",""title"":""DIY Bless You Mason Jar Tissue Dispenser — Day to Day Adventures"",""description"":""I have a confession to make. Mason jars are my addiction. I can't NOT buy them at a garage sale. My daughter says that I have \""jar\"" a problem. She's probably right. Last summer… "",""poster_name"":""DaytoDay Adventures | Blue Daisy | Home Decor | Crafts | Rag Rugs"",""follower_count"":""21k"",""tag_list"":""Mason Jar Projects,Mason Jar Crafts,Crafts With Jars,Pickle Jar Crafts,Mason Jar Christmas Crafts,Christmas Foods,Christmas Diy,Diy Projects To Try,Craft Projects"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/40/9b/ec/409bec5ee67e22028fcebfea31b8f1a4.jpg"",""downloaded"":1,""save_location"":""Local save in /data/diy-and-crafts"",""category"":""diy-and-crafts""}"
"{""index"":4374,""unique_id"":""b54e9e5e-69da-40ed-bcfe-809439a31f34"",""title"":""51 Good Event Planning Slogans and Taglines"",""description"":""We have compiled a list of some of the catchiest event planning slogans and tag lines ever thought up."",""poster_name"":""The Blog Millionaire Podcast"",""follower_count"":""86k"",""tag_list"":""Event Planning Template,Event Planning Quotes,Event Planning Checklist,Event Planning Business,Party Planning,Catering Business,Event Ideas,Catchy Business Name Ideas,Business Ideas"",""is_image_or_video"":""image"",""image_src"":""https://i.pinimg.com/originals/79/72/3a/79723a61f36e99ae00f672fb35951f92.png"",""downloaded"":1,""save_location"":""Local save in /data/event-planning"",""category"":""event-planning""}"


In [0]:
df_user = df_user.selectExpr("CAST(data as STRING)")
display(df_user)

data
"{""ind"":1498,""first_name"":""Bianca"",""last_name"":""Johnson"",""age"":29,""date_joined"":""2016-01-29T10:48:16""}"
"{""ind"":6416,""first_name"":""Albert"",""last_name"":""Allison"",""age"":20,""date_joined"":""2015-10-21T22:27:27""}"
"{""ind"":4767,""first_name"":""Madeline"",""last_name"":""Brown"",""age"":43,""date_joined"":""2016-11-11T15:53:15""}"
"{""ind"":9213,""first_name"":""Aaron"",""last_name"":""Abbott"",""age"":20,""date_joined"":""2015-10-23T16:08:41""}"
"{""ind"":8559,""first_name"":""Alexander"",""last_name"":""Bailey"",""age"":20,""date_joined"":""2015-10-30T11:37:32""}"
"{""ind"":4067,""first_name"":""Angela"",""last_name"":""Becker"",""age"":26,""date_joined"":""2015-11-23T18:58:14""}"
"{""ind"":373,""first_name"":""Christopher"",""last_name"":""Smith"",""age"":48,""date_joined"":""2017-05-21T02:44:02""}"
"{""ind"":7821,""first_name"":""Jeff"",""last_name"":""Jones"",""age"":57,""date_joined"":""2017-07-11T00:07:15""}"
"{""ind"":1088,""first_name"":""Nina"",""last_name"":""Smith"",""age"":51,""date_joined"":""2017-06-29T08:22:49""}"
"{""ind"":6302,""first_name"":""Jason"",""last_name"":""Brooks"",""age"":28,""date_joined"":""2015-12-21T00:33:51""}"


In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType

# Define the schema for the JSON data
schema = StructType([
    StructField("ind", IntegerType(), True),
    StructField("first_name", StringType(), True),
    StructField("last_name", StringType(), True),
    StructField("age", IntegerType(), True),
    StructField("date_joined", DateType(), True)
])

# Assuming the string data in df_user is JSON, we can convert it to proper columns
df_user_expanded = df_user.withColumn("data", F.from_json("data", schema))
df_user_final = df_user_expanded.select(F.col("data.*"))

display(df_user_final)


ind,first_name,last_name,age,date_joined
1498,Bianca,Johnson,29,2016-01-29
6416,Albert,Allison,20,2015-10-21
4767,Madeline,Brown,43,2016-11-11
9213,Aaron,Abbott,20,2015-10-23
8559,Alexander,Bailey,20,2015-10-30
4067,Angela,Becker,26,2015-11-23
373,Christopher,Smith,48,2017-05-21
7821,Jeff,Jones,57,2017-07-11
1088,Nina,Smith,51,2017-06-29
6302,Jason,Brooks,28,2015-12-21


In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType

# Define the schema for the JSON data
schema = StructType([
    StructField("index", IntegerType(), True),
    StructField("unique_id", StringType(), True),
    StructField("title", StringType(), True),
    StructField("description", StringType(), True),
    StructField("poster_name", StringType(), True),
    StructField("follower_count", StringType(), True),
    StructField("tag_list", StringType(), True),
    StructField("is_image_or_video", StringType(), True),
    StructField("image_src", StringType(), True),
    StructField("Downloaded", IntegerType(), True),
    StructField("save_location", StringType(), True),
    StructField("category", StringType(), True)
])

# The string data in df_pin is JSON, we can convert it to proper columns
df_pin_expanded = df_pin.withColumn("data", F.from_json("data", schema))
df_pin_final = df_pin_expanded.select(F.col("data.*"))

display(df_pin_final)

index,unique_id,title,description,poster_name,follower_count,tag_list,is_image_or_video,image_src,Downloaded,save_location,category
5656,977d34c7-c347-4dd3-93e0-767428f6d359,20 Passive Income Finance Investing Ideas and How To Begin Investing Money for Dividends,"Want safe investment ideas? Try the best dividend stocks, top dividend stocks, and highest paying dividends stocks for passive income. It's one of great ways to invest money and…",Dividends Diversify: Money Matters So Build Wealth & Be Rich,28k,"Ways To Earn Money,Way To Make Money,Make Money Online,Investment Portfolio,Investment Advice,Retirement Money,Safe Investments,Dividend Investing,Dividend Stocks",image,https://i.pinimg.com/originals/a3/d0/d9/a3d0d96da9e5552746d2b9e8bb9df4e8.jpg,,Local save in /data/finance,finance
2419,98ccec28-f63a-497f-8824-26e329b21a12,The Best Christmas Party Games For The Whole Family - I Heart Arts n Crafts,Add some fun to your Christmas party with these simple festive games that the whole family is going to love!,Jackie | I Heart Arts N Crafts,89k,"Fun Christmas Party Ideas,Christmas Birthday Party,Christmas Crafts For Kids,Holiday Crafts,Ideas Party,Christmas Party Games For Kids,Christmas Games For Preschoolers,Classroom Christmas Decor,Family Christmas Activities",image,https://i.pinimg.com/originals/4c/39/77/4c39779fe174c4da8cb7e923fe7749af.jpg,,Local save in /data/christmas,christmas
9242,9c186ecb-ae6b-4bcb-a320-412e223ed478,Top 85 Best Friend Tattoo Ideas - [2021 Inspiration Guide],Best friend tattoos are a great way to commemorate an important experience or relationship. Click here for the top 85 best ink ideas and BFF tattoo designs!,Next Luxury,800k,"Bff Tattoos,Dope Tattoos,Mini Tattoos,Modern Tattoos,Simplistic Tattoos,Pretty Tattoos,Unique Tattoos,Small Tattoos,Tattoo Couples",image,https://i.pinimg.com/originals/d4/d4/0b/d4d40b7abe568651d09639bee9a8ff33.jpg,,Local save in /data/tattoos,tattoos
8239,196a1c65-6732-4e40-be22-50a186c42dfe,10 Inspirational Quotes Of The Day (224),photo credit: Pinterest,Lifehack,1M,"Thank You Quotes,Cute Quotes,Words Quotes,Wise Words,Quotes To Live By,Sayings,You Rock Quotes,Trust Quotes,Motivational Quotes",image,https://i.pinimg.com/originals/1c/20/42/1c204295a5882b4c3d4ae1bb193efc52.png,,Local save in /data/quotes,quotes
9192,db17d7c8-d899-41b8-a377-e54e3bc0e249,30 Gorgeous And Amazing Finger Tattoo Ideas - Women Fashion Lifestyle Blog Shinecoco.com,What do you think about finger tattoos? Good-looking they will help you to show the world an idea you have in mind without putting a huge picture on your skin. We love how simpl…,Shinecoco,101k,"Finger Tattoo Designs,Cool Finger Tattoos,Finger Tattoo For Women,Hand Tattoos For Women,Cool Tattoos,Simple Finger Tattoo,Sexy Tattoos,Tattoos For Fingers,Womens Finger Tattoos",image,https://i.pinimg.com/originals/9a/81/80/9a8180eeecfd891f4c26caf49eceba8a.jpg,,Local save in /data/tattoos,tattoos
3702,a04aaf6d-b34c-4570-89cb-6b05139c9c38,Silent Epidemic : Problems in Public Education (Paperback),Silent Epidemic: Problems in Public Education,Walmart,2M,"Edd,Public,Education,Walmart,Products,Onderwijs,Learning,Gadget",image,https://i.pinimg.com/originals/40/7f/95/407f955191f4cb03a5864a84b41f988e.jpg,,Local save in /data/education,education
4210,91782913-4da6-4c9f-9834-9d7064d8b987,How to Use Visuals to Help with Executive Functioning,"Do you students with autism struggle with executive functioning skills, such as initiation, planning, and emotional control? Read more about how I use visuals with my student wi…",The Autism Vault,5k,"Autism Education,Preschool Special Education,Gifted Education,Autism Classroom,Education Quotes,Education Logo,Social Skills Activities,Teaching Social Skills,Shape Activities",image,https://i.pinimg.com/originals/08/ba/72/08ba72687bf6ecdc98a4835fd93f5f9d.jpg,,Local save in /data/education,education
1041,ffaf3079-f5b4-471d-be69-e655adb321be,Robot Challenge Screen,These are my top Korean beauty products on Amazon! I love them because they are affordable and effective. Get your K-beauty on!,"Melody | LA Vida Color | Beauty, Health & Wellness Blog",2k,"Skin Care Regimen,Skin Care Tips,Kosmetik Online Shop,Beauty Hacks For Teens,Beauty Ideas,Skin Care Routine For 20s,Skin Tag,Image Skincare,Skin Care",image,https://i.pinimg.com/originals/40/62/3d/40623d335fc89628b6b9322da268af92.jpg,,Local save in /data/beauty,beauty
2934,1474d430-956c-4ddd-b97c-6af474642d9f,DIY Bless You Mason Jar Tissue Dispenser — Day to Day Adventures,"I have a confession to make. Mason jars are my addiction. I can't NOT buy them at a garage sale. My daughter says that I have ""jar"" a problem. She's probably right. Last summer…",DaytoDay Adventures | Blue Daisy | Home Decor | Crafts | Rag Rugs,21k,"Mason Jar Projects,Mason Jar Crafts,Crafts With Jars,Pickle Jar Crafts,Mason Jar Christmas Crafts,Christmas Foods,Christmas Diy,Diy Projects To Try,Craft Projects",image,https://i.pinimg.com/originals/40/9b/ec/409bec5ee67e22028fcebfea31b8f1a4.jpg,,Local save in /data/diy-and-crafts,diy-and-crafts
4374,b54e9e5e-69da-40ed-bcfe-809439a31f34,51 Good Event Planning Slogans and Taglines,We have compiled a list of some of the catchiest event planning slogans and tag lines ever thought up.,The Blog Millionaire Podcast,86k,"Event Planning Template,Event Planning Quotes,Event Planning Checklist,Event Planning Business,Party Planning,Catering Business,Event Ideas,Catchy Business Name Ideas,Business Ideas",image,https://i.pinimg.com/originals/79/72/3a/79723a61f36e99ae00f672fb35951f92.png,,Local save in /data/event-planning,event-planning


In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType

# Define the schema for the JSON data
schema = StructType([
    StructField("ind", IntegerType(), True),
    StructField("longitude", StringType(), True),
    StructField("latitude", StringType(), True),
    StructField("country", StringType(), True),
    StructField("timestamp", DateType(), True)
])

# Assuming the string data in df_user is JSON, we can convert it to proper columns
df_geo_expanded = df_geo.withColumn("data", F.from_json("data", schema))
df_geo_final = df_geo_expanded.select(F.col("data.*"))

display(df_geo_final)

ind,longitude,latitude,country,timestamp
10999,21.6704,-52.2172,Saint Martin,2019-04-19
6630,-111.954,-50.7814,Bouvet Island (Bouvetoya),2021-12-12
1498,-101.72,-85.2965,Greenland,2021-10-29
6416,-179.022,-89.63,Argentina,2020-10-18
4767,-29.2943,-63.6785,Hungary,2022-03-15
9213,-179.689,-89.5173,Algeria,2021-07-04
8559,-161.644,-88.5255,American Samoa,2019-04-19
4067,-146.879,-85.0647,Bahamas,2020-03-01
373,134.913,31.1933,United States Minor Outlying Islands,2022-02-03
7821,90.9314,61.5545,Chile,2020-05-13


# After Transforming the Data, Let's perform some cleaning

In [0]:
from pyspark.sql.functions import col, when
# To clean the df_pin DataFrame you should perform some transformations:
# Replace empty entries and entries with no relevant data in each column with Nones

df_pin_cleaned = df_pin_final.select([when(col(c) == "", None).otherwise(col(c)).alias(c) for c in df_pin_final.columns])
df_geo_cleaned = df_geo_final.select([when(col(c) == "", None).otherwise(col(c)).alias(c) for c in df_geo_final.columns])
df_user_cleaned = df_user_final.select([when(col(c) == "", None).otherwise(col(c)).alias(c) for c in df_user_final.columns])

display(df_pin_cleaned)
display(df_geo_cleaned)
display(df_user_cleaned)

ind,longitude,latitude,country,timestamp
10999,21.6704,-52.2172,Saint Martin,2019-04-19
6630,-111.954,-50.7814,Bouvet Island (Bouvetoya),2021-12-12
1498,-101.72,-85.2965,Greenland,2021-10-29
6416,-179.022,-89.63,Argentina,2020-10-18
4767,-29.2943,-63.6785,Hungary,2022-03-15
9213,-179.689,-89.5173,Algeria,2021-07-04
8559,-161.644,-88.5255,American Samoa,2019-04-19
4067,-146.879,-85.0647,Bahamas,2020-03-01
373,134.913,31.1933,United States Minor Outlying Islands,2022-02-03
7821,90.9314,61.5545,Chile,2020-05-13


In [0]:
#Perform the necessary transformations on the follower_count to ensure every entry is a number. Make sure the data type of this column is an int.

from pyspark.sql.functions import udf
from pyspark.sql.types import IntegerType

# UDF to convert follower_count with 'k' and 'M' to an integer
def convert_follower_count(follower_count):
    if follower_count is None:
        return None
    if 'k' in follower_count:
        return int(float(follower_count.replace('k', '')) * 1000)
    elif 'M' in follower_count:
        return int(float(follower_count.replace('M', '')) * 1000000)
    elif follower_count.isdigit():
        return int(follower_count)
    else:
        return follower_count

convert_follower_count_udf = udf(convert_follower_count, IntegerType())

# Apply the UDF to the follower_count column
df_pin_transformed = df_pin_cleaned.withColumn("follower_count", convert_follower_count_udf("follower_count"))

display(df_pin_transformed)

index,unique_id,title,description,poster_name,follower_count,tag_list,is_image_or_video,image_src,Downloaded,save_location,category
5656,977d34c7-c347-4dd3-93e0-767428f6d359,20 Passive Income Finance Investing Ideas and How To Begin Investing Money for Dividends,"Want safe investment ideas? Try the best dividend stocks, top dividend stocks, and highest paying dividends stocks for passive income. It's one of great ways to invest money and…",Dividends Diversify: Money Matters So Build Wealth & Be Rich,28000.0,"Ways To Earn Money,Way To Make Money,Make Money Online,Investment Portfolio,Investment Advice,Retirement Money,Safe Investments,Dividend Investing,Dividend Stocks",image,https://i.pinimg.com/originals/a3/d0/d9/a3d0d96da9e5552746d2b9e8bb9df4e8.jpg,,Local save in /data/finance,finance
2419,98ccec28-f63a-497f-8824-26e329b21a12,The Best Christmas Party Games For The Whole Family - I Heart Arts n Crafts,Add some fun to your Christmas party with these simple festive games that the whole family is going to love!,Jackie | I Heart Arts N Crafts,89000.0,"Fun Christmas Party Ideas,Christmas Birthday Party,Christmas Crafts For Kids,Holiday Crafts,Ideas Party,Christmas Party Games For Kids,Christmas Games For Preschoolers,Classroom Christmas Decor,Family Christmas Activities",image,https://i.pinimg.com/originals/4c/39/77/4c39779fe174c4da8cb7e923fe7749af.jpg,,Local save in /data/christmas,christmas
9242,9c186ecb-ae6b-4bcb-a320-412e223ed478,Top 85 Best Friend Tattoo Ideas - [2021 Inspiration Guide],Best friend tattoos are a great way to commemorate an important experience or relationship. Click here for the top 85 best ink ideas and BFF tattoo designs!,Next Luxury,800000.0,"Bff Tattoos,Dope Tattoos,Mini Tattoos,Modern Tattoos,Simplistic Tattoos,Pretty Tattoos,Unique Tattoos,Small Tattoos,Tattoo Couples",image,https://i.pinimg.com/originals/d4/d4/0b/d4d40b7abe568651d09639bee9a8ff33.jpg,,Local save in /data/tattoos,tattoos
8239,196a1c65-6732-4e40-be22-50a186c42dfe,10 Inspirational Quotes Of The Day (224),photo credit: Pinterest,Lifehack,1000000.0,"Thank You Quotes,Cute Quotes,Words Quotes,Wise Words,Quotes To Live By,Sayings,You Rock Quotes,Trust Quotes,Motivational Quotes",image,https://i.pinimg.com/originals/1c/20/42/1c204295a5882b4c3d4ae1bb193efc52.png,,Local save in /data/quotes,quotes
9192,db17d7c8-d899-41b8-a377-e54e3bc0e249,30 Gorgeous And Amazing Finger Tattoo Ideas - Women Fashion Lifestyle Blog Shinecoco.com,What do you think about finger tattoos? Good-looking they will help you to show the world an idea you have in mind without putting a huge picture on your skin. We love how simpl…,Shinecoco,101000.0,"Finger Tattoo Designs,Cool Finger Tattoos,Finger Tattoo For Women,Hand Tattoos For Women,Cool Tattoos,Simple Finger Tattoo,Sexy Tattoos,Tattoos For Fingers,Womens Finger Tattoos",image,https://i.pinimg.com/originals/9a/81/80/9a8180eeecfd891f4c26caf49eceba8a.jpg,,Local save in /data/tattoos,tattoos
3702,a04aaf6d-b34c-4570-89cb-6b05139c9c38,Silent Epidemic : Problems in Public Education (Paperback),Silent Epidemic: Problems in Public Education,Walmart,2000000.0,"Edd,Public,Education,Walmart,Products,Onderwijs,Learning,Gadget",image,https://i.pinimg.com/originals/40/7f/95/407f955191f4cb03a5864a84b41f988e.jpg,,Local save in /data/education,education
4210,91782913-4da6-4c9f-9834-9d7064d8b987,How to Use Visuals to Help with Executive Functioning,"Do you students with autism struggle with executive functioning skills, such as initiation, planning, and emotional control? Read more about how I use visuals with my student wi…",The Autism Vault,5000.0,"Autism Education,Preschool Special Education,Gifted Education,Autism Classroom,Education Quotes,Education Logo,Social Skills Activities,Teaching Social Skills,Shape Activities",image,https://i.pinimg.com/originals/08/ba/72/08ba72687bf6ecdc98a4835fd93f5f9d.jpg,,Local save in /data/education,education
1041,ffaf3079-f5b4-471d-be69-e655adb321be,Robot Challenge Screen,These are my top Korean beauty products on Amazon! I love them because they are affordable and effective. Get your K-beauty on!,"Melody | LA Vida Color | Beauty, Health & Wellness Blog",2000.0,"Skin Care Regimen,Skin Care Tips,Kosmetik Online Shop,Beauty Hacks For Teens,Beauty Ideas,Skin Care Routine For 20s,Skin Tag,Image Skincare,Skin Care",image,https://i.pinimg.com/originals/40/62/3d/40623d335fc89628b6b9322da268af92.jpg,,Local save in /data/beauty,beauty
2934,1474d430-956c-4ddd-b97c-6af474642d9f,DIY Bless You Mason Jar Tissue Dispenser — Day to Day Adventures,"I have a confession to make. Mason jars are my addiction. I can't NOT buy them at a garage sale. My daughter says that I have ""jar"" a problem. She's probably right. Last summer…",DaytoDay Adventures | Blue Daisy | Home Decor | Crafts | Rag Rugs,21000.0,"Mason Jar Projects,Mason Jar Crafts,Crafts With Jars,Pickle Jar Crafts,Mason Jar Christmas Crafts,Christmas Foods,Christmas Diy,Diy Projects To Try,Craft Projects",image,https://i.pinimg.com/originals/40/9b/ec/409bec5ee67e22028fcebfea31b8f1a4.jpg,,Local save in /data/diy-and-crafts,diy-and-crafts
4374,b54e9e5e-69da-40ed-bcfe-809439a31f34,51 Good Event Planning Slogans and Taglines,We have compiled a list of some of the catchiest event planning slogans and tag lines ever thought up.,The Blog Millionaire Podcast,86000.0,"Event Planning Template,Event Planning Quotes,Event Planning Checklist,Event Planning Business,Party Planning,Catering Business,Event Ideas,Catchy Business Name Ideas,Business Ideas",image,https://i.pinimg.com/originals/79/72/3a/79723a61f36e99ae00f672fb35951f92.png,,Local save in /data/event-planning,event-planning


In [0]:
#Clean the data in the save_location column to include only the save location path

from pyspark.sql.functions import regexp_replace

# Clean the save_location column
df_pin_cleaned_location = df_pin_transformed.withColumn("save_location", regexp_replace("save_location", "^Local save in ", ""))

display(df_pin_cleaned_location)

index,unique_id,title,description,poster_name,follower_count,tag_list,is_image_or_video,image_src,Downloaded,save_location,category
5656,977d34c7-c347-4dd3-93e0-767428f6d359,20 Passive Income Finance Investing Ideas and How To Begin Investing Money for Dividends,"Want safe investment ideas? Try the best dividend stocks, top dividend stocks, and highest paying dividends stocks for passive income. It's one of great ways to invest money and…",Dividends Diversify: Money Matters So Build Wealth & Be Rich,28000.0,"Ways To Earn Money,Way To Make Money,Make Money Online,Investment Portfolio,Investment Advice,Retirement Money,Safe Investments,Dividend Investing,Dividend Stocks",image,https://i.pinimg.com/originals/a3/d0/d9/a3d0d96da9e5552746d2b9e8bb9df4e8.jpg,,/data/finance,finance
2419,98ccec28-f63a-497f-8824-26e329b21a12,The Best Christmas Party Games For The Whole Family - I Heart Arts n Crafts,Add some fun to your Christmas party with these simple festive games that the whole family is going to love!,Jackie | I Heart Arts N Crafts,89000.0,"Fun Christmas Party Ideas,Christmas Birthday Party,Christmas Crafts For Kids,Holiday Crafts,Ideas Party,Christmas Party Games For Kids,Christmas Games For Preschoolers,Classroom Christmas Decor,Family Christmas Activities",image,https://i.pinimg.com/originals/4c/39/77/4c39779fe174c4da8cb7e923fe7749af.jpg,,/data/christmas,christmas
9242,9c186ecb-ae6b-4bcb-a320-412e223ed478,Top 85 Best Friend Tattoo Ideas - [2021 Inspiration Guide],Best friend tattoos are a great way to commemorate an important experience or relationship. Click here for the top 85 best ink ideas and BFF tattoo designs!,Next Luxury,800000.0,"Bff Tattoos,Dope Tattoos,Mini Tattoos,Modern Tattoos,Simplistic Tattoos,Pretty Tattoos,Unique Tattoos,Small Tattoos,Tattoo Couples",image,https://i.pinimg.com/originals/d4/d4/0b/d4d40b7abe568651d09639bee9a8ff33.jpg,,/data/tattoos,tattoos
8239,196a1c65-6732-4e40-be22-50a186c42dfe,10 Inspirational Quotes Of The Day (224),photo credit: Pinterest,Lifehack,1000000.0,"Thank You Quotes,Cute Quotes,Words Quotes,Wise Words,Quotes To Live By,Sayings,You Rock Quotes,Trust Quotes,Motivational Quotes",image,https://i.pinimg.com/originals/1c/20/42/1c204295a5882b4c3d4ae1bb193efc52.png,,/data/quotes,quotes
9192,db17d7c8-d899-41b8-a377-e54e3bc0e249,30 Gorgeous And Amazing Finger Tattoo Ideas - Women Fashion Lifestyle Blog Shinecoco.com,What do you think about finger tattoos? Good-looking they will help you to show the world an idea you have in mind without putting a huge picture on your skin. We love how simpl…,Shinecoco,101000.0,"Finger Tattoo Designs,Cool Finger Tattoos,Finger Tattoo For Women,Hand Tattoos For Women,Cool Tattoos,Simple Finger Tattoo,Sexy Tattoos,Tattoos For Fingers,Womens Finger Tattoos",image,https://i.pinimg.com/originals/9a/81/80/9a8180eeecfd891f4c26caf49eceba8a.jpg,,/data/tattoos,tattoos
3702,a04aaf6d-b34c-4570-89cb-6b05139c9c38,Silent Epidemic : Problems in Public Education (Paperback),Silent Epidemic: Problems in Public Education,Walmart,2000000.0,"Edd,Public,Education,Walmart,Products,Onderwijs,Learning,Gadget",image,https://i.pinimg.com/originals/40/7f/95/407f955191f4cb03a5864a84b41f988e.jpg,,/data/education,education
4210,91782913-4da6-4c9f-9834-9d7064d8b987,How to Use Visuals to Help with Executive Functioning,"Do you students with autism struggle with executive functioning skills, such as initiation, planning, and emotional control? Read more about how I use visuals with my student wi…",The Autism Vault,5000.0,"Autism Education,Preschool Special Education,Gifted Education,Autism Classroom,Education Quotes,Education Logo,Social Skills Activities,Teaching Social Skills,Shape Activities",image,https://i.pinimg.com/originals/08/ba/72/08ba72687bf6ecdc98a4835fd93f5f9d.jpg,,/data/education,education
1041,ffaf3079-f5b4-471d-be69-e655adb321be,Robot Challenge Screen,These are my top Korean beauty products on Amazon! I love them because they are affordable and effective. Get your K-beauty on!,"Melody | LA Vida Color | Beauty, Health & Wellness Blog",2000.0,"Skin Care Regimen,Skin Care Tips,Kosmetik Online Shop,Beauty Hacks For Teens,Beauty Ideas,Skin Care Routine For 20s,Skin Tag,Image Skincare,Skin Care",image,https://i.pinimg.com/originals/40/62/3d/40623d335fc89628b6b9322da268af92.jpg,,/data/beauty,beauty
2934,1474d430-956c-4ddd-b97c-6af474642d9f,DIY Bless You Mason Jar Tissue Dispenser — Day to Day Adventures,"I have a confession to make. Mason jars are my addiction. I can't NOT buy them at a garage sale. My daughter says that I have ""jar"" a problem. She's probably right. Last summer…",DaytoDay Adventures | Blue Daisy | Home Decor | Crafts | Rag Rugs,21000.0,"Mason Jar Projects,Mason Jar Crafts,Crafts With Jars,Pickle Jar Crafts,Mason Jar Christmas Crafts,Christmas Foods,Christmas Diy,Diy Projects To Try,Craft Projects",image,https://i.pinimg.com/originals/40/9b/ec/409bec5ee67e22028fcebfea31b8f1a4.jpg,,/data/diy-and-crafts,diy-and-crafts
4374,b54e9e5e-69da-40ed-bcfe-809439a31f34,51 Good Event Planning Slogans and Taglines,We have compiled a list of some of the catchiest event planning slogans and tag lines ever thought up.,The Blog Millionaire Podcast,86000.0,"Event Planning Template,Event Planning Quotes,Event Planning Checklist,Event Planning Business,Party Planning,Catering Business,Event Ideas,Catchy Business Name Ideas,Business Ideas",image,https://i.pinimg.com/originals/79/72/3a/79723a61f36e99ae00f672fb35951f92.png,,/data/event-planning,event-planning


In [0]:
# Rename the index column to ind.

from pyspark.sql.functions import col

# Rename the index column to ind
df_pin_renamed = df_pin_cleaned_location.withColumnRenamed("index", "ind")

display(df_pin_renamed)

ind,unique_id,title,description,poster_name,follower_count,tag_list,is_image_or_video,image_src,Downloaded,save_location,category
5656,977d34c7-c347-4dd3-93e0-767428f6d359,20 Passive Income Finance Investing Ideas and How To Begin Investing Money for Dividends,"Want safe investment ideas? Try the best dividend stocks, top dividend stocks, and highest paying dividends stocks for passive income. It's one of great ways to invest money and…",Dividends Diversify: Money Matters So Build Wealth & Be Rich,28000.0,"Ways To Earn Money,Way To Make Money,Make Money Online,Investment Portfolio,Investment Advice,Retirement Money,Safe Investments,Dividend Investing,Dividend Stocks",image,https://i.pinimg.com/originals/a3/d0/d9/a3d0d96da9e5552746d2b9e8bb9df4e8.jpg,,/data/finance,finance
2419,98ccec28-f63a-497f-8824-26e329b21a12,The Best Christmas Party Games For The Whole Family - I Heart Arts n Crafts,Add some fun to your Christmas party with these simple festive games that the whole family is going to love!,Jackie | I Heart Arts N Crafts,89000.0,"Fun Christmas Party Ideas,Christmas Birthday Party,Christmas Crafts For Kids,Holiday Crafts,Ideas Party,Christmas Party Games For Kids,Christmas Games For Preschoolers,Classroom Christmas Decor,Family Christmas Activities",image,https://i.pinimg.com/originals/4c/39/77/4c39779fe174c4da8cb7e923fe7749af.jpg,,/data/christmas,christmas
9242,9c186ecb-ae6b-4bcb-a320-412e223ed478,Top 85 Best Friend Tattoo Ideas - [2021 Inspiration Guide],Best friend tattoos are a great way to commemorate an important experience or relationship. Click here for the top 85 best ink ideas and BFF tattoo designs!,Next Luxury,800000.0,"Bff Tattoos,Dope Tattoos,Mini Tattoos,Modern Tattoos,Simplistic Tattoos,Pretty Tattoos,Unique Tattoos,Small Tattoos,Tattoo Couples",image,https://i.pinimg.com/originals/d4/d4/0b/d4d40b7abe568651d09639bee9a8ff33.jpg,,/data/tattoos,tattoos
8239,196a1c65-6732-4e40-be22-50a186c42dfe,10 Inspirational Quotes Of The Day (224),photo credit: Pinterest,Lifehack,1000000.0,"Thank You Quotes,Cute Quotes,Words Quotes,Wise Words,Quotes To Live By,Sayings,You Rock Quotes,Trust Quotes,Motivational Quotes",image,https://i.pinimg.com/originals/1c/20/42/1c204295a5882b4c3d4ae1bb193efc52.png,,/data/quotes,quotes
9192,db17d7c8-d899-41b8-a377-e54e3bc0e249,30 Gorgeous And Amazing Finger Tattoo Ideas - Women Fashion Lifestyle Blog Shinecoco.com,What do you think about finger tattoos? Good-looking they will help you to show the world an idea you have in mind without putting a huge picture on your skin. We love how simpl…,Shinecoco,101000.0,"Finger Tattoo Designs,Cool Finger Tattoos,Finger Tattoo For Women,Hand Tattoos For Women,Cool Tattoos,Simple Finger Tattoo,Sexy Tattoos,Tattoos For Fingers,Womens Finger Tattoos",image,https://i.pinimg.com/originals/9a/81/80/9a8180eeecfd891f4c26caf49eceba8a.jpg,,/data/tattoos,tattoos
3702,a04aaf6d-b34c-4570-89cb-6b05139c9c38,Silent Epidemic : Problems in Public Education (Paperback),Silent Epidemic: Problems in Public Education,Walmart,2000000.0,"Edd,Public,Education,Walmart,Products,Onderwijs,Learning,Gadget",image,https://i.pinimg.com/originals/40/7f/95/407f955191f4cb03a5864a84b41f988e.jpg,,/data/education,education
4210,91782913-4da6-4c9f-9834-9d7064d8b987,How to Use Visuals to Help with Executive Functioning,"Do you students with autism struggle with executive functioning skills, such as initiation, planning, and emotional control? Read more about how I use visuals with my student wi…",The Autism Vault,5000.0,"Autism Education,Preschool Special Education,Gifted Education,Autism Classroom,Education Quotes,Education Logo,Social Skills Activities,Teaching Social Skills,Shape Activities",image,https://i.pinimg.com/originals/08/ba/72/08ba72687bf6ecdc98a4835fd93f5f9d.jpg,,/data/education,education
1041,ffaf3079-f5b4-471d-be69-e655adb321be,Robot Challenge Screen,These are my top Korean beauty products on Amazon! I love them because they are affordable and effective. Get your K-beauty on!,"Melody | LA Vida Color | Beauty, Health & Wellness Blog",2000.0,"Skin Care Regimen,Skin Care Tips,Kosmetik Online Shop,Beauty Hacks For Teens,Beauty Ideas,Skin Care Routine For 20s,Skin Tag,Image Skincare,Skin Care",image,https://i.pinimg.com/originals/40/62/3d/40623d335fc89628b6b9322da268af92.jpg,,/data/beauty,beauty
2934,1474d430-956c-4ddd-b97c-6af474642d9f,DIY Bless You Mason Jar Tissue Dispenser — Day to Day Adventures,"I have a confession to make. Mason jars are my addiction. I can't NOT buy them at a garage sale. My daughter says that I have ""jar"" a problem. She's probably right. Last summer…",DaytoDay Adventures | Blue Daisy | Home Decor | Crafts | Rag Rugs,21000.0,"Mason Jar Projects,Mason Jar Crafts,Crafts With Jars,Pickle Jar Crafts,Mason Jar Christmas Crafts,Christmas Foods,Christmas Diy,Diy Projects To Try,Craft Projects",image,https://i.pinimg.com/originals/40/9b/ec/409bec5ee67e22028fcebfea31b8f1a4.jpg,,/data/diy-and-crafts,diy-and-crafts
4374,b54e9e5e-69da-40ed-bcfe-809439a31f34,51 Good Event Planning Slogans and Taglines,We have compiled a list of some of the catchiest event planning slogans and tag lines ever thought up.,The Blog Millionaire Podcast,86000.0,"Event Planning Template,Event Planning Quotes,Event Planning Checklist,Event Planning Business,Party Planning,Catering Business,Event Ideas,Catchy Business Name Ideas,Business Ideas",image,https://i.pinimg.com/originals/79/72/3a/79723a61f36e99ae00f672fb35951f92.png,,/data/event-planning,event-planning


In [0]:
# Reorder the DataFrame columns
df_pin_clean = df_pin_renamed.select(
    "ind",
    "unique_id",
    "title",
    "description",
    "follower_count",
    "poster_name",
    "tag_list",
    "is_image_or_video",
    "image_src",
    "save_location",
    "category"
)

display(df_pin_clean)

ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category
5656,977d34c7-c347-4dd3-93e0-767428f6d359,20 Passive Income Finance Investing Ideas and How To Begin Investing Money for Dividends,"Want safe investment ideas? Try the best dividend stocks, top dividend stocks, and highest paying dividends stocks for passive income. It's one of great ways to invest money and…",28000.0,Dividends Diversify: Money Matters So Build Wealth & Be Rich,"Ways To Earn Money,Way To Make Money,Make Money Online,Investment Portfolio,Investment Advice,Retirement Money,Safe Investments,Dividend Investing,Dividend Stocks",image,https://i.pinimg.com/originals/a3/d0/d9/a3d0d96da9e5552746d2b9e8bb9df4e8.jpg,/data/finance,finance
2419,98ccec28-f63a-497f-8824-26e329b21a12,The Best Christmas Party Games For The Whole Family - I Heart Arts n Crafts,Add some fun to your Christmas party with these simple festive games that the whole family is going to love!,89000.0,Jackie | I Heart Arts N Crafts,"Fun Christmas Party Ideas,Christmas Birthday Party,Christmas Crafts For Kids,Holiday Crafts,Ideas Party,Christmas Party Games For Kids,Christmas Games For Preschoolers,Classroom Christmas Decor,Family Christmas Activities",image,https://i.pinimg.com/originals/4c/39/77/4c39779fe174c4da8cb7e923fe7749af.jpg,/data/christmas,christmas
9242,9c186ecb-ae6b-4bcb-a320-412e223ed478,Top 85 Best Friend Tattoo Ideas - [2021 Inspiration Guide],Best friend tattoos are a great way to commemorate an important experience or relationship. Click here for the top 85 best ink ideas and BFF tattoo designs!,800000.0,Next Luxury,"Bff Tattoos,Dope Tattoos,Mini Tattoos,Modern Tattoos,Simplistic Tattoos,Pretty Tattoos,Unique Tattoos,Small Tattoos,Tattoo Couples",image,https://i.pinimg.com/originals/d4/d4/0b/d4d40b7abe568651d09639bee9a8ff33.jpg,/data/tattoos,tattoos
8239,196a1c65-6732-4e40-be22-50a186c42dfe,10 Inspirational Quotes Of The Day (224),photo credit: Pinterest,1000000.0,Lifehack,"Thank You Quotes,Cute Quotes,Words Quotes,Wise Words,Quotes To Live By,Sayings,You Rock Quotes,Trust Quotes,Motivational Quotes",image,https://i.pinimg.com/originals/1c/20/42/1c204295a5882b4c3d4ae1bb193efc52.png,/data/quotes,quotes
9192,db17d7c8-d899-41b8-a377-e54e3bc0e249,30 Gorgeous And Amazing Finger Tattoo Ideas - Women Fashion Lifestyle Blog Shinecoco.com,What do you think about finger tattoos? Good-looking they will help you to show the world an idea you have in mind without putting a huge picture on your skin. We love how simpl…,101000.0,Shinecoco,"Finger Tattoo Designs,Cool Finger Tattoos,Finger Tattoo For Women,Hand Tattoos For Women,Cool Tattoos,Simple Finger Tattoo,Sexy Tattoos,Tattoos For Fingers,Womens Finger Tattoos",image,https://i.pinimg.com/originals/9a/81/80/9a8180eeecfd891f4c26caf49eceba8a.jpg,/data/tattoos,tattoos
3702,a04aaf6d-b34c-4570-89cb-6b05139c9c38,Silent Epidemic : Problems in Public Education (Paperback),Silent Epidemic: Problems in Public Education,2000000.0,Walmart,"Edd,Public,Education,Walmart,Products,Onderwijs,Learning,Gadget",image,https://i.pinimg.com/originals/40/7f/95/407f955191f4cb03a5864a84b41f988e.jpg,/data/education,education
4210,91782913-4da6-4c9f-9834-9d7064d8b987,How to Use Visuals to Help with Executive Functioning,"Do you students with autism struggle with executive functioning skills, such as initiation, planning, and emotional control? Read more about how I use visuals with my student wi…",5000.0,The Autism Vault,"Autism Education,Preschool Special Education,Gifted Education,Autism Classroom,Education Quotes,Education Logo,Social Skills Activities,Teaching Social Skills,Shape Activities",image,https://i.pinimg.com/originals/08/ba/72/08ba72687bf6ecdc98a4835fd93f5f9d.jpg,/data/education,education
1041,ffaf3079-f5b4-471d-be69-e655adb321be,Robot Challenge Screen,These are my top Korean beauty products on Amazon! I love them because they are affordable and effective. Get your K-beauty on!,2000.0,"Melody | LA Vida Color | Beauty, Health & Wellness Blog","Skin Care Regimen,Skin Care Tips,Kosmetik Online Shop,Beauty Hacks For Teens,Beauty Ideas,Skin Care Routine For 20s,Skin Tag,Image Skincare,Skin Care",image,https://i.pinimg.com/originals/40/62/3d/40623d335fc89628b6b9322da268af92.jpg,/data/beauty,beauty
2934,1474d430-956c-4ddd-b97c-6af474642d9f,DIY Bless You Mason Jar Tissue Dispenser — Day to Day Adventures,"I have a confession to make. Mason jars are my addiction. I can't NOT buy them at a garage sale. My daughter says that I have ""jar"" a problem. She's probably right. Last summer…",21000.0,DaytoDay Adventures | Blue Daisy | Home Decor | Crafts | Rag Rugs,"Mason Jar Projects,Mason Jar Crafts,Crafts With Jars,Pickle Jar Crafts,Mason Jar Christmas Crafts,Christmas Foods,Christmas Diy,Diy Projects To Try,Craft Projects",image,https://i.pinimg.com/originals/40/9b/ec/409bec5ee67e22028fcebfea31b8f1a4.jpg,/data/diy-and-crafts,diy-and-crafts
4374,b54e9e5e-69da-40ed-bcfe-809439a31f34,51 Good Event Planning Slogans and Taglines,We have compiled a list of some of the catchiest event planning slogans and tag lines ever thought up.,86000.0,The Blog Millionaire Podcast,"Event Planning Template,Event Planning Quotes,Event Planning Checklist,Event Planning Business,Party Planning,Catering Business,Event Ideas,Catchy Business Name Ideas,Business Ideas",image,https://i.pinimg.com/originals/79/72/3a/79723a61f36e99ae00f672fb35951f92.png,/data/event-planning,event-planning


# Cleaning the GEO Dataset

In [0]:
# Create a new column coordinates that contains an array based on the latitude and longitude columns

from pyspark.sql.functions import array

df_geo_with_coordinates = df_geo_cleaned.withColumn("coordinates", array("latitude", "longitude"))

display(df_geo_with_coordinates)

ind,longitude,latitude,country,timestamp,coordinates
10999,21.6704,-52.2172,Saint Martin,2019-04-19,"List(-52.2172, 21.6704)"
6630,-111.954,-50.7814,Bouvet Island (Bouvetoya),2021-12-12,"List(-50.7814, -111.954)"
1498,-101.72,-85.2965,Greenland,2021-10-29,"List(-85.2965, -101.72)"
6416,-179.022,-89.63,Argentina,2020-10-18,"List(-89.63, -179.022)"
4767,-29.2943,-63.6785,Hungary,2022-03-15,"List(-63.6785, -29.2943)"
9213,-179.689,-89.5173,Algeria,2021-07-04,"List(-89.5173, -179.689)"
8559,-161.644,-88.5255,American Samoa,2019-04-19,"List(-88.5255, -161.644)"
4067,-146.879,-85.0647,Bahamas,2020-03-01,"List(-85.0647, -146.879)"
373,134.913,31.1933,United States Minor Outlying Islands,2022-02-03,"List(31.1933, 134.913)"
7821,90.9314,61.5545,Chile,2020-05-13,"List(61.5545, 90.9314)"


In [0]:
# Convert the timestamp column from a string to a timestamp data type

from pyspark.sql.functions import col, to_timestamp

df_geo_with_coordinates = df_geo_with_coordinates.withColumn("timestamp", to_timestamp(col("timestamp")))

display(df_geo_with_coordinates)

ind,longitude,latitude,country,timestamp,coordinates
10999,21.6704,-52.2172,Saint Martin,2019-04-19T00:00:00.000+0000,"List(-52.2172, 21.6704)"
6630,-111.954,-50.7814,Bouvet Island (Bouvetoya),2021-12-12T00:00:00.000+0000,"List(-50.7814, -111.954)"
1498,-101.72,-85.2965,Greenland,2021-10-29T00:00:00.000+0000,"List(-85.2965, -101.72)"
6416,-179.022,-89.63,Argentina,2020-10-18T00:00:00.000+0000,"List(-89.63, -179.022)"
4767,-29.2943,-63.6785,Hungary,2022-03-15T00:00:00.000+0000,"List(-63.6785, -29.2943)"
9213,-179.689,-89.5173,Algeria,2021-07-04T00:00:00.000+0000,"List(-89.5173, -179.689)"
8559,-161.644,-88.5255,American Samoa,2019-04-19T00:00:00.000+0000,"List(-88.5255, -161.644)"
4067,-146.879,-85.0647,Bahamas,2020-03-01T00:00:00.000+0000,"List(-85.0647, -146.879)"
373,134.913,31.1933,United States Minor Outlying Islands,2022-02-03T00:00:00.000+0000,"List(31.1933, 134.913)"
7821,90.9314,61.5545,Chile,2020-05-13T00:00:00.000+0000,"List(61.5545, 90.9314)"


In [0]:
# Reorder the DataFrame columns to have the following column order

df_geo_clean = df_geo_with_coordinates.select("ind", "country", "coordinates", "timestamp")
display(df_geo_clean)

ind,country,coordinates,timestamp
10999,Saint Martin,"List(-52.2172, 21.6704)",2019-04-19T00:00:00.000+0000
6630,Bouvet Island (Bouvetoya),"List(-50.7814, -111.954)",2021-12-12T00:00:00.000+0000
1498,Greenland,"List(-85.2965, -101.72)",2021-10-29T00:00:00.000+0000
6416,Argentina,"List(-89.63, -179.022)",2020-10-18T00:00:00.000+0000
4767,Hungary,"List(-63.6785, -29.2943)",2022-03-15T00:00:00.000+0000
9213,Algeria,"List(-89.5173, -179.689)",2021-07-04T00:00:00.000+0000
8559,American Samoa,"List(-88.5255, -161.644)",2019-04-19T00:00:00.000+0000
4067,Bahamas,"List(-85.0647, -146.879)",2020-03-01T00:00:00.000+0000
373,United States Minor Outlying Islands,"List(31.1933, 134.913)",2022-02-03T00:00:00.000+0000
7821,Chile,"List(61.5545, 90.9314)",2020-05-13T00:00:00.000+0000


# Cleaning the USER Dataset

In [0]:
# Create a new column user_name that concatenates the information found in the first_name and last_name columns

from pyspark.sql.functions import concat_ws

df_user_cleaned = df_user_cleaned.withColumn("user_name", concat_ws(" ", col("first_name"), col("last_name")))
display(df_user_cleaned)

ind,first_name,last_name,age,date_joined,user_name
1498,Bianca,Johnson,29,2016-01-29,Bianca Johnson
6416,Albert,Allison,20,2015-10-21,Albert Allison
4767,Madeline,Brown,43,2016-11-11,Madeline Brown
9213,Aaron,Abbott,20,2015-10-23,Aaron Abbott
8559,Alexander,Bailey,20,2015-10-30,Alexander Bailey
4067,Angela,Becker,26,2015-11-23,Angela Becker
373,Christopher,Smith,48,2017-05-21,Christopher Smith
7821,Jeff,Jones,57,2017-07-11,Jeff Jones
1088,Nina,Smith,51,2017-06-29,Nina Smith
6302,Jason,Brooks,28,2015-12-21,Jason Brooks


In [0]:
# Drop the first_name and last_name columns from the DataFrame

df_user_name = df_user_cleaned.drop("first_name", "last_name")
display(df_user_name)

ind,age,date_joined,user_name
1498,29,2016-01-29,Bianca Johnson
6416,20,2015-10-21,Albert Allison
4767,43,2016-11-11,Madeline Brown
9213,20,2015-10-23,Aaron Abbott
8559,20,2015-10-30,Alexander Bailey
4067,26,2015-11-23,Angela Becker
373,48,2017-05-21,Christopher Smith
7821,57,2017-07-11,Jeff Jones
1088,51,2017-06-29,Nina Smith
6302,28,2015-12-21,Jason Brooks


In [0]:
# Convert the date_joined column from a string to a timestamp data type

from pyspark.sql.functions import to_timestamp

df_user_dates = df_user_name.withColumn("date_joined", to_timestamp("date_joined"))
display(df_user_dates)

ind,age,date_joined,user_name
1498,29,2016-01-29T00:00:00.000+0000,Bianca Johnson
6416,20,2015-10-21T00:00:00.000+0000,Albert Allison
4767,43,2016-11-11T00:00:00.000+0000,Madeline Brown
9213,20,2015-10-23T00:00:00.000+0000,Aaron Abbott
8559,20,2015-10-30T00:00:00.000+0000,Alexander Bailey
4067,26,2015-11-23T00:00:00.000+0000,Angela Becker
373,48,2017-05-21T00:00:00.000+0000,Christopher Smith
7821,57,2017-07-11T00:00:00.000+0000,Jeff Jones
1088,51,2017-06-29T00:00:00.000+0000,Nina Smith
6302,28,2015-12-21T00:00:00.000+0000,Jason Brooks


In [0]:
df_user_clean = df_user_dates.select("ind", "user_name", "age", "date_joined")

In [0]:
dbutils.fs.rm("/tmp/kinesis/_checkpoints/", True)

In [0]:
df_pin_clean.writeStream \
  .format("delta") \
  .outputMode("append") \
  .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
  .table("0affe012670f_pin_table")

In [0]:
df_geo_clean.writeStream \
  .format("delta") \
  .outputMode("append") \
  .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
  .table("0affe012670f_geo_table")

In [0]:
df_user_clean.writeStream \
  .format("delta") \
  .outputMode("append") \
  .option("checkpointLocation", "/tmp/kinesis/_checkpoints/") \
  .table("0affe012670f_user_table")