In [None]:
# List the contents of the DBFS directory /FileStore/tables
# (the credentials CSV loaded further down is read from this directory).
dbutils.fs.ls("/FileStore/tables")

In [None]:
# URL processing (`urllib.parse` is imported explicitly: `import urllib`
# alone does not guarantee that the `parse` submodule is loaded)
import urllib
import urllib.parse

# pyspark functions
from pyspark.sql.functions import *

### Mount the S3 bucket

In [None]:
# Reader settings for the credentials file.
file_type = "csv"              # the credentials are stored as a CSV file
first_row_is_header = "true"   # the first row holds the column names
delimiter = ","                # fields are comma-separated

# Load the credentials CSV from DBFS into a Spark dataframe.
aws_keys_df = (
    spark.read.format(file_type)
    .option("header", first_row_is_header)
    .option("sep", delimiter)
    .load("/FileStore/tables/authentication_credentials.csv")
)

In [None]:
# Get the AWS access key and secret key from the spark dataframe: AFTER INCIDENT OF 18 DEC 2023
# Both columns are fetched with a single Spark job (one `first()` call)
# instead of collecting the whole dataframe once per column.
credentials_row = aws_keys_df.select('Access key ID', 'Secret access key').first()
if credentials_row is None:
    # `first()` returns None for an empty dataframe; fail with a clear message
    # rather than an opaque indexing error.
    raise ValueError("authentication_credentials.csv contains no credential rows")
ACCESS_KEY = credentials_row['Access key ID']
SECRET_KEY = credentials_row['Secret access key']
# URL-encode the secret key; safe="" also escapes "/" so the key can be
# embedded in the bucket source URL.
ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")

In [None]:
# AWS S3 bucket name:
# to be edited for matching real situation!
AWS_S3_BUCKET = "user-0ea9a6e05a33-bucket"
# Mount name for the bucket
MOUNT_NAME = "/mnt/user-0ea9a6e05a33-bucket"
# Source url (access key and URL-encoded secret key are embedded in it)
SOURCE_URL = "s3n://{0}:{1}@{2}".format(ACCESS_KEY, ENCODED_SECRET_KEY, AWS_S3_BUCKET)
# Mount the bucket only when it is not mounted yet, so that the notebook can be
# re-run from a fresh state without failing on an already existing mount point.
mounted_points = [mount.mountPoint for mount in dbutils.fs.mounts()]
if MOUNT_NAME not in mounted_points:
    dbutils.fs.mount(SOURCE_URL, MOUNT_NAME)
# To unmount the bucket again:
# dbutils.fs.unmount(MOUNT_NAME)

In [None]:
# List the topic folders inside the mounted bucket. The path is derived from
# MOUNT_NAME so it keeps working when the bucket/mount name is edited.
display(dbutils.fs.ls(MOUNT_NAME + "/topics"))

path,name,size,modificationTime
dbfs:/mnt/user-0ea9a6e05a33-bucket/topics/0ea9a6e05a33.geo/,0ea9a6e05a33.geo/,0,1702927625074
dbfs:/mnt/user-0ea9a6e05a33-bucket/topics/0ea9a6e05a33.pin/,0ea9a6e05a33.pin/,0,1702927625074
dbfs:/mnt/user-0ea9a6e05a33-bucket/topics/0ea9a6e05a33.user/,0ea9a6e05a33.user/,0,1702927625074


## Extract

In [None]:
# File location and type
# The asterisk (*) matches every .json file in partition 0 of a topic; the "{}"
# placeholder is filled in with the topic suffix ('user', 'geo' or 'pin').
file_location = MOUNT_NAME + "/topics/0ea9a6e05a33.{}/partition=0/*.json"

file_type = "json"
# Ask Spark to infer the schema
# NOTE(review): "inferSchema" is documented as a CSV reader option; the JSON
# reader appears to infer the schema by default - confirm before removing it.
infer_schema = "true"


def read_topic(topic, location_template=file_location, source_format=file_type, infer=infer_schema):
    """Read all files of one topic from the mounted S3 bucket.

    Parameters
    ----------
    topic : str
        Topic suffix substituted into ``location_template`` ('user', 'geo', 'pin').
    location_template : str
        Path pattern containing one ``{}`` placeholder for the topic suffix.
    source_format : str
        Spark data source format passed to ``spark.read.format``.
    infer : str
        Value passed to the reader's "inferSchema" option.

    Returns
    -------
    The Spark dataframe returned by ``spark.read...load`` for that topic.
    """
    return (
        spark.read.format(source_format)
        .option("inferSchema", infer)
        .load(location_template.format(topic))
    )


# Read in JSONs from mounted S3 bucket: df_user
df_user = read_topic('user')
# Display Spark dataframe to check its content (not to port to github!)
display(df_user)

# Read in JSONs from mounted S3 bucket: df_geo
df_geo = read_topic('geo')
# Display Spark dataframe to check its content
display(df_geo)

# Read in JSONs from mounted S3 bucket: df_pin
df_pin = read_topic('pin')
# Display Spark dataframe to check its content
display(df_pin)

age,date_joined,first_name,ind,last_name
27,2016-03-08 13:38:37,Christopher,2015,Bradshaw
59,2017-05-12 21:22:17,Alexander,10673,Cervantes
27,2016-03-08 13:38:37,Christopher,2015,Bradshaw
59,2017-05-12 21:22:17,Alexander,10673,Cervantes
48,2016-02-27 16:57:44,Christopher,1857,Hamilton
39,2016-06-29 20:43:59,Christina,6398,Davenport
20,2015-10-23 04:13:23,Alexandria,3599,Alvarado
39,2016-06-29 20:43:59,Christina,6398,Davenport
20,2015-10-23 04:13:23,Alexandria,3599,Alvarado
20,2015-10-23 04:13:23,Alexandria,4256,Alvarado


country,ind,latitude,longitude,timestamp
British Indian Ocean Territory (Chagos Archipelago),9455,-82.9272,-150.346,2022-03-15T01:46:32
British Indian Ocean Territory (Chagos Archipelago),6814,-86.5675,-149.565,2022-09-02T11:34:28
British Indian Ocean Territory (Chagos Archipelago),9455,-82.9272,-150.346,2022-03-15T01:46:32
British Indian Ocean Territory (Chagos Archipelago),6814,-86.5675,-149.565,2022-09-02T11:34:28
British Indian Ocean Territory (Chagos Archipelago),9455,-82.9272,-150.346,2022-03-15T01:46:32
British Indian Ocean Territory (Chagos Archipelago),6814,-86.5675,-149.565,2022-09-02T11:34:28
British Indian Ocean Territory (Chagos Archipelago),5111,-83.7472,8.65953,2021-04-01T00:56:57
British Indian Ocean Territory (Chagos Archipelago),5111,-83.7472,8.65953,2021-04-01T00:56:57
British Indian Ocean Territory (Chagos Archipelago),5111,-83.7472,8.65953,2021-04-01T00:56:57
British Indian Ocean Territory (Chagos Archipelago),2989,-87.013,133.062,2020-01-09T19:18:54


category,description,downloaded,follower_count,image_src,index,is_image_or_video,poster_name,save_location,tag_list,title,unique_id
home-decor,"Традиционные шведские коттеджи, обычно с красным фасадом — это настоящее воплощением идеального зимнего уюта. Они обычно оформлены очень просто и ✌PUFIK. Beautiful Interiors. On…",1,136k,https://i.pinimg.com/originals/32/eb/72/32eb72e4fd8654c115a64528bd1f34b4.png,6717,image,PUFIK Interiors & Inspirations,Local save in /data/home-decor,"Scandinavian Cottage,Swedish Cottage,Swedish Home Decor,Swedish Farmhouse,Swedish Style,Swedish Kitchen,Kitchen Black,Swedish House,Cozy Cottage",〚 Уютные шведские коттеджи от Carina Olander 〛 ◾ Фото ◾ Идеи ◾ Дизайн,bc5ab9ee-505e-44f6-92ba-677fe4fdf3e3
christmas,"Features: Material:Lint Size:48ｘ18cm Quantity:1 pc Shape:Santa Claus, snowman. Elk Occasion:Christmas Description: 1. Fashion design, high quality 2. Santa Claus, snowman. Elk C…",1,5k,https://i.pinimg.com/originals/b5/7f/21/b57f219fa89c1165b57525b8eae711da.jpg,1706,image,Wear24-7,Local save in /data/christmas,"Merry Christmas To You,Christmas Toys,Great Christmas Gifts,Christmas Snowman,Christmas Ornaments,Holiday,Christmas Party Decorations,Christmas Themes,Decoration Party",Standing Figurine Toys Xmas Santa Claus Snowman Reindeer Figure Plush Dolls Christmas Decorations Ornaments Home Indoor Table Ornaments Christmas Party Tree Hanging Decor Toys Gifts for Kids Friends…,b5c8a1b5-9e90-4522-9bec-2477b698d5b7
christmas,"❤️ ❤️ MERRY CHRISTMAS ❤️ ❤️ ❤️ ❤️ Early Christmas Special:Buy 3 Get 1 Free, Buy 5 Get 2 Free,Deadline November 25. Color:GreenMaterial:Polyvinyl ChlorideItem Dimensions:LxWxH 20…",1,784,https://i.pinimg.com/originals/ef/40/7e/ef407e9568aa46fed4162bd1fd28786e.jpg,1676,image,paupoo,Local save in /data/christmas,"Christmas Hanging Baskets,Christmas Plants,Christmas Wreaths,Christmas Ornaments,Merry Christmas,Christmas Sale,Christmas Porch Ideas,Hanging Christmas Lights,Christmas Island",PAUPOO™ Pre-lit Artificial Christmas Hanging Basket - Flocked with Mixed Decorations and White LED Lights - Frosted Berry BUY 5 GET 2 FREE(7PACKS),3ed92c2d-9cca-4ccf-ac25-44a9d8bec919
christmas,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",1,46k,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,2482,video,"Life on Summerhill | Home, Holiday Decor & DIY Website",Local save in /data/christmas,"Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",FORNT PORCH CHRISTMAS DECORATING IDEAS,08604f20-fa17-4b9a-9949-781717eca6cd
vehicles,"By David Crane ; defrev (at) gmail (dot) com All photos contained in this article were shot by DefenseReview.com (DR), and are copyrighted. DefenseReview.com owns the copyright…",1,709,https://i.pinimg.com/originals/36/63/12/366312d747da1358397610a86bf21b20.jpg,10538,image,Ricky Lee,Local save in /data/vehicles,"Army Vehicles,Armored Vehicles,Cool Trucks,Cool Cars,Amphibious Vehicle,Offroader,Bug Out Vehicle,Vehicle Wraps,Terrain Vehicle",BC Customs (BCC) Search and Rescue Tactical Vehicle-5 (SRTV-5) Baja Racing-Type All-Terrain Combat Vehicle Armed/Weaponized with 7.62mm NATO Garwood Industries (GI) M134G Minigun/Gatling Gun: SXOR…,5d9fa7e2-2118-4442-99b6-537d60463a6a
event-planning,This fabulous DIY project made me drool when I first saw it and I knew immediately that I was going to have to make this! I absolutely love things like this...shiny sparkly thin…,1,985k,https://i.pinimg.com/originals/a6/79/3c/a6793c2e3deebca67ecd82b0087fc13c.jpg,4585,image,"DIY Joy - Crafts, Home Improvement, Decor & Recipes",Local save in /data/event-planning,"Cheap Favors,Wedding Favors Cheap,Wedding Invitations,Wedding Planning On A Budget,Event Planning,Wedding Table Decorations,Wedding Centerpieces,Dollar Tree Centerpieces,Centerpiece Ideas",She Attaches Crystals To A Plate And Creates A Breakfast At Tiffany's Inspired Item!,aa873546-701b-40dd-a339-a3f8aaf78ccb
art,Marble Wall Art Modern Abstract Canvas Artwork Contemporary Home Decor Canvas Wall Art Ready to Hang Canvas Each canvas is professionally printed and hand-stretched in the USA.…,1,305,https://i.pinimg.com/originals/b2/6e/95/b26e950a283805d09ef9a4a279781217.jpg,527,image,Wall Canvas Mall,Local save in /data/art,"Modern Art Paintings,Modern Artwork,Modern Wall Art,Blue Artwork,Modern Canvas Art,Contemporary Home Decor,Modern Art Prints,Framed Canvas Prints,Wall Art Prints","Blue Gold Marble Canvas , Luxury Wall Art, Abstract Wall Decor, Navy Blue Abstract, Modern Artwork, Oversize Canvas Art, Contemporary Art - 1 Panel 12x9 / Gallery Wrap",ed8af037-ee87-4a80-97ac-99f5b153cf7e
event-planning,"Updated: January 25, 2017 You’ve organized some events for your family, friends or community and you have gained a budding reputation for knowing how put events together. You’ve…",1,4k,https://i.pinimg.com/originals/c3/2b/c6/c32bc6ad263857cb0eea19f9cd12beb9.jpg,4357,image,EventPlanning.com | Learn How To Become An Event Planner,Local save in /data/event-planning,"Event Planning Quotes,Event Planning Checklist,Event Planning Business,Business Events,Business Ideas,Business Names,Business Opportunities,Corporate Events,Wedding Event Planner",First Steps in Launching Your Own Event Business - Learn About Event Planning,ccf116e9-9096-4943-a344-1960ce216445
event-planning,20ftx10ft Blush Double Layer Polyester Chiffon Backdrop With Rod Pockets Write a review Item Number: BKDP300_046 Default Title - $107.99 USD $107.99Sale Price$166.59Retail Price,1,68k,https://i.pinimg.com/originals/55/1c/c6/551cc65e5aa06edf2eb25318c61ecbd4.jpg,4925,image,"eFavormart | One-Stop Shop for Wedding, Party & Event Supplies!",Local save in /data/event-planning,"Pink Photography,Background For Photography,Photography Backdrops,Wedding Photography,Photography Ideas,Burlap Backdrop,Fabric Backdrop,Pink Backdrop,Sequin Backdrop",20FT x 10FT Blush | Rose Gold Double Layer Polyester Chiffon Backdrop With Rod Pockets,662c77cd-d99a-45f8-9a36-62feadbb4f7b
christmas,"All the farmhouse Christmas decor ideas to create a gorgeous home for the holidays. Ideas to create a beautiful mantle, stocking you willl love, pillow covers and more!",1,91k,https://i.pinimg.com/originals/6f/df/3b/6fdf3bb828b057c76050d5f158694cf6.png,2288,image,"Kristen | Lifestyle, Mom Tips & Teacher Stuff Blog",Local save in /data/christmas,"Christmas Decorations For The Home,Winter Home Decor,Farmhouse Christmas Decor,Rustic Christmas,Christmas Fireplace,Christmas Mantels,Livingroom Christmas Decor,Christmas Home Decorating,Stocking Decorating Ideas",97 Farmhouse Christmas Decor Ideas For Your Home - Chaylor & Mads,991f5083-e4f2-4eee-bf85-d76bf84a08e2


## Transform

In [None]:
from pyspark.sql.functions import array

# Combine latitude and longitude into one array column named 'coordinates'
# and keep only the index, the coordinates and the timestamp.
df_geo = df_geo.select(
    'ind',
    array('latitude', 'longitude').alias('coordinates'),
    'timestamp',
)
# Show the resulting column names as a quick check.
df_geo.columns

In [None]:
from pyspark.sql.functions import concat_ws

# Build 'user_name' as "<first_name> <last_name>". concat_ws inserts the
# separator between the two parts (plain concat glued them together without a
# space) and skips null parts instead of turning the whole name into null.
# The columns are selected once, in their final order.
df_user = df_user.select(
    'ind',
    concat_ws(' ', 'first_name', 'last_name').alias('user_name'),
    'age',
    'date_joined',
)
# Show the resulting column names as a quick check.
list(df_user.columns)

In [None]:
# Replace nulls with the literal string 'None' (a string fill value only
# applies to string columns), then reorder the columns.
df_pin = (
    df_pin
    .fillna('None')
    .select(
        'index',
        'unique_id',
        'title',
        'description',
        'follower_count',
        'poster_name',
        'tag_list',
        'is_image_or_video',
        'image_src',
        'save_location',
        'category',
    )
)
# Show the resulting column names as a quick check.
df_pin.columns