# Project 4 - Apache Spark Data Lake

_**NOTE**: To better understand what has been done, please first read the README in the project folder on GitHub:
**udacity-de/apache-spark-data-lake/README.md**_

#### Import Python packages 

In [1]:
import configparser
from datetime import datetime
import boto3
import json
import os
import sys
## Spark
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import DateType
## Local
sys.path.insert(0, '../src')
import config as cf

In [2]:
## Default
CONFIG_FILE = '../src/dl.cfg'

## Reading cfg file
config = configparser.ConfigParser()
## ConfigParser.read() silently skips files it cannot open and returns the
## list of files it actually parsed, so fail fast here instead of hitting a
## confusing NoSectionError on the first lookup below.
if not config.read(CONFIG_FILE):
    raise FileNotFoundError(
        "Config file not found or unreadable: {}".format(CONFIG_FILE)
    )

## Setting up Access Key and Secret Key
AWS_KEY = config.get('AWS','AWS_ACCESS_KEY_ID')
AWS_SECRET = config.get('AWS','AWS_SECRET_ACCESS_KEY')
## Region and name entries from the same [AWS] section
AWS_REGION = config.get('AWS','region_name')
AWS_NAME = config.get('AWS','name')

In [3]:
## Re-read the config file; the displayed value is the list of files that were parsed
config.read(CONFIG_FILE)

['../src/dl.cfg']

In [4]:
## Export the AWS credentials from the [AWS] config section as environment variables
for env_name in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
    os.environ[env_name] = config['AWS'][env_name]

## Input / output locations as configured in the [DATALAKE] section
datalake_cfg = config['DATALAKE']
input_data = datalake_cfg['INPUT_DATA']
output_data = datalake_cfg['OUTPUT_DATA']

In [5]:
## Display the input location read from the config
input_data

's3a://udacity-dend/'

In [6]:
## Display the output location read from the config
output_data

's3a://udacity-spark-data-lake'

In [7]:
## Get (or create) the Spark session with the hadoop-aws 2.7.0 package
## listed under spark.jars.packages
spark = (
    SparkSession.builder
    .config("spark.jars.packages", "org.apache.hadoop:hadoop-aws:2.7.0")
    .getOrCreate()
)

In [8]:
## Both AWS handles use the same key pair; build the keyword arguments once
## from the constants loaded from the config file so the two calls cannot
## drift apart.
aws_credentials = {
    'aws_access_key_id': AWS_KEY,
    'aws_secret_access_key': AWS_SECRET,
}

## EMR client in the region configured under [AWS] region_name
emr = boto3.client('emr', region_name=AWS_REGION, **aws_credentials)

## S3 resource.
## NOTE(review): the region is pinned to us-west-2 here instead of using
## AWS_REGION - presumably because the buckets live there; confirm.
s3 = boto3.resource('s3', region_name="us-west-2", **aws_credentials)

In [9]:
## Page through the EMR clusters that are running, waiting or terminated
cluster_states = ['RUNNING','WAITING', 'TERMINATED']
cluster_paginator = emr.get_paginator('list_clusters')
page_iterator = cluster_paginator.paginate(ClusterStates=cluster_states)

## One line per cluster: its name followed by its status entry
for page in page_iterator:
    for cluster in page['Clusters']:
        print(cluster['Name'], cluster['Status'])

spark-emr-cluster {'State': 'TERMINATED', 'StateChangeReason': {'Message': 'Steps completed with errors'}, 'Timeline': {'CreationDateTime': datetime.datetime(2021, 2, 26, 11, 28, 57, 940000, tzinfo=tzlocal()), 'ReadyDateTime': datetime.datetime(2021, 2, 26, 11, 35, 5, 860000, tzinfo=tzlocal()), 'EndDateTime': datetime.datetime(2021, 2, 26, 13, 44, 15, 192000, tzinfo=tzlocal())}}
spark-emr-cluster {'State': 'TERMINATED', 'StateChangeReason': {'Code': 'USER_REQUEST', 'Message': 'Terminated by user request'}, 'Timeline': {'CreationDateTime': datetime.datetime(2021, 2, 26, 11, 26, 20, 500000, tzinfo=tzlocal()), 'EndDateTime': datetime.datetime(2021, 2, 26, 11, 26, 58, 42000, tzinfo=tzlocal())}}
spark-emr-cluster {'State': 'TERMINATED', 'StateChangeReason': {'Code': 'USER_REQUEST', 'Message': 'Terminated by user request'}, 'Timeline': {'CreationDateTime': datetime.datetime(2021, 2, 26, 11, 5, 38, 261000, tzinfo=tzlocal()), 'ReadyDateTime': datetime.datetime(2021, 2, 26, 11, 10, 12, 270000, 

In [10]:
## Bucket name derived from the configured input location so it cannot drift
## from the config (e.g. 's3a://udacity-dend/' -> 'udacity-dend').
## NOTE: despite its name, song_data_path holds a bucket name, not a path.
song_data_path = input_data.split("://", 1)[-1].split("/", 1)[0]
log_data_path = input_data + "log_data/"
print(song_data_path)
print(log_data_path)

## boto3 handle on the input bucket
input_data_bucket =  s3.Bucket(song_data_path)

udacity-dend
s3a://udacity-dend/log_data/


In [11]:
## Echo every object whose key starts with "song_data", then report how many there were
song_objects = input_data_bucket.objects.filter(Prefix="song_data")
count_sd = 0  # stays 0 when nothing matches the prefix
for count_sd, obj in enumerate(song_objects, start=1):
    print(obj)
print(count_sd)

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAAK128F9318786.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAAV128F421A322.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAABD128F429CF47.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAACN128F9355673.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAEA128F935A30D.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAED128E0783FAB.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAEM128F93347B9.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAEW128F42930C0.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAFD128F92F423A.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/A/A/TRAAAGR128F425B14B.json')
s3.ObjectSummary(

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRMJ128E0780E42.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRMZ128F4261DED.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABROY128F9346329.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRPM128F4221A95.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRQY12903CA5133.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRRK128F14554B0.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRRQ128F42642BA.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRVO128F9346FCE.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRVU128EF342D09.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRWS128F422957B.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/B/R/TRABRYL128F92F

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/J/TRADJYV12903CCEDA8.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKAX128E0786B92.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKCM128F42728E9.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKDX128F9320676.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKEU12903CF1540.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKIF128F428A193.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKJB128F9314B17.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKKR128F425764A.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKLA128F93101FC.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKLT128F9304E1D.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/D/K/TRADKNW12903CD

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFGI128F9342668.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFGO12903CB78B0.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFHZ128F1454059.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFJV128F92D6FB5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFJX128F9307573.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFKC128F931CA02.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFKI128F931906C.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFKI12903CA5F42.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFKV12903CE29B5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFML128F146903B.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/F/F/TRAFFOV128F934

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWMO12903CFD994.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWMV128F426F6B9.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWNQ12903CDD0C3.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWPR128F426BDC3.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWSI12903CDE0CA.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWTT128F148CF2C.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWUI128F147B815.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWUL128F428DF40.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWUV128F93129CD.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWUX12903CEEF08.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/G/W/TRAGWVA12903D0

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPRY128F427CA6E.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPST128F429876D.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPTW128F4299F05.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPUF128F93059DC.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPWK128F42417A5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPWY128F92FDA73.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPZG128F4257BFE.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/P/TRAIPZY128E0787299.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/Q/TRAIQAY128F4233CC5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/Q/TRAIQBQ128F1457CA2.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/I/Q/TRAIQBV12903CB

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJCF128F14A0EDD.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJDU128F931F134.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJEL128F149E2EC.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJIC12903CF3B47.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJIL128F932042F.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJKC128F42621D0.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJKE128F428C203.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJKQ128F42A2E33.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJMR12903CF8776.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJNE128F149962E.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/K/J/TRAKJOI12903D0

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/C/TRAMCWR128F4233F7F.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/C/TRAMCZK128F14787E4.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/C/TRAMCZN128F146C762.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/C/TRAMCZT128F149FAB6.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/D/TRAMDAK128E0791DEC.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/D/TRAMDBX128F92D55AB.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/D/TRAMDCM128F92DDD62.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/D/TRAMDCX128F4294590.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/D/TRAMDDK12903D03146.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/D/TRAMDDN128F4277101.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/M/D/TRAMDFW128F428

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWOF12903CB3E19.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWOG128F4265FD1.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWQA128F42519D1.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWRZ128F422E980.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWTX12903CB30CD.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWUF128F93518DB.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWUI128F9312EA6.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/W/TRANWYH128F42857F1.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/X/TRANXBO128F428CB28.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/X/TRANXCG128F429E743.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/N/X/TRANXDE128F426

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSCM128F42B79A7.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSCQ12903CC4A54.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSDZ12903CDC441.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSEO128F4297178.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSGE128F14AE381.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSHW128F14560A6.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSIM128F92F1D28.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSIX12903CB6B43.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSJN128F422AF18.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSKX128F930AE37.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/P/S/TRAPSLA12903CA

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMDH128F426F221.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMDV128F428C7E2.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMEH128F422BC55.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMFD128F4231386.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMFN128F92DB4F0.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMFV128F9303D80.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMGK128F4275356.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMGN12903D01B21.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMHA128F92E7D32.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMKM128F42666D5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/R/M/TRARMLG128F930

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/E/TRATEJI128F145742A.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/E/TRATEOW12903CAE3D5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/E/TRATEPE128F92F2C2C.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/E/TRATEVC12903CA1650.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/E/TRATEVQ12903CF86BD.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/E/TRATEXI128F14AE4AD.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/F/TRATFAU128F42A54A5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/F/TRATFDH128F146C61B.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/F/TRATFDM12903CE8925.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/F/TRATFDV128F428B0F3.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/T/F/TRATFDV128F42B

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZJG12903CA439C.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZJP128F427040B.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZJW128F4253BCA.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZKV12903CAF41E.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZLI128E0791D51.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZLI128F427EA58.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZMW128F92F069E.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZOK128F145B5AB.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZOU128F42B9A34.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZQH128F422AFAF.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/U/Z/TRAUZSC128F42B

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/S/TRAWSXV128F4273E35.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/S/TRAWSYF128F149431A.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/S/TRAWSYJ12903CB2E3B.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/S/TRAWSZV128F4236A17.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/T/TRAWTCF128F9306CB9.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/T/TRAWTDG128F93320BD.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/T/TRAWTDY12903CE10C0.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/T/TRAWTFJ12903D07887.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/T/TRAWTHB128F1491C02.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/T/TRAWTIF128F9313BD7.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/W/T/TRAWTIL128F427

s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMLD128F425F170.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMMA128F92DEB97.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMMI12903CDBD48.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMMZ12903CE0457.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMNI128F92DCFC5.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMNL128F4295FC1.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMNP128F9314D93.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMOF12903CC8ECE.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMPM128F42417A2.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMVL128E0785EEF.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='song_data/A/Y/M/TRAYMXS128F145

In [12]:
## Echo every object whose key starts with "log_data", then report how many there were
log_objects = input_data_bucket.objects.filter(Prefix="log_data")
count_ld = 0  # stays 0 when nothing matches the prefix
for count_ld, obj in enumerate(log_objects, start=1):
    print(obj)
print(count_ld)

s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-01-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-02-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-03-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-04-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-05-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-06-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-07-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-08-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-09-events.json')
s3.ObjectSummary(bucket_name='udacity-dend', key='log_data/2018/11/2018-11-10-events.json')
s3.ObjectSummary(b

In [13]:
## Display the configured output location again before inspecting its bucket
output_data

's3a://udacity-spark-data-lake'

In [14]:
## Derive the bucket name from the configured output location instead of
## repeating it as a literal
## (e.g. 's3a://udacity-spark-data-lake' -> 'udacity-spark-data-lake').
output_data_bucket =  s3.Bucket(output_data.split("://", 1)[-1].split("/", 1)[0])

In [15]:
## Display the boto3 Bucket handle for the output bucket
output_data_bucket

s3.Bucket(name='udacity-spark-data-lake')

In [16]:
## Output
## Echo every object in the output bucket whose key starts with "songs", then the total.
## NOTE: this reuses (and overwrites) the count_sd name from the input song_data listing.
written_song_objects = output_data_bucket.objects.filter(Prefix="songs")
count_sd = 0  # stays 0 when nothing matches the prefix
for count_sd, obj in enumerate(written_song_objects, start=1):
    print(obj)
print(count_sd)

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_SUCCESS')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR00FVC1187FB5BE3E/part-00055-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR00FVC1187FB5BE3E/part-00071-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR00FVC1187FB5BE3E/part-00129-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR00LNI1187FB444A5/part-00432-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR00MQ31187B9ACD8F/part-00385-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-dat

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR8RXRI1187FB4104B/part-00087-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR8S6R11187B9B0407/part-00259-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR8TEHU1187FB4C014/part-00274-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR8TFCF1187B98FC82/part-00105-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR8UA7Z1187B9AD9C9/part-00068-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=AR8UMFB1187B9B6030/part-00124-f521d110-

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARGC1161187FB4D010/part-00459-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARGCBAJ1187FB3F481/part-00045-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARGCBAJ1187FB3F481/part-00161-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARGCBAJ1187FB3F481/part-00244-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARGCE5D1187B9908FC/part-00148-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARGCGKV1187B9B6E25/part-00038-f521d110-

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARNFFVQ1242077E94A/part-00137-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARNG1DH1187FB3AB56/part-00161-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARNG49J1187FB3C893/part-00283-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARNG8IY1187B9A6FE9/part-00452-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARNGKVG1187B9AE563/part-00435-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARNGLM31187FB5A765/part-00125-f521d110-

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARUWI271187FB54CC5/part-00216-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARUWLCL1187FB549B8/part-00265-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARUWLCL1187FB549B8/part-00416-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARUWYOE122ECCB8B2F/part-00332-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARUWYOE122ECCB8B2F/part-00369-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=0/_artist_id=ARUX0P11187FB40AA6/part-00281-f521d110-

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1972/_artist_id=ARB29H41187B98F0EF/part-00187-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1972/_artist_id=ARBQW1H1187FB3E3A0/part-00360-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1972/_artist_id=ARC3DNB1187FB4F824/part-00286-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1972/_artist_id=ARDD09D1187B99AD60/part-00041-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1972/_artist_id=AREM86Q1187FB4B04D/part-00259-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1972/_artist_id=ARFCUN31187B9AD578/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1989/_artist_id=ARV4GYZ1187B9B8171/part-00149-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1989/_artist_id=ARVL39W1187B989514/part-00224-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1989/_artist_id=ARVQTZP1187FB40283/part-00145-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1989/_artist_id=ARVWX1G1187B992EF1/part-00125-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1989/_artist_id=ARW5MXU1187B9A9F58/part-00334-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1989/_artist_id=ARW63XP1187FB5AB99/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1995/_artist_id=ARFQOGB1187FB36588/part-00067-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1995/_artist_id=ARGFDBP1187B9B0688/part-00412-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1995/_artist_id=ARGJ7R91187B9AF64A/part-00440-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1995/_artist_id=ARGMCS61187B9B6BDB/part-00443-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1995/_artist_id=ARGMXQW1187FB54C34/part-00062-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1995/_artist_id=ARGUP0P1187FB49FD4/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1999/_artist_id=AR49F631187B98BFA6/part-00068-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1999/_artist_id=AR4ANKV1187FB4E397/part-00158-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1999/_artist_id=AR4C6V01187FB3BAF4/part-00333-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1999/_artist_id=AR4EAEE1187B98C3E8/part-00208-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1999/_artist_id=AR4J8B41187FB3D170/part-00364-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=1999/_artist_id=AR4M50B1187FB3EE81/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2001/_artist_id=ARTJECM1187B9930BA/part-00171-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2001/_artist_id=ARTL0JQ1187FB4D190/part-00025-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2001/_artist_id=ARTM7BI1187B98AD9A/part-00464-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2001/_artist_id=ARTMSN91187FB3A3B7/part-00429-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2001/_artist_id=ARTS26V1187FB3F9FE/part-00438-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2001/_artist_id=ARTV3SB1187B997689/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2003/_artist_id=ARUIRXF1187FB45BC6/part-00247-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2003/_artist_id=ARUNAM11187B98B490/part-00214-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2003/_artist_id=ARUQP2F1187B99AF16/part-00383-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2003/_artist_id=ARUS8MI1187FB4CD48/part-00257-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2003/_artist_id=ARUSOMY1187FB4D3B4/part-00228-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2003/_artist_id=ARUTEVA1187B9AE391/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2005/_artist_id=ARDGLX91187FB4CC38/part-00308-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2005/_artist_id=ARDGRTA1187B9B3BB5/part-00252-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2005/_artist_id=ARDH6XJ1187B993D57/part-00395-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2005/_artist_id=ARDKBAV1187FB4AF61/part-00461-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2005/_artist_id=ARDNS031187B9924F0/part-00017-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2005/_artist_id=ARDNZL61187B98F42D/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2006/_artist_id=ARPD2KK1187B9B8B98/part-00399-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2006/_artist_id=ARPDDNW1187B998CC1/part-00427-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2006/_artist_id=ARPFK3E1187B9A43BF/part-00139-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2006/_artist_id=ARPH2Q31187FB43B01/part-00396-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2006/_artist_id=ARPH2Q31187FB43B01/part-00400-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2006/_artist_id=ARPKATM1187B9B76E6/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2007/_artist_id=ARVFTA41187B9963CD/part-00136-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2007/_artist_id=ARVJ6541187B9A4C3E/part-00126-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2007/_artist_id=ARVJR6T1187B9A76A4/part-00451-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2007/_artist_id=ARVLXWP1187FB5B94A/part-00226-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2007/_artist_id=ARVNGA71187FB3C107/part-00144-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2007/_artist_id=ARVNNXD1187B9AE50D/pa

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2009/_artist_id=AR73S4G1187B9A03C2/part-00417-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2009/_artist_id=AR79L0D1187FB3AFB6/part-00279-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2009/_artist_id=AR7BBIT1187FB4EAA2/part-00251-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2009/_artist_id=AR7DIY41187B9B3EE9/part-00271-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2009/_artist_id=AR7KV2A1187FB466FC/part-00224-f521d110-b8e2-40a5-ad42-b991189d34b1.c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='songs/_year=2009/_artist_id=AR7RSNX1187FB47EAD/pa

In [17]:
## Songs - read back one (year, artist) partition of the songs output.
## The S3 URI is assembled with an explicit '/' instead of os.path.join:
## os.path.join uses the host OS separator (a backslash on Windows), which
## would produce an invalid s3a:// key.
song_partition_glob = 'songs/_year=2004/_artist_id=AR00JIO1187B9A5A15/*.snappy.parquet'
song_df = spark.read.parquet(output_data.rstrip('/') + '/' + song_partition_glob)

In [18]:
## Report how many rows the loaded songs partition holds, then preview it.
song_row_count = song_df.count()
print('size=', song_row_count)
## Trailing expression: shown as the cell's output (at most 5 rows, as pandas).
song_df.limit(5).toPandas()

size= 1


Unnamed: 0,song_id,title,artist_id,year,duration
0,SODJRJR12A8C13FD5A,N.Y. Streetz,AR00JIO1187B9A5A15,2004,256.33914


In [19]:
## Artists
## Print every object stored under the "artists" prefix of the output bucket,
## followed by how many objects were listed.
count_sd = 0  # stays 0 when the prefix matches no objects
for count_sd, obj in enumerate(output_data_bucket.objects.filter(Prefix="artists"), start=1):
    print(obj)
print(count_sd)

s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/_SUCCESS')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/part-00000-a11ec29b-e3fc-4024-86a7-32e0ac8a0643-c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/part-00001-a11ec29b-e3fc-4024-86a7-32e0ac8a0643-c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/part-00002-a11ec29b-e3fc-4024-86a7-32e0ac8a0643-c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/part-00003-a11ec29b-e3fc-4024-86a7-32e0ac8a0643-c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/part-00004-a11ec29b-e3fc-4024-86a7-32e0ac8a0643-c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/part-00005-a11ec29b-e3fc-4024-86a7-32e0ac8a0643-c000.snappy.parquet')
s3.ObjectSummary(bucket_name='udacity-spark-data-lake', key='artists/part-00006-a11ec2

-------