# ETL Processes
Use this notebook to develop the ETL process for each of your tables before completing the `etl.py` file to load the whole datasets.

In [1]:
import os
import glob
import psycopg2
import pandas as pd
import numpy as np
from sql_queries import *

In [2]:
import sql_queries
from importlib import reload
reload(sql_queries)

<module 'sql_queries' from '/home/workspace/sql_queries.py'>

In [3]:
print(sql_queries.song_select)
#print(artist_table_insert)


SELECT a.song_id, a.artist_id 
FROM songs a INNER JOIN artists b 
ON a.artist_id = b.artist_id 
WHERE a.title = %s AND b.name = %s AND a.duration = %s ;



In [4]:
conn = psycopg2.connect("host=127.0.0.1 dbname=sparkifydb user=student password=student")
cur = conn.cursor()

In [210]:
'''
get_files() : Gets the filepath of all the files in the subdirectory
arg1 : filepath : Input filepath
output : returns absolute path of all the files. 
'''
# get_files : 
def get_files(filepath):
    all_files = []
    for root, dirs, files in os.walk(filepath):
        files = glob.glob(os.path.join(root,'*.json'))
        for f in files :
            all_files.append(os.path.abspath(f))
    
    return all_files

# Process `song_data`
In this first part, you'll perform ETL on the first dataset, `song_data`, to create the `songs` and `artists` dimensional tables.

Let's perform ETL on a single song file and load a single record into each table to start.
- Use the `get_files` function provided above to get a list of all song JSON files in `data/song_data`
- Select the first song in this list
- Read the song file and view the data

In [211]:
filepath= 'data/song_data'
song_files = get_files(filepath)
print("Total Files {}".format(len(song_files)))
#print(song_files)
for i, fp in enumerate(song_files):
    print("{}. {}".format(i, fp))

Total Files 71
0. /home/workspace/data/song_data/A/A/A/TRAAAAW128F429D538.json
1. /home/workspace/data/song_data/A/A/A/TRAAAVO128F93133D4.json
2. /home/workspace/data/song_data/A/A/A/TRAAADZ128F9348C2E.json
3. /home/workspace/data/song_data/A/A/A/TRAAAMO128F1481E7F.json
4. /home/workspace/data/song_data/A/A/A/TRAAAVG12903CFA543.json
5. /home/workspace/data/song_data/A/A/A/TRAAARJ128F9320760.json
6. /home/workspace/data/song_data/A/A/A/TRAAAMQ128F1460CD3.json
7. /home/workspace/data/song_data/A/A/A/TRAAAPK128E0786D96.json
8. /home/workspace/data/song_data/A/A/A/TRAAAFD128F92F423A.json
9. /home/workspace/data/song_data/A/A/A/TRAAAEF128F4273421.json
10. /home/workspace/data/song_data/A/A/A/TRAAABD128F429CF47.json
11. /home/workspace/data/song_data/A/A/B/TRAABCL128F4286650.json
12. /home/workspace/data/song_data/A/A/B/TRAABJV128F1460C49.json
13. /home/workspace/data/song_data/A/A/B/TRAABJL12903CDCF1A.json
14. /home/workspace/data/song_data/A/A/B/TRAABVM128F92CA9DC.json
15. /home/workspace/

In [212]:
print("File {}".format(song_files[0]))
df = pd.read_json(song_files[0], lines=True)
print("df shape {}".format(df.shape))
df.head()

df.info()

File /home/workspace/data/song_data/A/A/A/TRAAAAW128F429D538.json
df shape (1, 10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 10 columns):
artist_id           1 non-null object
artist_latitude     0 non-null float64
artist_location     1 non-null object
artist_longitude    0 non-null float64
artist_name         1 non-null object
duration            1 non-null float64
num_songs           1 non-null int64
song_id             1 non-null object
title               1 non-null object
year                1 non-null int64
dtypes: float64(3), int64(2), object(5)
memory usage: 160.0+ bytes


In [213]:
df[df.isnull().any(1)]

Unnamed: 0,artist_id,artist_latitude,artist_location,artist_longitude,artist_name,duration,num_songs,song_id,title,year
0,ARD7TVE1187B99BFB1,,California - LA,,Casual,218.93179,1,SOMZWCG12A8C13C480,I Didn't Mean To,0


## #1: `songs` Table
#### Extract Data for Songs Table
- Select columns for song ID, title, artist ID, year, and duration
- Use `df.values` to select just the values from the dataframe
- Index to select the first (only) record in the dataframe
- Convert the array to a list and set it to `song_data`

In [214]:
song_data = df[['song_id', 'title', 'artist_id', 'year', 'duration']].values[0]
song_data

array(['SOMZWCG12A8C13C480', "I Didn't Mean To", 'ARD7TVE1187B99BFB1', 0,
       218.93179], dtype=object)

#### Insert Record into Song Table
Implement the `song_table_insert` query in `sql_queries.py` and run the cell below to insert a record for this song into the `songs` table. Remember to run `create_tables.py` before running the cell below to ensure you've created/resetted the `songs` table in the sparkify database.

In [215]:
try:    
    cur.execute(song_table_insert, song_data)
    conn.commit()
except Exception as e:
    conn.rollback()
    print("{} : Error Message : {}".format('Rolled back', e))

Run `test.ipynb` to see if you've successfully added a record to this table.

## #2: `artists` Table
#### Extract Data for Artists Table
- Select columns for artist ID, name, location, latitude, and longitude
- Use `df.values` to select just the values from the dataframe
- Index to select the first (only) record in the dataframe
- Convert the array to a list and set it to `artist_data`

In [216]:
df[['artist_id', 'artist_name', 'artist_location', 'artist_latitude', 'artist_longitude']]

Unnamed: 0,artist_id,artist_name,artist_location,artist_latitude,artist_longitude
0,ARD7TVE1187B99BFB1,Casual,California - LA,,


In [217]:
# Had a bit of concern of dataFrame column names are different than table column names, it doesn't matter it seems. Thats good news.
artist_data = df[['artist_id', 'artist_name', 'artist_location', 'artist_latitude', 'artist_longitude']].values[0]
artist_data

array(['ARD7TVE1187B99BFB1', 'Casual', 'California - LA', nan, nan], dtype=object)

In [218]:
df.head()

Unnamed: 0,artist_id,artist_latitude,artist_location,artist_longitude,artist_name,duration,num_songs,song_id,title,year
0,ARD7TVE1187B99BFB1,,California - LA,,Casual,218.93179,1,SOMZWCG12A8C13C480,I Didn't Mean To,0


#### Insert Record into Artist Table
Implement the `artist_table_insert` query in `sql_queries.py` and run the cell below to insert a record for this song's artist into the `artists` table. Remember to run `create_tables.py` before running the cell below to ensure you've created/resetted the `artists` table in the sparkify database.

In [219]:
try:    
    cur.execute(artist_table_insert, artist_data)
    conn.commit()
except Exception as e:
    conn.rollback()
    print("{} : Error Message : {}".format('Rolled back', e))

Run `test.ipynb` to see if you've successfully added a record to this table.

# Process `log_data`
In this part, you'll perform ETL on the second dataset, `log_data`, to create the `time` and `users` dimensional tables, as well as the `songplays` fact table.

Let's perform ETL on a single log file and load a single record into each table.
- Use the `get_files` function provided above to get a list of all log JSON files in `data/log_data`
- Select the first log file in this list
- Read the log file and view the data

In [220]:
filepath= 'data/log_data'
log_files = get_files(filepath)
print("Total Files {}".format(len(log_files)))
for i, fp in enumerate(log_files):
    print("{}. {}".format(i, fp))

Total Files 30
0. /home/workspace/data/log_data/2018/11/2018-11-14-events.json
1. /home/workspace/data/log_data/2018/11/2018-11-28-events.json
2. /home/workspace/data/log_data/2018/11/2018-11-30-events.json
3. /home/workspace/data/log_data/2018/11/2018-11-05-events.json
4. /home/workspace/data/log_data/2018/11/2018-11-01-events.json
5. /home/workspace/data/log_data/2018/11/2018-11-12-events.json
6. /home/workspace/data/log_data/2018/11/2018-11-09-events.json
7. /home/workspace/data/log_data/2018/11/2018-11-20-events.json
8. /home/workspace/data/log_data/2018/11/2018-11-08-events.json
9. /home/workspace/data/log_data/2018/11/2018-11-04-events.json
10. /home/workspace/data/log_data/2018/11/2018-11-11-events.json
11. /home/workspace/data/log_data/2018/11/2018-11-10-events.json
12. /home/workspace/data/log_data/2018/11/2018-11-27-events.json
13. /home/workspace/data/log_data/2018/11/2018-11-07-events.json
14. /home/workspace/data/log_data/2018/11/2018-11-03-events.json
15. /home/workspace/

In [221]:
df = pd.read_json(log_files[0], lines=True)
print("df shape {}".format(df.shape))
df.head(3)
#df.info()

df shape (457, 18)


Unnamed: 0,artist,auth,firstName,gender,itemInSession,lastName,length,level,location,method,page,registration,sessionId,song,status,ts,userAgent,userId
0,The Grass Roots,Logged In,Sara,F,72,Johnson,166.71302,paid,"Winston-Salem, NC",PUT,NextSong,1540809000000.0,411,Let's Live For Today,200,1542153802796,"""Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like...",95
1,Stars,Logged In,Sara,F,73,Johnson,298.94485,paid,"Winston-Salem, NC",PUT,NextSong,1540809000000.0,411,Time Can Never Kill The True Heart,200,1542153968796,"""Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like...",95
2,Eddie Palmieri,Logged In,Sara,F,74,Johnson,391.83628,paid,"Winston-Salem, NC",PUT,NextSong,1540809000000.0,411,Nada De Ti,200,1542154266796,"""Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like...",95


In [222]:
#convert timestamp(milli) to timestamp(normal)
df['ts'] = pd.to_datetime(df['ts'], unit='ms')

In [223]:
df[df.userId == '80']
df[(df.userId == '80') & (df.itemInSession == 0)]

Unnamed: 0,artist,auth,firstName,gender,itemInSession,lastName,length,level,location,method,page,registration,sessionId,song,status,ts,userAgent,userId
19,John Mayer,Logged In,Tegan,F,0,Levine,201.16853,paid,"Portland-South Portland, ME",PUT,NextSong,1540794000000.0,548,Waiting On The World To Change,200,2018-11-14 02:57:45.796,"""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4...",80
333,Wolfmother,Logged In,Tegan,F,0,Levine,280.86812,paid,"Portland-South Portland, ME",PUT,NextSong,1540794000000.0,574,Joker And The Thief,200,2018-11-14 15:13:13.796,"""Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4...",80


## #3: `time` Table
#### Extract Data for Time Table
- Filter records by `NextSong` action
- Convert the `ts` timestamp column to datetime
  - Hint: the current timestamp is in milliseconds
- Extract the timestamp, hour, day, week of year, month, year, and weekday from the `ts` column and set `time_data` to a list containing these values in order
  - Hint: use pandas' [`dt` attribute](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.html) to access easily datetimelike properties.
- Specify labels for these columns and set to `column_labels`
- Create a dataframe, `time_df,` containing the time data for this file by combining `column_labels` and `time_data` into a dictionary and converting this into a dataframe

In [224]:
dfNS = df[df.page == 'NextSong']
df.shape
#df.head()

(457, 18)

In [225]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.html
# Googled series.dt and got the below link
# This helped https://pandas.pydata.org/pandas-docs/stable/reference/series.html#time-series-related
# Search for Datetime properties
t = dfNS.copy()
t['ts'] = pd.to_datetime(t['ts'], unit='ms')
#t = t[:1] #Test with 1 row
t.head()
print(t.shape)
print(t.ts[0], t.ts.dt.hour[0], t.ts.dt.day[0] , t.ts.dt.dayofweek[0] , t.ts.dt.month[0], t.ts.dt.year[0] , t.ts.dt.weekday[0])

(390, 18)
2018-11-14 00:03:22.796000 0 14 2 11 2018 2


In [226]:
time_data = [t.ts, t.ts.dt.hour, t.ts.dt.day, t.ts.dt.week, t.ts.dt.month, t.ts.dt.year, t.ts.dt.weekday]
#time_data = [t.ts[0], t.ts.dt.hour[0], t.ts.dt.day[0] , t.ts.dt.dayofweek[0] , t.ts.dt.month[0], t.ts.dt.year[0] , t.ts.dt.weekday[0]]
column_labels = ['start_time', 'hour', 'day', 'week', 'month', 'year', 'weekday']

In [227]:
a = zip(column_labels, time_data)
a = list(a)
#print(a)
a = dict(zip(column_labels, time_data))
#print(a)

In [228]:
time_df = pd.DataFrame.from_dict(dict(zip(column_labels, time_data)))
time_df.head()

Unnamed: 0,start_time,hour,day,week,month,year,weekday
0,2018-11-14 00:03:22.796,0,14,46,11,2018,2
1,2018-11-14 00:06:08.796,0,14,46,11,2018,2
2,2018-11-14 00:11:06.796,0,14,46,11,2018,2
3,2018-11-14 00:17:37.796,0,14,46,11,2018,2
4,2018-11-14 00:20:25.796,0,14,46,11,2018,2


In [229]:
# Iterate each cell in a column
#for index, row in time_df['hour'].iteritems():
#    print(row)

In [230]:
# Iterate each rows
#for index, row in time_df.iterrows():
#    print(row['hour'], row['weekday'])

#### Insert Records into Time Table
Implement the `time_table_insert` query in `sql_queries.py` and run the cell below to insert records for the timestamps in this log file into the `time` table. Remember to run `create_tables.py` before running the cell below to ensure you've created/resetted the `time` table in the sparkify database.

In [231]:
try:        
    for i, row in time_df.iterrows():
        cur.execute(time_table_insert, list(row))
        conn.commit()    
except Exception as e:
    conn.rollback()
    print("{} : Error Message : {}".format('Rolled back', e))

Run `test.ipynb` to see if you've successfully added records to this table.

## #4: `users` Table
#### Extract Data for Users Table
- Select columns for user ID, first name, last name, gender and level and set to `user_df`

In [232]:
user_df = df[['userId', 'firstName', 'lastName', 'gender', 'level', 'ts']]
user_df.head()
user_df[user_df.userId == '80'].head(20)

Unnamed: 0,userId,firstName,lastName,gender,level,ts
19,80,Tegan,Levine,F,paid,2018-11-14 02:57:45.796
20,80,Tegan,Levine,F,paid,2018-11-14 03:01:06.796
21,80,Tegan,Levine,F,paid,2018-11-14 03:03:08.796
22,80,Tegan,Levine,F,paid,2018-11-14 03:06:36.796
23,80,Tegan,Levine,F,paid,2018-11-14 03:09:36.796
24,80,Tegan,Levine,F,paid,2018-11-14 03:12:32.796
25,80,Tegan,Levine,F,paid,2018-11-14 03:13:19.796
26,80,Tegan,Levine,F,paid,2018-11-14 03:17:50.796
27,80,Tegan,Levine,F,paid,2018-11-14 03:21:57.796
28,80,Tegan,Levine,F,paid,2018-11-14 03:26:35.796


In [233]:
user_df[(user_df.userId == '80') & (user_df.level == 'free')].sort_values(by=['userId', 'ts']).head()

Unnamed: 0,userId,firstName,lastName,gender,level,ts


#### Insert Records into Users Table
Implement the `user_table_insert` query in `sql_queries.py` and run the cell below to insert records for the users in this log file into the `users` table. Remember to run `create_tables.py` before running the cell below to ensure you've created/resetted the `users` table in the sparkify database.

#### Cleaning user_df

In [234]:
df_uc = user_df.copy()
print('All = ',df_uc.shape)
print('Null = ',df_uc[df_uc.firstName.isnull()].shape)
print('Not null = ',df_uc[df_uc.firstName.notnull()].shape)

All =  (457, 6)
Null =  (12, 6)
Not null =  (445, 6)


In [235]:
# Shows rows where any cell has null values.
#df_uc[df_uc.firstName.isnull()]
print(df_uc[df_uc.isnull().any(1)])

    userId firstName lastName gender level                      ts
78              None     None   None  free 2018-11-14 05:55:03.796
79              None     None   None  free 2018-11-14 05:55:41.796
80              None     None   None  free 2018-11-14 05:55:42.796
286             None     None   None  paid 2018-11-14 12:09:01.796
287             None     None   None  paid 2018-11-14 12:09:02.796
330             None     None   None  paid 2018-11-14 13:40:39.796
345             None     None   None  free 2018-11-14 15:27:30.796
359             None     None   None  free 2018-11-14 15:48:07.796
360             None     None   None  free 2018-11-14 15:48:19.796
425             None     None   None  paid 2018-11-14 19:38:09.796
426             None     None   None  paid 2018-11-14 19:38:34.796
427             None     None   None  paid 2018-11-14 19:38:50.796


In [236]:
# Assessment user_df : userId
# 12 Rows having userId column value as ''. So changing that to None datatype
print(df_uc.loc[78])
print(type(None))
print('Sample Nonetype = ',type(df_uc.loc[78,'firstName']))
print('Empty String = ',type(df_uc.loc[78,'userId']))

userId                                 
firstName                          None
lastName                           None
gender                             None
level                              free
ts           2018-11-14 05:55:03.796000
Name: 78, dtype: object
<class 'NoneType'>
Sample Nonetype =  <class 'NoneType'>
Empty String =  <class 'str'>


In [237]:
# Cleaning user_df : userId
df_uc = user_df.copy()
cnt=0
nullList = []
for row in df_uc['userId']:
    try:
        #print(cnt, df_uc.loc[cnt, 'userId'])
        if df_uc.loc[cnt, 'userId'] == '':
            nullList.append(cnt)
            #print(cnt)
            df_uc.loc[cnt, 'userId'] = None
    except ValueError:
        pass
    cnt+=1

#df_uc.loc[78, 'userId']
print('Null list = {}'.format(nullList))
print(df_uc[df_uc.userId.isnull()])

#df_uc[df_uc.firstName.isnull()].head()

#Need to clean up these null rows

Null list = [78, 79, 80, 286, 287, 330, 345, 359, 360, 425, 426, 427]
    userId firstName lastName gender level                      ts
78    None      None     None   None  free 2018-11-14 05:55:03.796
79    None      None     None   None  free 2018-11-14 05:55:41.796
80    None      None     None   None  free 2018-11-14 05:55:42.796
286   None      None     None   None  paid 2018-11-14 12:09:01.796
287   None      None     None   None  paid 2018-11-14 12:09:02.796
330   None      None     None   None  paid 2018-11-14 13:40:39.796
345   None      None     None   None  free 2018-11-14 15:27:30.796
359   None      None     None   None  free 2018-11-14 15:48:07.796
360   None      None     None   None  free 2018-11-14 15:48:19.796
425   None      None     None   None  paid 2018-11-14 19:38:09.796
426   None      None     None   None  paid 2018-11-14 19:38:34.796
427   None      None     None   None  paid 2018-11-14 19:38:50.796


In [238]:
print('All = ',df_uc.shape)
df_uc = df_uc[df_uc.firstName.notnull()]
print('Not Null = ',df_uc.shape)
user_df = df_uc.copy()

All =  (457, 6)
Not Null =  (445, 6)


In [239]:
user_df.head()

Unnamed: 0,userId,firstName,lastName,gender,level,ts
0,95,Sara,Johnson,F,paid,2018-11-14 00:03:22.796
1,95,Sara,Johnson,F,paid,2018-11-14 00:06:08.796
2,95,Sara,Johnson,F,paid,2018-11-14 00:11:06.796
3,95,Sara,Johnson,F,paid,2018-11-14 00:17:37.796
4,95,Sara,Johnson,F,paid,2018-11-14 00:20:25.796


In [240]:
#for i, row in df_uc.iterrows():
try:
    for i, row in user_df.iterrows():
        cur.execute(user_table_insert, row)
        conn.commit()
except Exception as e:
    conn.rollback()
    print("{} : Error Message : {}".format('Rolled back', e))

In [241]:
# Delete null rows
try:        
    cur.execute("delete from users a where a.user_id is null;")
    print(cur.rowcount)
except Exception as e:
    conn.rollback()
    print("{} : Error Message : {}".format('Rolled back', e))

0


Run `test.ipynb` to see if you've successfully added records to this table.

## #5: `songplays` Table
#### Extract Data and Songplays Table
This one is a little more complicated since information from the songs table, artists table, and original log file are all needed for the `songplays` table. Since the log file does not specify an ID for either the song or the artist, you'll need to get the song ID and artist ID by querying the songs and artists tables to find matches based on song title, artist name, and song duration time.
- Implement the `song_select` query in `sql_queries.py` to find the song ID and artist ID based on the title, artist name, and duration of a song.
- Select the timestamp, user ID, level, song ID, artist ID, session ID, location, and user agent and set to `songplay_data`

#### Insert Records into Songplays Table
- Implement the `songplay_table_insert` query and run the cell below to insert records for the songplay actions in this log file into the `songplays` table. Remember to run `create_tables.py` before running the cell below to ensure you've created/resetted the `songplays` table in the sparkify database.

In [242]:
df.shape

(457, 18)

In [243]:
df.head(1)

Unnamed: 0,artist,auth,firstName,gender,itemInSession,lastName,length,level,location,method,page,registration,sessionId,song,status,ts,userAgent,userId
0,The Grass Roots,Logged In,Sara,F,72,Johnson,166.71302,paid,"Winston-Salem, NC",PUT,NextSong,1540809000000.0,411,Let's Live For Today,200,2018-11-14 00:03:22.796,"""Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like...",95


In [244]:
print(song_select)


SELECT a.song_id, a.artist_id 
FROM songs a INNER JOIN artists b 
ON a.artist_id = b.artist_id 
WHERE a.title = %s AND b.name = %s AND a.duration = %s ;



In [245]:
#only clue here is 'df' thats log_data. Its a bit frustrating when its not said what to do. Time burn.
for index, row in df.iterrows():

    # get songid and artistid from song and artist tables
    a = sql_queries.song_select    
    cur.execute(a, (row.song, row.artist, row.length))
    results = cur.fetchone()

    print(results, (row.song, row.artist, row.length))
    
    if results:
        songid, artistid = results
    else:
#        continue
        songid, artistid = None, None

    '''
        print(songid)
        if type(songid) == type(None): 
            status = 'Skip'
        else:
            status = 'Insert'

        if type(songid) == type(None): 
            continue
    '''                
        
    #print("{} : {} : songid({}) song({}), artist({}), length({})".format(status, type(songid), songid, row.song, row.artist, row.length))        
    starttime = pd.to_datetime(row.ts,unit='ms')
        
    # insert songplay record
    # insert into songplay (songplay_id, start_time, user_id, level, song_id, artist_id, session_id, location, user_agent) 
    songplay_data = (starttime, row.userId, row.level, songid, artistid, row.sessionId, row.location, row.userAgent
                     , row.itemInSession, row.song, row.artist)
    try:
        cur.execute(songplay_table_insert, songplay_data)
        conn.commit()
    except Exception as e:
        conn.rollback()
        print("{} : Error Message : {}".format('Rolled back', e))        


None ("Let's Live For Today", 'The Grass Roots', 166.71302)
None ('Time Can Never Kill The True Heart', 'Stars', 298.94485)
None ('Nada De Ti', 'Eddie Palmieri', 391.83628)
None ('Give In', 'The Bravery', 168.14975)
None ('Anna', 'K.U.K.L', 181.28934)
None ('Syndicate', 'The Fray', 209.97179)
None ('Roll Over Beethoven', 'Chuck Berry', 147.06893)
None ('Kings Of The Carnival Creation', 'Dimmu Borgir', 468.47955)
None ("Ain't No Doubt", 'Jimmy Nail', 245.65506)
None ('Pork And Beans', 'Weezer', 189.43955)
None (None, None, nan)
None ("Beggin'", 'Madcon', 215.92771)
None ('Teach Me Tonight', 'Joe Williams / Count Basie', 184.94649)
None ('Piano Concerto No. 2 in G minor Op. 22 (2006 Digital Remaster): III.  Presto', 'Emil Gilels/Orchestre de la SociÃ\x83Â©tÃ\x83Â© des Concerts du Conservatoire/AndrÃ\x83Â© Cluytens', 375.19628)
None ('Sunday Morning', 'Modwheelmood', 209.71057)
None (None, None, nan)
None (None, None, nan)
None (None, None, nan)
None (None, None, nan)
None ('Waiting On Th

Run `test.ipynb` to see if you've successfully added records to this table.

# Close Connection to Sparkify Database

In [5]:
conn.close()

# Implement `etl.py`
Use what you've completed in this notebook to implement `etl.py`.