- Spotify CSV File: data.csv
- Spotify API: https://developer.spotify.com/documentation/web-api/reference/#endpoint-get-audio-features
* .gitignore file contains config.py file to store the spotify API client username and password. 

In [1]:
#Dependencies
import pandas as pd
from sqlalchemy import create_engine

SPOTIFY CSV FILE - Extract and Transform



Extract CSV into DataFrames

In [2]:
spotify_csv = "resources/data.csv"

Transform CSV DataFrame

In [3]:
csv_df = pd.read_csv(spotify_csv)
csv_df.head()
# Keep columns: id, name, artist, key, tempo, year, popularity

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0.991,['Mamie Smith'],0.598,168333,0.224,0,0cS0A1fUEUd1EW3FcF8AEI,0.000522,5,0.379,-12.628,0,Keep A Song In Your Soul,12,1920,0.0936,149.976,0.634,1920
1,0.643,"[""Screamin' Jay Hawkins""]",0.852,150200,0.517,0,0hbkKFIJm7Z05H8Zl9w30f,0.0264,5,0.0809,-7.261,0,I Put A Spell On You,7,1920-01-05,0.0534,86.889,0.95,1920
2,0.993,['Mamie Smith'],0.647,163827,0.186,0,11m7laMUgmOKqI3oYzuhne,1.8e-05,0,0.519,-12.098,1,Golfing Papa,4,1920,0.174,97.6,0.689,1920
3,0.000173,['Oscar Velazquez'],0.73,422087,0.798,0,19Lc5SfJJ5O1oaxY0fpwfh,0.801,2,0.128,-7.311,1,True House Music - Xavier Santos & Carlos Gomi...,17,1920-01-01,0.0425,127.997,0.0422,1920
4,0.295,['Mixe'],0.704,165224,0.707,1,2hJjbsLCytGsnAHfdsLejp,0.000246,10,0.402,-6.036,0,Xuniverxe,2,1920-10-01,0.0768,122.076,0.299,1920


In [4]:
# Select only the columns required
new_csv_df = csv_df[['id','name', 'artists','key','tempo','year','popularity']].copy()
new_csv_df.head()

# Drop ids with duplicates to ensure no double up
new_csv_df = new_csv_df.drop_duplicates(subset=['id'])

# TEST: new_csv_df.info()
# There are 174389 row entries before drop duplicates
# There are 172230 row entries after drop duplicates

# Rename columns
new_csv_df=new_csv_df.rename(columns={'name':'s_name', 'artists': 's_artist','key':'s_key','tempo':'s_tempo','year':'s_year','popularity':'s_popularity'})
new_csv_df


Unnamed: 0,id,s_name,s_artist,s_key,s_tempo,s_year,s_popularity
0,0cS0A1fUEUd1EW3FcF8AEI,Keep A Song In Your Soul,['Mamie Smith'],5,149.976,1920,12
1,0hbkKFIJm7Z05H8Zl9w30f,I Put A Spell On You,"[""Screamin' Jay Hawkins""]",5,86.889,1920,7
2,11m7laMUgmOKqI3oYzuhne,Golfing Papa,['Mamie Smith'],0,97.600,1920,4
3,19Lc5SfJJ5O1oaxY0fpwfh,True House Music - Xavier Santos & Carlos Gomi...,['Oscar Velazquez'],2,127.997,1920,17
4,2hJjbsLCytGsnAHfdsLejp,Xuniverxe,['Mixe'],10,122.076,1920,2
...,...,...,...,...,...,...,...
174379,45XnLMuqf3vRfskEAMUeCH,A Little More,['Alessia Cara'],4,94.710,2021,0
174381,4pPFI9jsguIh3wC7Otoyy8,A Little More,['Alessia Cara'],4,94.710,2021,0
174383,52YtxLVUyvtiGPxwwxayHZ,A Little More,['Alessia Cara'],4,94.710,2021,0
174385,7tue2Wemjd0FZzRtDrQFZd,A Little More,['Alessia Cara'],4,94.710,2021,0


SPOTIFY API

- Objective: To extract the time signature of all tracks on spotify

In [6]:
#Dependences
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config import *
import requests
# from config import cid
# from config import secret

In [12]:


CLIENT_ID = cid
CLIENT_SECRET = secret

AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST
auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
})

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

headers = {
    'Authorization': 'Bearer {token}'.format(token=access_token)
}

In [10]:
id_list= []
for index, row in csv_df.iterrows():
    print(row['id'])
    id_list.append(row['id'])
    # print(id_list)

KFpwNBYnooFm
5zx77WvehRW4sbXWEmenY2
188vCXc4bAejLqGhpEVBGT
62bOSBdk5fEPJtPjvp7NIn
6urLjX35oXXRYtcH1b3dCh
13J9KEgQztokbVsq20sp6f
2Q85xwXagilpHKTfdrbC7M
33OAw9HSbqb9PIawSwbRSL
16iNVfWKKZPDjctIcd0KMn
7Ho9Ci4iUms5T2msr5tqoa
6xL9MJ2yt39HPPZPyGkRy8
1CXIdCgBqZg5QlUIbVrZ88
2bPYtv3okqX5zpSxtwLBuu
5DTZxyH5wlTvjwbp8Ok1U5
23xup5SPK9QGT5Rs5qUzOm
4RrSb0bSX4z3DZQdVXSOSm
6A8dnC0xkiuWN4BshmTB2I
76Eam9jEJXJrYYdJATJhET
0y8UKPyJOluqIuacosTKEv
6Fl5wnMHLvtB6pyx7W7aeq
1DA7FEKgQeyRPtU44L5DOf
0e4EqF8HiAKVS7o6bDMOf9
3JWh19WsndEFg0UFRFeTiU
7IHYGIz0dZRngGMuQciEVT
5u9cuDcYdOpSRB7fAYXXdp
3rQWfyD9N3kTkp2305bT5P
11fWR3u9wjDMW4oVDbUbyT
5whJtTCr8X0rsi414ltkei
0WSPIU3Y3bNXsGeKnzSnt1
5I0BbcHNaVODukbJbEWBkJ
3zoFkjJCH2ntsad8kfRhyi
2i4xYu1vBMvslBK2zbVvjI
3OuXuCuaZ889JdW2cX1dDw
6bwlVR9dwAWfJpXgnslbdz
1rLWsLY98u9hcWePdfN2y5
2wF80wAos6cSWuoE7JF4HG
53mrVsi49rLHIaKBiSvElG
34XoBxjnbj0z8oH1ovtp3w
5YY7ht3PCArlLjLbcTiAvh
7J2uqAiMXoYQhcxo1TkoMc
5WPXnJulBVflChZ8qIe91d
1V297gw8pk1VROba6cKpzg
5GG3knKdxKWrNboRijxeKF
2B4ZkrVA48qTzHJQ1R8eF

In [39]:
id_timesig= []
count=0
# for item in id_list[:5]:
for item in id_list:
    # url = "https://api.spotify.com/v1/audio-features/{id}"

    BASE_URL = 'https://api.spotify.com/v1/'

    # Track ID from the URI
    # track_id = '6y0igZArWVi6Iz0rj35c1Y'
    track_id = item
    # actual GET request with proper header
    r = requests.get(BASE_URL + 'audio-features/' + track_id, headers=headers)
    r = r.json()
    # print(r)

    try:
        # print(f"id: {r['id']}")
        # print(f"time_signature: {r['time_signature']}")
        # print('-------------------------')
        new_song = r['id'],r['time_signature']
        id_timesig.append(new_song)
        count+=1
        print(count)
    # except ValueError:
    except ValueError or KeyError:
        print("JSON Decode Error on id or time signature")

print("All ids and time signatures appended")


KeyError: 'id'

In [19]:
id_timesig

[('0cS0A1fUEUd1EW3FcF8AEI', 4),
 ('0hbkKFIJm7Z05H8Zl9w30f', 4),
 ('11m7laMUgmOKqI3oYzuhne', 4),
 ('19Lc5SfJJ5O1oaxY0fpwfh', 4),
 ('2hJjbsLCytGsnAHfdsLejp', 4),
 ('3HnrHGLE9u2MjHtdobfWl9', 4),
 ('5DlCyqLyX2AOVDTjjkDZ8x', 4),
 ('02FzJbHtqElixxCmrpSCUa', 3),
 ('02i59gYdjlhBmbbWhf8YuK', 4),
 ('06NUxS2XL3efRh0bloxkHm', 4),
 ('07jrRR1CUUoPb1FLfSy9Jh', 4),
 ('0ANuF7SvPeIHanGcCpy9jR', 4),
 ('0BEO6nHi1rmTOPiEZvCIDW', 4),
 ('0DH1IROKoPK5XTglUt9Pq0', 4),
 ('0HVjPaxbyfFcg8Rh0plyo5', 4),
 ('0Hn7LWy1YcKhPaA2NItG9K', 4),
 ('0I6DjrEfd3fKFESHEjnelr', 4),
 ('0KGiP9EW1xtojDHsTGARL5', 4),
 ('0KNI2d7l3ByVHU0g2aW3P0', 4),
 ('0LYNwxHYHPW256lO2phedM', 4),
 ('0MwMyEO5AxYpghTU6gB85H', 4),
 ('0OM9aSti0UOwN9yuz1m85y', 4),
 ('0RdLX7r5HrPOGQabZIjCfi', 4),
 ('0Rfu6umKtRBny2reKydNph', 4),
 ('0TIfXVGqyiOTuWkuxEzQzc', 4),
 ('0TT2zA8fatUuvTnPABz3lS', 4),
 ('0UYplqEm5qa4hkEo64C0TW', 3),
 ('0UztrUuVBPMn7cEys3LWQ4', 4),
 ('0XGoQAeP51ySJbtqXhuJrl', 4),
 ('0YCVsd1quXrjZWIiROp1vw', 4),
 ('0YHjhp3fj1IgLFtDsw8v3M', 4),
 ('0Yt4h

In [23]:
id_timesig_df = pd.DataFrame(id_timesig)
id_timesig_df

Unnamed: 0,0,1
0,0cS0A1fUEUd1EW3FcF8AEI,4
1,0hbkKFIJm7Z05H8Zl9w30f,4
2,11m7laMUgmOKqI3oYzuhne,4
3,19Lc5SfJJ5O1oaxY0fpwfh,4
4,2hJjbsLCytGsnAHfdsLejp,4
...,...,...
5817,5jSpy3KyBwLxruntfWbcDR,4
5818,5ttXtQa7Ru0AlS9ox0v0kC,4
5819,6cD744rp35E6G1XfVkoKj6,4
5820,0KKOffDuUhWFxyrA4yCbom,4


In [36]:
id_timesig_df=id_timesig_df.rename(columns={0: "id", 1: "s_timesig"})
#Save to csv file
id_timesig_df.to_csv('resources/id_timesig_df')
id_timesig_df


Unnamed: 0,id,s_timesig
0,0cS0A1fUEUd1EW3FcF8AEI,4
1,0hbkKFIJm7Z05H8Zl9w30f,4
2,11m7laMUgmOKqI3oYzuhne,4
3,19Lc5SfJJ5O1oaxY0fpwfh,4
4,2hJjbsLCytGsnAHfdsLejp,4
...,...,...
5817,5jSpy3KyBwLxruntfWbcDR,4
5818,5ttXtQa7Ru0AlS9ox0v0kC,4
5819,6cD744rp35E6G1XfVkoKj6,4
5820,0KKOffDuUhWFxyrA4yCbom,4


Load

In [30]:
# Connect to local database
rds_connection_string = "postgres:postgres@localhost:5432/music_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [31]:
# Create a music_db. Add the tables into postgres database as per schema.sql
engine.table_names()

['spotify_csv', 'spotify_api']

In [35]:
# Load pandas dataframe new_csv_df to database music_db, table spotify_csv
new_csv_df.to_sql(name='spotify_csv', con=engine, if_exists='append', index=False)

In [None]:
# Load pandas dataframe id_timesig_df to database music_db, table spotify_api
id_timesig_df.to_sql(name='spotify_api', con=engine, if_exists='append', index=False)
