# Setup 
I used spotipy to access the Spotify API; my API tokens are stored privately in `config.py`.

In [1]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config import spotify_client_id, spotify_client_secret

Create a connection to the Spotify API.

In [2]:
#build the credentials manager from the client id/secret pair imported from config
client_credentials_manager = SpotifyClientCredentials(
    client_id = spotify_client_id,
    client_secret = spotify_client_secret,
)
#API client used by every later cell
sp = spotipy.Spotify(
    client_credentials_manager = client_credentials_manager,
)

# Get Tracks 
Crazy Ex-Girlfriend has 4 seasons' worth of tracks. Thankfully, the good people of the show have put all the tracks in a Spotify playlist: "Crazy Ex-girlfriend: The Complete Collection".

In [4]:
#uri token for the playlist
playlist_URI = "spotify:playlist:5lJ50jaxfkNEDkCQFKsABt"
#uri token for the user who created the playlist:
user_URI = "spotify:user:crazyexgirlfriend_official"

#number of playlist entries requested per API call
page_size = 100

#get track information for all the tracks in the playlist.
#The first page covers indices 0..page_size-1, so the second page must start at
#offset page_size; starting one earlier would fetch the last track of page one twice.
tracks1 = sp.user_playlist_tracks(user_URI, playlist_URI, limit = page_size)
tracks2 = sp.user_playlist_tracks(user_URI, playlist_URI, limit = page_size, offset = page_size)

#gather the entries of both pages, then keep following the paging links so that
#nothing is silently dropped if the playlist has more than two pages of entries
playlist_items = list(tracks1["items"]) + list(tracks2["items"])
page = tracks2
while page.get("next"):
    page = sp.next(page)
    playlist_items.extend(page["items"])

#collect track uri's in a list
#(entries with no track payload are skipped; subscripting None would raise TypeError)
track_uri_list = [item['track']['uri'] for item in playlist_items if item.get('track')]

# Create Dataframe to Hold Track Level Information

In [6]:
#column names, grouped by where each value is taken from
track_columns = [
    #track-level metadata
    'name', 'track_uri', 'explicit', 'duration_ms', 'track_number', 'popularity',
    #audio features
    'key', 'mode', 'time_signature', 'acousticness', 'danceability', 'energy',
    'instrumentalness', 'liveness', 'loudness', 'speechiness', 'valence', 'tempo',
    #values derived in this notebook
    'season', 'artists',
]
#start from an empty dataframe that only carries the column layout
df_tracks = pd.DataFrame(columns = track_columns)

In [11]:
#extract information for each track

#audio-feature columns that are stored as floats (time_signature is stored as returned)
_float_audio_features = ['key', 'mode', 'acousticness', 'danceability', 'energy', 'instrumentalness',
                         'liveness', 'loudness', 'speechiness', 'valence', 'tempo']
#season labels, checked in this order against the album name
_season_labels = ("Season 1", "Season 2", "Season 3", "Season 4")


def _season_from_album(album_name):
    """Return the first season label contained in ``album_name``.

    Albums whose name carries none of the labels are filed under "Season 3",
    matching the fallback this notebook has always used.
    """
    for label in _season_labels:
        if label in album_name:
            return label
    return "Season 3"


def _track_record(track_info, features):
    """Flatten one track into a dict keyed by the ``track_columns`` names.

    Parameters
    ----------
    track_info : dict
        Result of ``sp.track`` for the track.
    features : dict or None
        Audio-feature entry for the track; when None, every audio-feature
        column is left empty instead of raising.

    Returns
    -------
    dict
        One row of track-level data.
    """
    record = {
        'name': track_info['name'],
        'track_uri': track_info['uri'],
        'explicit': track_info['explicit'],
        'duration_ms': track_info['duration_ms'],
        'track_number': track_info['track_number'],
        'popularity': track_info['popularity'],
        'season': _season_from_album(track_info['album']['name']),
        #names of every artist credited on the track
        'artists': [artist['name'] for artist in track_info['artists']],
    }
    for feature_name in _float_audio_features:
        record[feature_name] = float(features[feature_name]) if features else None
    record['time_signature'] = features['time_signature'] if features else None
    return record


#collect one record per track, then build the dataframe in a single step
#(growing a dataframe with concat inside the loop copies it on every iteration)
track_records = []
for track_uri in track_uri_list:
    #get track info
    track_info = sp.track(track_uri)
    #get audio features; the call returns a list with one entry per requested track
    #NOTE(review): presumably that entry can be None when no audio analysis exists - confirm
    audio_features = sp.audio_features(track_uri)
    features = audio_features[0] if audio_features else None
    track_records.append(_track_record(track_info, features))

#rebuild df_tracks from scratch so that re-running this cell does not append duplicate rows
df_tracks = pd.DataFrame(track_records, columns = track_columns)
    

In [22]:
#drop demo recordings: any track whose title contains "demo", ignoring case
is_demo = df_tracks['name'].str.contains("demo" , case = False)
df_tracks = df_tracks[~is_demo]

#when several rows share a title, keep only the last one
df_tracks = df_tracks.drop_duplicates(keep = "last", subset = "name")

157 songs were written for the show, so I had to hand-clean some of the rows. I added links to embedded YouTube videos and used them to check.

In [23]:
#number of tracks left after removing demos and duplicate titles
df_tracks.shape[0]

160

In [24]:
#write the track table to track_info.csv, relative to the working directory;
#with to_csv's default settings the row index is written as the first column
df_tracks.to_csv("track_info.csv")