In [1]:
import json
import os
import time
import warnings

import numpy as np
import pandas as pd
import requests

import config
warnings.filterwarnings("ignore")

In [2]:
# Import clean spotify playlist & song data
data = '/Users/florencialeoni/code/omg_i_love_that_song/data/spotify_audio_features_complete.csv'

In [3]:
# Create dataframe
df = pd.read_csv(data)

In [4]:
# Check dataframe first entries
df.head()

Unnamed: 0.1,Unnamed: 0,playlist_id,playlist_name,track_uri,track_name,artist_name,album_name,duration_ms,key,mode,time_signature,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo
0,0,../data/spotify1.json-1,Throwbacks,0UaMYEvWZi0ZqiDOoHU3YI,Lose Control (feat. Ciara & Fat Man Scoop),Missy Elliott,The Cookbook,226864.0,4.0,0.0,4.0,0.0311,0.904,0.813,0.00697,0.0471,-7.105,0.121,0.81,125.461
1,1,../data/spotify1.json-1,Throwbacks,6I9VzXrHxO9rA9A5euc8Ak,Toxic,Britney Spears,In The Zone,198800.0,5.0,0.0,4.0,0.0249,0.774,0.838,0.025,0.242,-3.914,0.114,0.924,143.04
2,2,../data/spotify1.json-1,Throwbacks,0WqIKmW4BTrj3eJFmnCKMv,Crazy In Love,Beyoncé,Dangerously In Love (Alben für die Ewigkeit),235933.0,2.0,0.0,4.0,0.00238,0.664,0.758,0.0,0.0598,-6.583,0.21,0.701,99.259
3,3,../data/spotify1.json-1,Throwbacks,1AWQoqb9bSvzTjaLralEkT,Rock Your Body,Justin Timberlake,Justified,267267.0,4.0,0.0,4.0,0.202,0.891,0.714,0.000234,0.0521,-6.055,0.14,0.818,100.972
4,4,../data/spotify1.json-1,Throwbacks,1lzr43nnXAijIGYnCT8M8H,It Wasn't Me,Shaggy,Hot Shot,227600.0,0.0,1.0,4.0,0.0561,0.853,0.606,0.0,0.313,-4.596,0.0713,0.654,94.759


In [5]:
# Drop Unnamed column from dataframe
df.drop(["Unnamed: 0"], axis = 1, inplace = True)

In [6]:
# Create empty columns for the values that will be pulled through API
df["release_date"] = None
df["popularity"] = np.nan

In [9]:
# Create base URL for API calls
base_url = "https://api.spotify.com/v1/tracks/"
# Iterate row by row making calls to API with track_uri information
for index, row in df.iterrows():
    # Printing index to have a visual check on progress (know which index is being processed)
    print(f"\r{index}", end = "")
    # Check if one of the new columns is empty, if so, start making API calls from that index on
    if pd.isna(row["release_date"]):
        # Make request to API using get method
        response = requests.get(base_url + row['track_uri'],
            headers={'Authorization': 'Bearer '})
        # If response is not 200 OK, print error message (to have a visual check on progress)
        while response.status_code != 200:
            print("Response error, waiting.")
            # Wait one second before making next request
            time.sleep(1)
            # Make request and check status again
            response = requests.get(base_url + row['track_uri'],
                headers={'Authorization': 'Bearer '})
        
        # Use json method on the response
        track_data = response.json()
        # Fill each column of the row with the values gotten from the API response
        df.at[index, "release_date"] = track_data["album"]["release_date"]
        df.at[index, "popularity"] = track_data["popularity"]


In [11]:
df.head()

Unnamed: 0,playlist_id,playlist_name,track_uri,track_name,artist_name,album_name,duration_ms,key,mode,time_signature,...,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,release_date,popularity,release_data
0,../data/spotify1.json-1,Throwbacks,0UaMYEvWZi0ZqiDOoHU3YI,Lose Control (feat. Ciara & Fat Man Scoop),Missy Elliott,The Cookbook,226864.0,4.0,0.0,4.0,...,0.813,0.00697,0.0471,-7.105,0.121,0.81,125.461,,65.0,2005-07-04
1,../data/spotify1.json-1,Throwbacks,6I9VzXrHxO9rA9A5euc8Ak,Toxic,Britney Spears,In The Zone,198800.0,5.0,0.0,4.0,...,0.838,0.025,0.242,-3.914,0.114,0.924,143.04,,78.0,2003-11-13
2,../data/spotify1.json-1,Throwbacks,0WqIKmW4BTrj3eJFmnCKMv,Crazy In Love,Beyoncé,Dangerously In Love (Alben für die Ewigkeit),235933.0,2.0,0.0,4.0,...,0.758,0.0,0.0598,-6.583,0.21,0.701,99.259,,58.0,2003
3,../data/spotify1.json-1,Throwbacks,1AWQoqb9bSvzTjaLralEkT,Rock Your Body,Justin Timberlake,Justified,267267.0,4.0,0.0,4.0,...,0.714,0.000234,0.0521,-6.055,0.14,0.818,100.972,,73.0,2002-11-04
4,../data/spotify1.json-1,Throwbacks,1lzr43nnXAijIGYnCT8M8H,It Wasn't Me,Shaggy,Hot Shot,227600.0,0.0,1.0,4.0,...,0.606,0.0,0.313,-4.596,0.0713,0.654,94.759,,17.0,2000


In [18]:
# Save the DataFrame created to a csv file
df.to_csv(r'/Users/florencialeoni/code/omg_i_love_that_song/data/spotify_genres_popularity.csv')