In [1]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import json
import os
import requests

In [46]:
# Generate token
#
# Two separate credentials are created in this cell:
#   * `token` - a user token requested for `scope`; further down it is sent as the
#     Bearer token on direct `requests` calls to the Web API search endpoint.
#   * `sp`    - the spotipy client used by get_track_uri / get_audio_features.

username = "081584346736"
# Credentials come from the environment; a missing variable raises KeyError here.
client_id = os.environ["SPOTIPY_CLIENT_ID"]
client_secret = os.environ["SPOTIPY_CLIENT_SECRET"]
redirect_uri = os.environ["SPOTIPY_REDIRECT_URI"]
scope = "user-read-recently-played"

token = util.prompt_for_user_token(username=username,
                                   scope=scope,
                                   client_id=client_id,
                                   client_secret=client_secret,
                                   redirect_uri=redirect_uri)

# NOTE(review): SpotifyClientCredentials() is built with no arguments, so it does not
# receive client_id/client_secret from the variables above - presumably spotipy falls
# back to the same SPOTIPY_* environment variables; confirm. `sp` does not use `token`.
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

In [3]:
# Import Data

def _load_streaming_history(path):
    """Read one StreamingHistory JSON export and flatten it into a DataFrame.

    Parameters
    ----------
    path : str
        Location of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        The result of ``pd.json_normalize`` on the parsed JSON content.
    """
    # JSON is UTF-8 by specification; relying on the locale default encoding can
    # mis-decode non-ASCII artist/track names on some platforms.
    with open(path, "r", encoding="utf-8") as history_file:
        return pd.json_normalize(json.load(history_file))


data0 = _load_streaming_history("raw/StreamingHistory0.json")
data1 = _load_streaming_history("raw/StreamingHistory1.json")
data2 = _load_streaming_history("raw/StreamingHistory2.json")

# df = pd.concat([data0, data1, data2])
# df

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2020-12-18 08:12,Taylor Swift,evermore (feat. Bon Iver),304106
1,2020-12-18 08:13,Rachel Platten,Fight Song,32240
2,2020-12-18 08:18,Two Steps from Hell,Star Sky,330579
3,2020-12-18 08:24,Thomas Bergersen,Empire of Angels,316533
4,2020-12-18 08:46,SVRCINA,Meet Me on the Battlefield,1207
...,...,...,...,...
9995,2021-12-01 05:54,MIKA,Ready To Call This Love,229840
9996,2021-12-01 05:57,MIKA,Kids,183026
9997,2021-12-01 06:01,MIKA,Paloma,222893
9998,2021-12-01 08:08,MIKA,Emily,120030


In [13]:
# Read data
# `df` is the play history; the track-URI loop below writes a "track_uri" column
# into it and checkpoints back to this same file.
df = pd.read_csv("processed/played_songs.csv")
# `df_af` is the frame the audio-features loop below fills in and saves back to this file.
df_af = pd.read_csv("processed/played_songs_af.csv")

### Get track_uri

In [None]:
# Get track ID from track name and artist name

def get_track_uri(df, df_index):
    """Look up the Spotify URI of the track stored in one row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``artistName`` and ``trackName`` columns.
    df_index : int
        Positional (``iloc``) row number of the track to look up.

    Returns
    -------
    str or None
        URI of the first search hit, or ``None`` when the search call fails
        or returns no items.

    Notes
    -----
    Uses the module-level spotipy client ``sp``.
    """
    row = df.iloc[df_index]
    artist_name = row["artistName"]
    track_name = row["trackName"]

    try:
        # Only the first hit is used, so ask the API for a single result.
        tracks = sp.search(q=f"artist:{artist_name} track:{track_name}",
                           type="track",
                           limit=1)
        return tracks["tracks"]["items"][0]["uri"]
    except Exception:
        # Best-effort lookup: API errors and empty results map to None.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, so a long-running batch loop can still be stopped.
        return None


# run from 0 until len(df)
# Rows are processed in manually chosen batches. The stop is clamped to len(df)
# so a batch that reaches past the end of the frame finishes cleanly.
# NOTE: get_track_uri reads rows by position while the write below uses labels;
# the two agree only while df keeps its default 0..n-1 index.
for i in range(24000, min(30000, len(df))):
    df.loc[i, "track_uri"] = get_track_uri(df, i)

    # Periodic checkpoint so an interrupted run loses at most 500 lookups.
    if i % 500 == 0:
        df.to_csv("processed/played_songs.csv", index=False)
        print(f"=== Iteration {i} done ===")

# Final save: without it, rows looked up after the last checkpoint are never written.
df.to_csv("processed/played_songs.csv", index=False)


In [31]:
def get_uri(artist_name, track_name, token):
    """Find a track URI by calling the Spotify Web API search endpoint directly.

    Parameters
    ----------
    artist_name : str
        Artist to restrict the search to.
    track_name : str
        Track title to search for.
    token : str
        OAuth access token, sent as a Bearer token.

    Returns
    -------
    str or None
        URI of the first search hit, or ``None`` when the request fails, the
        response is not the expected JSON (e.g. an error payload), or there
        are no hits.
    """
    headers = {'Accept': 'application/json',
               'Content-Type': 'application/json',
               'Authorization': 'Bearer ' + token}
    # Include the artist filter (same query format as get_track_uri); searching
    # on the title alone can return a same-named track by a different artist.
    params = [('q', f"artist:{artist_name} track:{track_name}"),
              ('type', 'track'),
              ('limit', 1)]  # only the first hit is used

    try:
        response = requests.get(url='https://api.spotify.com/v1/search',
                                headers=headers,
                                params=params,
                                timeout=5)
        payload = response.json()
        return payload['tracks']['items'][0]['uri']
    except Exception:
        # Best-effort: network errors, error payloads and empty results map to
        # None, while KeyboardInterrupt/SystemExit still propagate.
        return None

In [47]:
# Manual probe of the search endpoint with the user token: same headers and
# parameter layout as get_uri, for one known track title.
headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + token,
}
params = [
    ('q', "evermore (feat. Bon Iver)"),
    ('type', 'track'),
]

response = requests.get(
    'https://api.spotify.com/v1/search',
    headers=headers,
    params=params,
    timeout=5,
)

# Last expression of the cell: shows the response object (and its status code).
response

<Response [429]>

In [48]:
# One-off lookup through the spotipy client `sp`, using the same
# "artist:... track:..." query format as get_track_uri and asking for a single hit.
artist = "Taylor Swift"
track = "evermore (feat. Bon Iver)"
sp.search(q=f"artist:{artist} track:{track}", type="track", limit=1)

KeyboardInterrupt: 

### Get track audio features

In [5]:
# Audio-feature columns to add to the play-history frame (the keys are the column names).
features = {"danceability": [0], "energy": [0], "loudness": [0],
         "speechiness": [0], "acousticness": [0], "instrumentalness": [0],
         "liveness": [0], "valence": [0], "tempo": [0],
         "key": [0], "mode": [0], "time_signature": [0]}

# Add the columns without adding rows. Concatenating a one-row frame along axis=0
# also appended a bogus all-zero record with a duplicate index label 0, and did so
# again on every re-run of the cell. reindex() only widens the frame (new columns
# are filled with NaN), and skipping columns that already exist keeps the cell idempotent.
df = df.reindex(columns=[*df.columns,
                         *(name for name in features if name not in df.columns)])

In [20]:
# Get audio features of each track

# Order of the values returned by get_audio_features.
_AUDIO_FEATURE_KEYS = (
    "danceability", "energy", "loudness",
    "speechiness", "acousticness", "instrumentalness",
    "liveness", "valence", "tempo",
    "key", "mode", "time_signature",
)


def get_audio_features(track_uri):
    """Fetch the audio features of one track as a flat list.

    Parameters
    ----------
    track_uri : str
        URI of the track to look up. Falsy values (``None``, ``""``) and
        float values (the NaN that a missing URI becomes after a CSV round
        trip) are treated as "no URI".

    Returns
    -------
    list or None
        Feature values in the order of ``_AUDIO_FEATURE_KEYS``, or ``None``
        when there is no URI, the API call fails, or the API returns no
        feature record for the track.

    Notes
    -----
    Uses the module-level spotipy client ``sp``.
    """
    # NaN is truthy, so check for floats explicitly; no API call is made for them.
    if not track_uri or isinstance(track_uri, float):
        return None

    try:
        features = sp.audio_features(track_uri)[0]
        return [features[key] for key in _AUDIO_FEATURE_KEYS]
    except Exception:
        # Best-effort: API errors and missing feature records map to None,
        # while KeyboardInterrupt/SystemExit still propagate so a long batch
        # loop can be stopped.
        return None


# run from 0 until len(df)
# Rows are processed in manually chosen batches. The stop is clamped to
# len(df_af) so a batch that reaches past the end of the frame finishes cleanly.
for i in range(20000, min(30000, len(df_af))):
    # Column 4 is read as the track URI; columns 5 onward receive the feature values.
    df_af.iloc[i, 5:] = get_audio_features(df_af.iloc[i, 4])

    # Periodic checkpoint so an interrupted run loses at most 500 lookups.
    if i % 500 == 0:
        df_af.to_csv("processed/played_songs_af.csv", index=False)
        print(f"=== Iteration {i} done ===")

# Final save: without it, rows processed after the last checkpoint are never written.
df_af.to_csv("processed/played_songs_af.csv", index=False)
print("=== Done ===")

=== Iteration 20000 done ===
=== Iteration 20500 done ===
=== Iteration 21000 done ===
=== Iteration 21500 done ===
=== Iteration 22000 done ===
=== Iteration 22500 done ===
=== Iteration 23000 done ===
=== Iteration 23500 done ===
=== Iteration 24000 done ===
=== Iteration 24500 done ===
=== Iteration 25000 done ===
=== Iteration 25500 done ===
=== Iteration 26000 done ===
=== Iteration 26500 done ===
=== Iteration 27000 done ===
=== Iteration 27500 done ===
=== Iteration 28000 done ===
=== Iteration 28500 done ===
=== Iteration 29000 done ===
=== Iteration 29500 done ===
=== Done ===
