In [12]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import json
from datetime import datetime

In [3]:
# Import Data

with open("raw/StreamingHistory0.json", "r") as f0:
    data0 = json.load(f0)

with open("raw/StreamingHistory1.json", "r") as f1:
    data1 = json.load(f1)

with open("raw/StreamingHistory2.json", "r") as f2:
    data2 = json.load(f2)

data0 = pd.json_normalize(data0)
data1 = pd.json_normalize(data1)
data2 = pd.json_normalize(data2)

# df = pd.concat([data0, data1, data2])
# df

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2020-12-18 08:12,Taylor Swift,evermore (feat. Bon Iver),304106
1,2020-12-18 08:13,Rachel Platten,Fight Song,32240
2,2020-12-18 08:18,Two Steps from Hell,Star Sky,330579
3,2020-12-18 08:24,Thomas Bergersen,Empire of Angels,316533
4,2020-12-18 08:46,SVRCINA,Meet Me on the Battlefield,1207
...,...,...,...,...
9995,2021-12-01 05:54,MIKA,Ready To Call This Love,229840
9996,2021-12-01 05:57,MIKA,Kids,183026
9997,2021-12-01 06:01,MIKA,Paloma,222893
9998,2021-12-01 08:08,MIKA,Emily,120030


In [54]:
# Read data
df = pd.read_csv("played_songs.csv")
df.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed,track_uri
0,2020-12-18 08:12,Taylor Swift,evermore (feat. Bon Iver),304106,spotify:track:3O5osWf1rSoKMwe6E9ZaXP
1,2020-12-18 08:13,Rachel Platten,Fight Song,32240,spotify:track:37f4ITSlgPX81ad2EvmVQr
2,2020-12-18 08:18,Two Steps from Hell,Star Sky,330579,spotify:track:06AMpcajziFnEKniV25fiU
3,2020-12-18 08:24,Thomas Bergersen,Empire of Angels,316533,spotify:track:3AnYGQ8PB3lYrA6ToVUXa3
4,2020-12-18 08:46,SVRCINA,Meet Me on the Battlefield,1207,spotify:track:440JCAtxU17JnElhbXjVl0


In [55]:
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

In [6]:
# Get track ID from track name and artist name

def get_track_uri(df, df_index):
    artist_name = df.iloc[df_index]["artistName"]
    track_name = df.iloc[df_index]["trackName"]

    try:
        tracks = sp.search(q=f"artist:{artist_name} track:{track_name}", type="track")
        return tracks["tracks"]["items"][0]["uri"]
    except:
        return None


# run from 0 until len(df)
for i in range(24000, 30000):
    df.loc[i, "track_uri"] = get_track_uri(df, i)

    if i % 500 == 0:
        df.to_csv("processed/played_songs.csv", index=False)
        print(f"=== Iteration {i} done ===")


=== Iteration 24000 done ===


In [None]:
features = {"danceability": [0], "energy": [0], "loudness": [0],
         "speechiness": [0], "acousticness": [0], "instrumentalness": [0],
         "liveness": [0], "valence": [0], "tempo": [0],
         "key": [0], "mode": [0], "time_signature": [0]}
temp = pd.DataFrame(features)
df = pd.concat([df, temp], axis=0)

In [60]:
# Get audio features of each track

def get_audio_features(track_uri):
    features_list = []
    if track_uri:
        try:
            features = sp.audio_features(track_uri)[0]
            features_list.append(features["danceability"])
            features_list.append(features["energy"])
            features_list.append(features["loudness"])
            features_list.append(features["speechiness"])
            features_list.append(features["acousticness"])
            features_list.append(features["instrumentalness"])
            features_list.append(features["liveness"])
            features_list.append(features["valence"])
            features_list.append(features["tempo"])
            features_list.append(features["key"])
            features_list.append(features["mode"])
            features_list.append(features["time_signature"])

            return features_list      
        except:
            return None


# run from 0 until len(df)
for i in range(3000, 5000):
    df.iloc[i, 5:] = get_audio_features(df.iloc[i, 4])

    if i % 200 == 0:
        df.to_csv("processed/played_songs_af.csv", index=False)
        print(f"=== Iteration {i} done ===")

print("=== Done ===")
            

=== Iteration 2000 done ===
=== Iteration 2200 done ===
=== Iteration 2400 done ===
=== Iteration 2600 done ===
=== Iteration 2800 done ===
=== Done ===


In [59]:
df.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed,track_uri,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,key,mode,time_signature
0,2020-12-18 08:12,Taylor Swift,evermore (feat. Bon Iver),304106.0,spotify:track:3O5osWf1rSoKMwe6E9ZaXP,0.39,0.27,-10.673,0.0308,0.937,0.00227,0.111,0.32,125.177,1.0,1.0,5.0
1,2020-12-18 08:13,Rachel Platten,Fight Song,32240.0,spotify:track:37f4ITSlgPX81ad2EvmVQr,0.564,0.714,-4.987,0.129,0.0549,0.0,0.155,0.33,175.924,7.0,1.0,4.0
2,2020-12-18 08:18,Two Steps from Hell,Star Sky,330579.0,spotify:track:06AMpcajziFnEKniV25fiU,0.443,0.822,-5.656,0.0508,0.00997,0.107,0.0889,0.361,130.055,2.0,0.0,4.0
3,2020-12-18 08:24,Thomas Bergersen,Empire of Angels,316533.0,spotify:track:3AnYGQ8PB3lYrA6ToVUXa3,0.349,0.485,-8.875,0.0347,0.127,0.914,0.0912,0.0383,102.921,2.0,0.0,4.0
4,2020-12-18 08:46,SVRCINA,Meet Me on the Battlefield,1207.0,spotify:track:440JCAtxU17JnElhbXjVl0,0.488,0.202,-12.878,0.0319,0.956,0.000303,0.121,0.352,127.727,9.0,0.0,4.0
