In [1]:
import spotipy

In [5]:
from tqdm import tqdm

In [2]:
from spotipy.oauth2 import SpotifyOAuth
from dotenv import load_dotenv, find_dotenv
import os
import math
from itertools import chain
import time
load_dotenv(find_dotenv())
scope = "user-top-read%20user-read-currently-playing%20user-read-playback-state%20playlist-read-collaborative%20playlist-read-private%20user-library-read%20user-read-recently-played%20user-follow-read"

In [12]:
import flatdict
import pandas as pd
tqdm.pandas()

  from pandas import Panel


In [8]:
def chunk(data, n):
    return [data[x:x+n] for x in range(0, len(data), n)]

In [3]:
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

In [4]:
def get_saved_tracks(page):
    return sp.current_user_saved_tracks(limit=50, offset=page*50)['items']

In [10]:
def get_track_features(track_ids):
    if len(track_ids)>100:
        print("Too many tracks")
    else:
        return sp.audio_features(track_ids)

In [6]:
all_results = []
results = sp.current_user_saved_tracks(limit=50)
total_results = results['total']
total_pages = math.ceil(total_results/50)
all_tracks = list(chain.from_iterable([get_saved_tracks(page) for page in tqdm(list(range(total_pages)))]))

100%|██████████| 48/48 [00:06<00:00,  7.30it/s]


In [7]:
all_track_ids = [track['track']['id'] for track in all_tracks]

In [9]:
chunked_track_ids = chunk(all_track_ids, 100)

In [11]:
all_audio_features = [val for val in list(chain.from_iterable([get_track_features(chunked_tracks) for chunked_tracks in tqdm(chunked_track_ids)])) if val]

100%|██████████| 24/24 [00:04<00:00,  5.01it/s]


In [13]:
flattened_tracks = [dict(flatdict.FlatterDict(track)) for track in tqdm(all_tracks)]

100%|██████████| 2393/2393 [00:02<00:00, 940.45it/s] 


In [14]:
tracks_df = pd.DataFrame(flattened_tracks)
market_cols = [col for col in tracks_df.columns if "market" in col]
#Drop columns with more than 60% nulls
subset_tracks_df = tracks_df.drop(market_cols, axis=1).dropna(axis=1, thresh=int(0.6*len(tracks_df)))

In [15]:
subset_tracks_df.head(1)

Unnamed: 0,added_at,track:album:album_type,track:album:artists:0:external_urls:spotify,track:album:artists:0:href,track:album:artists:0:id,track:album:artists:0:name,track:album:artists:0:type,track:album:artists:0:uri,track:album:external_urls:spotify,track:album:href,...,track:external_urls:spotify,track:href,track:id,track:is_local,track:name,track:popularity,track:preview_url,track:track_number,track:type,track:uri
0,2020-12-18T22:10:50Z,single,https://open.spotify.com/artist/0LyfQWJT6nXafL...,https://api.spotify.com/v1/artists/0LyfQWJT6nX...,0LyfQWJT6nXafLPZqxe9Of,Various Artists,artist,spotify:artist:0LyfQWJT6nXafLPZqxe9Of,https://open.spotify.com/album/04R8E7DvTPx6tSw...,https://api.spotify.com/v1/albums/04R8E7DvTPx6...,...,https://open.spotify.com/track/2lCinwx9z9Fmr8k...,https://api.spotify.com/v1/tracks/2lCinwx9z9Fm...,2lCinwx9z9Fmr8kcXDei0c,False,AUTOMATIC REMIX,49,https://p.scdn.co/mp3-preview/b595bcdc2583d775...,1,track,spotify:track:2lCinwx9z9Fmr8kcXDei0c


In [16]:
track_features_df = pd.DataFrame(all_audio_features)

In [17]:
track_features_df.head(1)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.741,0.654,7,-5.485,0,0.0495,0.145,0.0,0.436,0.539,100.003,audio_features,2lCinwx9z9Fmr8kcXDei0c,spotify:track:2lCinwx9z9Fmr8kcXDei0c,https://api.spotify.com/v1/tracks/2lCinwx9z9Fm...,https://api.spotify.com/v1/audio-analysis/2lCi...,938893,4


In [18]:
tracks_and_features_df = pd.merge(subset_tracks_df, track_features_df, left_on="track:id", right_on="id")

In [19]:
tracks_and_features_df.head(1)

Unnamed: 0,added_at,track:album:album_type,track:album:artists:0:external_urls:spotify,track:album:artists:0:href,track:album:artists:0:id,track:album:artists:0:name,track:album:artists:0:type,track:album:artists:0:uri,track:album:external_urls:spotify,track:album:href,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,2020-12-18T22:10:50Z,single,https://open.spotify.com/artist/0LyfQWJT6nXafL...,https://api.spotify.com/v1/artists/0LyfQWJT6nX...,0LyfQWJT6nXafLPZqxe9Of,Various Artists,artist,spotify:artist:0LyfQWJT6nXafLPZqxe9Of,https://open.spotify.com/album/04R8E7DvTPx6tSw...,https://api.spotify.com/v1/albums/04R8E7DvTPx6...,...,0.436,0.539,100.003,audio_features,2lCinwx9z9Fmr8kcXDei0c,spotify:track:2lCinwx9z9Fmr8kcXDei0c,https://api.spotify.com/v1/tracks/2lCinwx9z9Fm...,https://api.spotify.com/v1/audio-analysis/2lCi...,938893,4


In [25]:
tracks_and_features_df.to_csv("SpotifySongs.csv", index=False)

In [24]:
#TODO: Also include songs in playlists that arent in liked songs?