In [1]:
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import spotipy.util as util
import pprint
from tqdm.notebook import tqdm
import re
from dateutil.parser import isoparse as dateparse
from datetime import datetime, timedelta
import pickle
from sp_client import Spotify_Client

# Pretty-printer configured with a 4-space indent; none of the cells visible here use it.
pp = pprint.PrettyPrinter(indent=4)

In [3]:
# Create the client object from the project-local sp_client module. The scraper
# functions defined below use this object as the default value of their `sp`
# parameter, so this cell has to run before those definitions.
sp = Spotify_Client()

In [4]:
#save playlist uris
# Collect the URIs *before* opening the file: mode 'wb' truncates on open, so a
# failed request would otherwise wipe a previously saved snapshot.
# NOTE(review): only the 'items' of a single current_user_playlists() response
# are saved; if the client pages its results, later pages are not included — confirm.
playlist_uris = {p['uri'] for p in sp.current_user_playlists()['items']}
with open('ref/playlists.data', 'wb') as f:
    pickle.dump(playlist_uris, f)

In [5]:
def get_track_info(track_dict):
    """Pull the display fields out of one playlist item.

    Parameters
    ----------
    track_dict : dict
        Mapping with a 'track' entry that holds 'name', 'uri' and a
        non-empty 'artists' list whose elements each have a 'name'.

    Returns
    -------
    tuple
        ``(artist, name, uri)`` where ``artist`` is the name of the first
        listed artist only.
    """
    info = track_dict['track']
    title = info['name']
    # Only the first credited artist is kept; any further artists are ignored.
    lead_artist = info['artists'][0]['name']
    return lead_artist, title, info['uri']
   

In [12]:
def playlist_scraper(playlist_id, processed, sp = sp):
    """Collect audio features for every not-yet-seen track of one playlist.

    Parameters
    ----------
    playlist_id : str
        Identifier passed straight to ``sp.playlist_tracks``.
    processed : set
        Track URIs that were already handled. It is updated in place with
        every URI looked up here, so the same track is never queried twice.
    sp : client object, optional
        Must provide ``playlist_tracks`` and ``audio_features``. Defaults to the
        notebook-level ``sp`` as it was bound when this cell was run.

    Returns
    -------
    tuple
        ``(data, processed)``. ``data`` is a list with one dict per newly seen
        track (keys 'artist', 'title', 'uri' plus the names in ``cols``);
        ``processed`` is the same set object that was passed in.
    """
    #get all songs in given playlist
    cols =['danceability', 'energy', 'loudness', 'speechiness', 'acousticness','liveness', 'valence', 'tempo']

    page = sp.playlist_tracks(playlist_id)
    playlist = list(page['items'])
    # A single response only carries one page of items. When the client exposes
    # a spotipy-style ``next`` method, keep following the paging links so long
    # playlists are not silently truncated; otherwise keep the first page only.
    fetch_next = getattr(sp, 'next', None)
    while fetch_next is not None and page and page.get('next'):
        page = fetch_next(page)
        if page:
            playlist.extend(page['items'])

    data = []      #list of dicts of audio features

    for item in tqdm(playlist):
        # Items can come back without a track payload (e.g. a track that is no
        # longer available); there is nothing to look up for those.
        if not item.get('track'):
            continue

        artist, name, uri = get_track_info(item)
        if uri in processed:
            continue

        features = sp.audio_features(uri)
        processed.add(uri)
        # The lookup may yield no entry (or a None entry) for a track without
        # audio analysis; skip it instead of crashing on the subscript below.
        af = features[0] if features else None
        if af is None:
            continue

        # Copy only the wanted features; the API response itself is left untouched.
        row = {'artist': artist, 'title': name, 'uri': uri}
        for c in cols:
            row[c] = af[c]
        data.append(row)

    return data, processed

In [13]:
def all_scraper(sp = sp, playlist_names = ["test_1","test_2", "test_3"], save = False, csv_path = "data/all_songs.csv"):
    """Scrape the audio features of all songs in the named playlists.

    Parameters
    ----------
    sp : client object, optional
        Must provide ``current_user_playlists`` plus whatever
        ``playlist_scraper`` needs. Defaults to the notebook-level ``sp`` as it
        was bound when this cell was run.
    playlist_names : list of str, optional
        Names of the current user's playlists to include. Not modified here.
    save : bool, optional
        When True, the resulting table is also written to ``csv_path``.
    csv_path : str, optional
        Destination of the CSV file; only used when ``save`` is True.

    Returns
    -------
    pandas.DataFrame
        One row per distinct track, indexed by 'uri'. When nothing was scraped
        the frame is empty but still carries an index named 'uri'.
    """
    #scrape all songs in given playlists
    page = sp.current_user_playlists()
    playlists = list(page['items'])
    # Follow paging links when the client exposes a spotipy-style ``next``
    # method, so playlists beyond the first page can still be matched by name.
    fetch_next = getattr(sp, 'next', None)
    while fetch_next is not None and page and page.get('next'):
        page = fetch_next(page)
        if page:
            playlists.extend(page['items'])

    #only get songs from specified playlists
    p_ids = [p['id'] for p in playlists if p['name'] in playlist_names]
    processed = set() #set of uris, no duplicate songs
    data = [] #song features

    for p_id in tqdm(p_ids):
        songs, processed = playlist_scraper(p_id, processed, sp)
        data.extend(songs)

    if data:
        all_songs = pd.DataFrame(data).set_index('uri')
    else:
        # Without rows there is no 'uri' column to index on; return an empty
        # frame with the same index name instead of raising KeyError.
        all_songs = pd.DataFrame(index=pd.Index([], name='uri'))

    # `save` / `csv_path` were previously accepted but ignored.
    if save:
        all_songs.to_csv(csv_path)

    return all_songs
        
    


In [14]:
# Run the scraper with its defaults: the default client and the playlist names
# listed in all_scraper's `playlist_names` default.
all_songs = all_scraper()

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=41.0), HTML(value='')))





In [17]:
# First row, every column from position 2 onward (i.e. skipping 'artist' and
# 'title'), returned as a NumPy array.
all_songs.iloc[0,2:].values

array([0.325, 0.578, -6.786, 0.0455, 0.158, 0.575, 0.103, 144.745],
      dtype=object)

In [15]:
# Display the scraped table (indexed by track URI).
all_songs

Unnamed: 0_level_0,artist,title,danceability,energy,loudness,speechiness,acousticness,liveness,valence,tempo
uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
spotify:track:1iaTDu4PCIWQQOwwwqq5qW,Said the Sky,All I Got,0.325,0.578,-6.786,0.0455,0.158000,0.575,0.103,144.745
spotify:track:6Q3K9gVUZRMZqZKrXovbM2,Kygo,I'll Wait,0.611,0.651,-6.973,0.0587,0.148000,0.105,0.357,122.983
spotify:track:75ZKw8JLaFsYr51J44fQ4N,joan,drive all night,0.542,0.758,-5.032,0.0313,0.000184,0.144,0.430,156.128
spotify:track:6Wz9rIfo9tOBcVCd1Mo7F7,Nightly,Twenty Something,0.641,0.589,-6.305,0.0275,0.096000,0.208,0.110,100.001
spotify:track:1Srt81FTalOhRw7t7l8Yh8,Lauv,Tattoos Together,0.862,0.400,-7.486,0.0736,0.005750,0.123,0.596,122.963
...,...,...,...,...,...,...,...,...,...,...
spotify:track:7uzmGiiJyRfuViKKK3lVmR,Bazzi,Mine,0.710,0.789,-3.874,0.0722,0.016100,0.451,0.717,142.929
spotify:track:2cvOfKHOHgwQlLiuLKP2xR,Post Malone,Myself,0.476,0.701,-4.234,0.0349,0.102000,0.242,0.405,163.882
spotify:track:1YuIEfhp8umr6QSal4FTK4,John K,OT,0.734,0.443,-6.466,0.0788,0.434000,0.107,0.617,79.485
spotify:track:27as7exfxU3cNaYKr3HlHI,Lauv,Mean It - stripped,0.538,0.228,-10.837,0.0334,0.746000,0.126,0.517,90.912


In [16]:
# Write the scraped table to data/all_songs1.csv.
all_songs.to_csv("data/all_songs1.csv")