In [None]:
!pip install -r requirements.txt



You should consider upgrading via the 'C:\Users\corsi\OneDrive - Umich\personal_projects\spotify_project\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [None]:
import spotipy
import numpy as np
import argparse
import pprint
import sys
import os
import subprocess
import json
import spotipy
import spotipy.util as util
import pandas as pd
import time
from spotipy.oauth2 import SpotifyClientCredentials 
import matplotlib.pyplot as plt
import secrets


# Read the credential mapping once instead of calling secrets.secrets() per field.
# NOTE(review): this relies on a project-local `secrets` module being imported
# above; the standard-library `secrets` module has no `secrets()` function.
credentials = secrets.secrets()
cid = credentials['cid']
secret = credentials['secret']
username = credentials['username']

# App-level client built from the client id/secret pair.
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager,requests_timeout=12)

# User-level token limited to reading the user's library.
scope = 'user-library-read'

token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')




### Convenience functions

In [None]:
def show_tracks(tracks):
    """Print one line per entry of ``tracks['items']``.

    Each line holds the entry's position, the name of the first listed
    artist and the track name, in that order.
    """
    for position, entry in enumerate(tracks['items']):
        song = entry['track']
        lead_artist = song['artists'][0]['name']
        print(" %d %s %s" % (position, lead_artist, song['name']))

def get_track_features(track_id,sp):
    """Return ``sp.audio_features([track_id])``, or None when there is no id.

    The client is not called at all when ``track_id`` is None.
    """
    if track_id is not None:
        return sp.audio_features([track_id])
    return None

def get_features(tracks,sp):
    """Attach audio features to every track that has them.

    Parameters:
        tracks: iterable of dicts with 'name', 'artist' and 'id' keys.
        sp: client handed to ``get_track_features`` for the lookup.

    Returns:
        A list of dicts (name, artist, id plus the selected audio features).
        Tracks without an id or without audio features are skipped.
    """
    tracks_with_features=[]

    for track in tracks:
        features = get_track_features(track['id'],sp)
        print (track['name'])

        # The lookup can yield None (no id), an empty list, or a list whose
        # only entry is None (no analysis available for the track). The last
        # case is truthy, so the first entry has to be checked as well.
        f = features[0] if features else None
        if f is None:
            print("passing track %s" % track['name'])
            continue

        tracks_with_features.append(dict(
                                        name=track['name'],
                                        artist=track['artist'],
                                        id=track['id'],
                                        danceability=f['danceability'],
                                        energy=f['energy'],
                                        loudness=f['loudness'],
                                        speechiness=f['speechiness'],
                                        acousticness=f['acousticness'],
                                        tempo=f['tempo'],
                                        liveness=f['liveness'],
                                        valence=f['valence']
                                        ))

    return tracks_with_features

def get_tracks_from_playlists(username, sp):
    """Collect name/id/first-artist for every track in the user's own playlists.

    Both the playlist listing and each playlist's track listing are followed
    through their 'next' links via ``sp.next`` so results beyond the first
    page are included. Playlists owned by someone else are ignored, and
    entries whose 'track' is None are skipped.

    Parameters:
        username: user id whose playlists are listed; also used as the owner filter.
        sp: client exposing ``user_playlists``, ``user_playlist`` and ``next``.

    Returns:
        A list of dicts with 'name', 'id' and 'artist' keys ('artist' is None
        when the track lists no artists).
    """
    trackList = []
    playlists = sp.user_playlists(username)
    while playlists:
        for playlist in playlists['items']:
            if playlist['owner']['id'] != username:
                continue
            print (playlist['name'],' no. of tracks: ',playlist['tracks']['total'])
            results = sp.user_playlist(username, playlist['id'],fields="tracks,next")
            tracks = results['tracks']
            while tracks:
                for item in tracks['items']:
                    track = item['track']
                    if track is None:
                        # Entry without track data; nothing to record.
                        continue
                    artists = track.get('artists') or []
                    artist = artists[0]['name'] if artists else None
                    trackList.append(dict(name=track['name'], id=track['id'], artist=artist))
                # Only ask for another page when the current one links to it.
                tracks = sp.next(tracks) if tracks.get('next') else None
        playlists = sp.next(playlists) if playlists.get('next') else None

    return trackList

def write_to_csv(track_features, path='mySongsDataset.csv'):
    """Write the track records to a CSV file, one row per unique (name, artist).

    Parameters:
        track_features: list of dicts, each with at least 'name' and 'artist'.
        path: destination file; defaults to 'mySongsDataset.csv'.
    """
    df = pd.DataFrame(track_features)
    if not df.empty:
        # drop_duplicates returns a new frame, so the result must be kept.
        df = df.drop_duplicates(subset=['name','artist'])
    print ('Total tracks in data set', len(df))
    df.to_csv(path,index=False)

def main(username, save=False):
    """Collect the user's playlist tracks together with their audio features.

    Parameters:
        username: user id whose playlists are read.
        save: when True, also write the result with ``write_to_csv``.
            Defaults to False, so nothing is written unless asked for.

    Returns:
        The list of track dicts produced by ``get_features``.
    """
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    print ("Getting user tracks from playlists")
    tracks = get_tracks_from_playlists(username, sp)
    print ("Getting track audio features")
    tracks_with_features = get_features(tracks,sp)
    if save:
        print ("Storing into csv")
        write_to_csv(tracks_with_features)
    # Hand the data back so the work is not thrown away when nothing is saved.
    return tracks_with_features
    



### Get liked songs dataset with features

In [None]:
scope = 'user-library-read'

token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')

if token:
    sp = spotipy.Spotify(auth=token)
    tracks_with_features=[]
    page_size = 50  # largest page the saved-tracks endpoint accepts
    offset = 0

    while True:
        try:
            results = sp.current_user_saved_tracks(limit=page_size, offset=offset)
            items = results['items']
            if not items:
                break
            # Entries without a track or without an id cannot be looked up.
            liked = [item['track'] for item in items
                     if item.get('track') and item['track'].get('id')]
            # One audio-features request per page instead of one per track.
            features = sp.audio_features([track['id'] for track in liked]) if liked else []
        except spotipy.client.SpotifyException as err:
            # Report the real failure instead of hiding it behind a bare except;
            # whatever was collected so far is kept.
            print(f'stopping early after Spotify API error: {err}')
            break

        for track, f in zip(liked, features or []):
            if f is None:
                # No audio features available for this track.
                continue
            tracks_with_features.append(dict(
                                            name=track['name'],
                                            artist=track['artists'][0]['name'],
                                            id=track['id'],
                                            danceability=f['danceability'],
                                            energy=f['energy'],
                                            loudness=f['loudness'],
                                            speechiness=f['speechiness'],
                                            acousticness=f['acousticness'],
                                            tempo=f['tempo'],
                                            liveness=f['liveness'],
                                            valence=f['valence']
                                            ))

        offset += len(items)
        if not results.get('next'):
            break

    print(f'end of tracks. number of tracks are {len(tracks_with_features)}')

#write_to_csv(tracks_with_features)    


## Get all streaming data and link

In [None]:
import glob
import pandas as pd
import json

# get data file names
path =r"D:\Downloads\my_spotify_data\MyData"
# Sort the matches: glob makes no ordering promise, and later cells attach
# ids/genres/features to these rows purely by position.
filenames = sorted(glob.glob(path + "/StreamingHistory*.json"))
if not filenames:
    raise FileNotFoundError(f"No StreamingHistory*.json files found in {path}")

# Read every export file, then concatenate all data into one DataFrame
dfs = [pd.read_json(filename) for filename in filenames]
big_frame = pd.concat(dfs, ignore_index=True)

In [None]:
def get_id (row, sp=sp, token=token, scope=scope):
    """Search Spotify for the row's track and return the id of the first hit.

    Parameters:
        row: mapping with 'artistName' and 'trackName' string entries.
        sp: client used for the search (defaults to the client that exists
            when this function is defined).
        token, scope: kept for call compatibility; both are reassigned
            inside the function before use.

    Returns:
        The id of the first matching track, or None when the search has no hits.
    """
    scope = 'user-library-read'
    query = 'artist:' + row['artistName'] + ' track:' + row['trackName']

    def _first_track_id(client):
        # Raises IndexError when the result list is empty.
        return client.search(q = query, type='track' )['tracks']['items'][0]['id']

    try:
        try:
            return _first_track_id(sp)
        except spotipy.client.SpotifyException:
            # Ask for a token again and retry once with a new client.
            token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')
            sp = spotipy.Spotify(auth=token,requests_timeout=120)
            return _first_track_id(sp)
    except IndexError:
        # No match on either attempt.
        return None

In [None]:
def get_genre (row, sp=sp, token=token, scope=scope):
    """Search Spotify for the row's artist and return the first hit's genres.

    Parameters:
        row: mapping with an 'artistName' string entry.
        sp: client used for the search (defaults to the client that exists
            when this function is defined).
        token, scope: kept for call compatibility; both are reassigned
            inside the function before use.

    Returns:
        The 'genres' value of the first matching artist, or ['None'] when the
        search has no hits.
    """
    scope = 'user-library-read'

    def _first_artist_genres(client):
        # Raises IndexError when the result list is empty.
        return client.search(q =row['artistName'], type='artist')['artists']['items'][0]['genres']

    try:
        try:
            return _first_artist_genres(sp)
        except spotipy.client.SpotifyException: # handled inside the function because the exception doesn't get caught properly otherwise
            token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')
            sp = spotipy.Spotify(auth=token,requests_timeout=12)
            return _first_artist_genres(sp)
    except IndexError:
        # The outer try also covers the retry above, so an empty result after
        # a token refresh gets the same placeholder instead of escaping.
        return ['None']

### Get unique song URIs from spotipy API

In [None]:
import pickle
big_list = []
step = 1000
scope = 'user-library-read'

def _id_or_none(row):
    """Return get_id(row), or None when the search yields no match."""
    try:
        return get_id(row)
    except IndexError: # case where the search doesn't return anything
        return None

# Walk the history in chunks of `step` rows; the bound comes from the frame
# itself rather than a hard-coded row count.
for start in range(0, len(big_frame), step):
    chunk = big_frame.iloc[start:start + step]
    try:
        # get_id expects one row at a time, so apply it row-wise.
        chunk_ids = chunk.apply(_id_or_none, axis=1).to_list()
    except spotipy.client.SpotifyException:
        # Second-chance retry of the whole chunk after prompting for a token.
        token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')
        sp = spotipy.Spotify(auth=token,requests_timeout=120)
        chunk_ids = chunk.apply(_id_or_none, axis=1).to_list()

    # Accumulate across chunks so the saved list ends up with one entry per row.
    big_list.extend(chunk_ids)

    # Checkpoint after every chunk; the file always holds everything so far.
    with open('song_ids.data','wb') as f:
      pickle.dump(big_list,f)

      


### Get genres

In [None]:
import pickle
big_list = []
step = 1000

# The per-chunk files live in ./pickle; create it so the first dump cannot fail.
os.makedirs('pickle', exist_ok=True)

for size in range(0, len(big_frame), step): #loop in 1000 chunks
    chunk = big_frame.iloc[size:size + step]
    try:
        big_list = chunk.apply(get_genre,axis=1).to_list()

    except spotipy.client.SpotifyException:
        # Second-chance retry of the whole chunk after prompting for a token.
        token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')
        sp = spotipy.Spotify(auth=token,requests_timeout=120)
        big_list = chunk.apply(get_genre,axis=1).to_list()

    # One file per chunk, named after the chunk's starting row.
    file = f'pickle/song_genres{size}.data'
    with open(file,'wb') as f:
        pickle.dump(big_list,f)

# The loop above already covers the final partial chunk, so it is saved here
# as-is rather than fetched a second time and appended to itself.
file = f'pickle/song_genres_end.data'
with open(file,'wb') as f:
    pickle.dump(big_list,f)


### Get song features once we have the URIs

In [None]:
new_feats = []

# NOTE(review): pickle.load runs arbitrary code from the file; only load files
# this notebook (or you) produced.
with open('full_ids.data', 'rb') as f:
  one = pickle.load(f)

scope = 'user-library-read'

token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')

sp = spotipy.Spotify(auth=token,requests_timeout=120)

# Audio-feature fields copied verbatim; 'duration_ms' is stored as 'duration'.
feature_columns = ('danceability', 'energy', 'loudness', 'speechiness',
                   'acousticness', 'tempo', 'liveness', 'valence')
checkpoint_every = 500  # rows between intermediate saves


def _fetch_audio_features(client, track_id):
    """Return the first audio-features entry for track_id, or None if unusable."""
    try:
        return client.audio_features(tracks=track_id)[0]
    except (TypeError, IndexError):
        # Unusable id (e.g. None) or no usable result for it.
        return None


def _feature_record(raw):
    """Build one output row; every field is NaN when no features are available."""
    if raw is None:
        record = dict.fromkeys(feature_columns, np.nan)
        record['duration'] = np.nan
        return record
    record = {column: raw[column] for column in feature_columns}
    record['duration'] = raw['duration_ms']
    return record


def _save_features(records):
    """Persist the rows collected so far."""
    with open('feature_songs.data','wb') as fh:
        pickle.dump(records,fh)


# Iterate over the ids that were actually loaded instead of a fixed row count.
for track in range(len(one)):
    try:
        raw = _fetch_audio_features(sp, one[track])
    except spotipy.client.SpotifyException:
        # Prompt for a token again and retry this id once.
        token = util.prompt_for_user_token(username,scope,client_id=cid, client_secret=secret,redirect_uri='http://localhost/')
        sp = spotipy.Spotify(auth=token,requests_timeout=120)
        raw = _fetch_audio_features(sp, one[track])

    new_feats.append(_feature_record(raw))

    # Periodic checkpoint instead of re-pickling the whole list on every row.
    if (track + 1) % checkpoint_every == 0:
        _save_features(new_feats)

_save_features(new_feats)

In [None]:
# Reload the pickled id and genre lists from disk.
with open('song_ids.data', 'rb') as ids_file:
    uris = pickle.load(ids_file)

with open('song_genres.data', 'rb') as genres_file:
    genres = pickle.load(genres_file)

# Attach both lists to the streaming history as new columns.
# NOTE(review): presumably each list has one entry per row of big_frame - confirm.
big_frame['id'] = uris
big_frame['genres'] = genres

In [None]:
with open('feature_songs.data', 'rb') as f:
  one = pickle.load(f)
# Keep the combined frame and the export as separate steps: to_csv returns
# None when given a path, so chaining it would rebind big_frame to None.
big_frame = pd.concat([big_frame, pd.DataFrame(one)], axis=1)
big_frame.to_csv('full_spotify_df.csv')

In [None]:
# Flatten the nested lists in `t` into a single flat list and pickle it.
# NOTE(review): `t` is not defined in any visible cell of this notebook;
# confirm where it is supposed to come from before running this cell.
genres = []
for sublist in t:
    genres.extend(sublist)

with open('genres_full.data','wb') as genres_file:
    pickle.dump(genres, genres_file)

In [None]:
# Load the pickled feature records from a fixed, machine-specific location.
with open(r"C:\Users\corsi\feats_test.data", 'rb') as feats_file:
    feats = pickle.load(feats_file)

In [None]:
# Store `genres` as the 'genres' column of the streaming history.
# NOTE(review): presumably `genres` holds one entry per row of big_frame - confirm.
big_frame['genres'] = genres

In [None]:
# Append the feature records as extra columns next to the existing ones.
# The result is bound back to big_frame, so re-running this cell concatenates
# the same columns onto the frame again.
big_frame = pd.concat([big_frame, pd.DataFrame(feats)], axis=1)

In [None]:
# Display the last 500 rows of the combined frame as the cell output.
big_frame.tail(500)

Unnamed: 0,endTime,artistName,trackName,msPlayed,genres,danceability,energy,loudness,speechiness,acousticness,tempo,liveness,valence,duration
36973,2021-09-19 18:58,Temple Of The Dog,Hunger Strike - 25th Anniversary Mix,246266,"[alternative metal, alternative rock, grunge, ...",0.384,0.735,-6.522,0.0317,0.02520,137.834,0.1140,0.312,246267.0
36974,2021-09-19 18:58,RAC,Hollywood,3753,"[alternative dance, electropop, indie poptimis...",0.945,0.546,-6.109,0.0519,0.11900,120.004,0.1210,0.962,197467.0
36975,2021-09-19 19:01,BabyJake,Blue Cellophane,181026,[pop rap],0.679,0.431,-9.563,0.0287,0.53400,114.971,0.1890,0.284,181027.0
36976,2021-09-19 19:04,Barns Courtney,"""99""",199413,"[modern alternative rock, modern rock, rock]",0.552,0.804,-4.299,0.0303,0.00598,95.980,0.1110,0.714,199413.0
36977,2021-09-19 19:08,Barns Courtney,Little Boy,233448,"[modern alternative rock, modern rock, rock]",0.654,0.621,-8.555,0.0328,0.38400,113.955,0.1190,0.410,233448.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37468,2021-09-23 23:15,keshi,blue - Jai Wolf Remix,202666,"[chill r&b, pop]",0.465,0.447,-8.116,0.0266,0.09070,90.082,0.1980,0.180,202667.0
37469,2021-09-23 23:19,America,A Horse with No Name,249367,"[album rock, art rock, classic rock, country r...",0.654,0.506,-17.180,0.0535,0.68600,123.179,0.1550,0.831,252240.0
37470,2021-09-23 23:23,Gypsy & The Cat,Time to Wander,234172,"[alternative dance, australian indie]",0.556,0.881,-4.624,0.0528,0.00174,135.025,0.0685,0.115,234173.0
37471,2021-09-23 23:28,MACKandgold,Take Me In,263920,[pop house],0.738,0.612,-5.945,0.0330,0.38500,110.015,0.1040,0.577,263920.0


In [None]:
# big_frame.to_csv('final_df.csv')