In [1]:
import pandas as pd
import json
import requests
import re
import numpy as np
import matplotlib.pyplot as pyplot
import seaborn as sns

In [2]:
# read your 1+ StreamingHistory files (depending on how extensive your streaming history is) into pandas dataframes
stream = pd.read_json('C:\\Users\\user\\datasets\\StreamingHistory0.json')

# create a 'UniqueID' for each song by combining the fields 'artistName' and 'trackName'
stream['UniqueID'] = (stream['artistName'] +  ":  "  + stream['trackName'])

stream.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID
0,2020-09-30 10:56,Spotify,It's Wednesday,5845,Spotify: It's Wednesday
1,2020-09-30 10:56,Spotify,It's Wednesday,0,Spotify: It's Wednesday
2,2020-09-30 10:57,Today At The Ringer,Wentz’s Struggles and the Worst Quarterback Si...,25644,Today At The Ringer: Wentz’s Struggles and th...
3,2020-09-30 11:04,Lil Wayne,Uproar,194184,Lil Wayne: Uproar
4,2020-09-30 11:07,Diplo,Welcome to the Party (with French Montana & Li...,181720,Diplo: Welcome to the Party (with French Mont...


In [3]:
# Look at NaN values
stream.isna().sum()

endTime       0
artistName    0
trackName     0
msPlayed      0
UniqueID      0
dtype: int64

In [4]:
# remove unkowns
stream.drop([8,9,10,11,12,13,14,15,16], axis=0).head(10)

Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID
0,2020-09-30 10:56,Spotify,It's Wednesday,5845,Spotify: It's Wednesday
1,2020-09-30 10:56,Spotify,It's Wednesday,0,Spotify: It's Wednesday
2,2020-09-30 10:57,Today At The Ringer,Wentz’s Struggles and the Worst Quarterback Si...,25644,Today At The Ringer: Wentz’s Struggles and th...
3,2020-09-30 11:04,Lil Wayne,Uproar,194184,Lil Wayne: Uproar
4,2020-09-30 11:07,Diplo,Welcome to the Party (with French Montana & Li...,181720,Diplo: Welcome to the Party (with French Mont...
5,2020-09-30 11:09,Kash Doll,Ready Set (feat. Big Sean),140180,Kash Doll: Ready Set (feat. Big Sean)
6,2020-09-30 11:13,Big Sean,Bezerk (feat. A$AP Ferg),86820,Big Sean: Bezerk (feat. A$AP Ferg)
7,2020-09-30 11:13,Sufjan Stevens,Run Away With Me,26540,Sufjan Stevens: Run Away With Me
17,2020-09-30 11:20,Think About It,Eternal Life Is Why All The Research Is Being ...,53090,Think About It: Eternal Life Is Why All The R...
18,2020-09-30 11:30,Sufjan Stevens,Lamentations,28760,Sufjan Stevens: Lamentations


In [5]:
with open("C:\\Users\\user\\datasets\\YourLibrary.json","r") as fp: #r - open file in read mode
 data = json.load(fp)
 data = data['tracks']
# read your edited Library into a pandas dataframe
library = pd.DataFrame.from_dict(data)

# add UniqueID column (same as above)
library['UniqueID'] = library['artist'] + ": " + library['track']

# add column with track URI stripped of 'spotify:track:'
new = library["uri"].str.split(":", expand = True)
library['track_uri'] = new[2]

library.head(10)

Unnamed: 0,artist,album,track,uri,UniqueID,track_uri
0,Louis Prima and His New Orleans,BD Music Presents Christmas Jazz,What Will Santa Claus Say (When He Finds Every...,spotify:track:6e7S73CwnsFZOvlr5lPhMG,Louis Prima and His New Orleans: What Will San...,6e7S73CwnsFZOvlr5lPhMG
1,Brent Faiyaz,"Gravity (feat. Tyler, The Creator)","Gravity (feat. Tyler, The Creator)",spotify:track:6u3CPnFMKANYgfdiifFOiJ,"Brent Faiyaz: Gravity (feat. Tyler, The Creator)",6u3CPnFMKANYgfdiifFOiJ
2,A$AP Ferg,ALWAYS STRIVE AND PROSPER,Rebirth,spotify:track:6wNEJZxKoKEKEDd911PRu5,A$AP Ferg: Rebirth,6wNEJZxKoKEKEDd911PRu5
3,Big Sean,Dark Sky Paradise,Blessings,spotify:track:1bzM1cd6oqFozdr4wK6HdR,Big Sean: Blessings,1bzM1cd6oqFozdr4wK6HdR
4,VIC MENSA,I TAPE,VICTORY,spotify:track:6h8EXUR98HmYoe3qca9oPW,VIC MENSA: VICTORY,6h8EXUR98HmYoe3qca9oPW
5,Machine Gun Kelly,bloom,Golden God,spotify:track:1jMAzf2MOxCgI0bNtssXvA,Machine Gun Kelly: Golden God,1jMAzf2MOxCgI0bNtssXvA
6,Justin Bieber,Purpose,What Do You Mean?,spotify:track:4B0JvthVoAAuygILe3n4Bs,Justin Bieber: What Do You Mean?,4B0JvthVoAAuygILe3n4Bs
7,Nancy Wilson,Come Get To This,If I Ever Lose This Heaven,spotify:track:0ZVzhyYuOFcH59lyD63QvV,Nancy Wilson: If I Ever Lose This Heaven,0ZVzhyYuOFcH59lyD63QvV
8,Vince Staples,Vince Staples,THE SHINING,spotify:track:4JvTUper6kKrGUuOPsdhI2,Vince Staples: THE SHINING,4JvTUper6kKrGUuOPsdhI2
9,Matt Corby,Made of Stone,Made of Stone,spotify:track:75kHuG6iFGA7lOMKPiojr0,Matt Corby: Made of Stone,75kHuG6iFGA7lOMKPiojr0


In [6]:
# create final dict as a copy stream df
tableau = stream.copy()

# add column checking if streamed song is in library
# not used in this project but could be helpful for cool visualizations
tableau['In Library'] = np.where(tableau['UniqueID'].isin(library['UniqueID'].tolist()),1,0)

# left join with df_library on UniqueID to bring in album and track_uri
tableau = pd.merge(tableau, library[['album','UniqueID']],how='left',on=['UniqueID'])

# Remove unknowns
tableau = tableau.drop(tableau.index[tableau["artistName"]=="Unknown Artist"])
tableau.reset_index()
tableau.head(10)

Unnamed: 0,endTime,artistName,trackName,msPlayed,UniqueID,In Library,album
0,2020-09-30 10:56,Spotify,It's Wednesday,5845,Spotify: It's Wednesday,0,
1,2020-09-30 10:56,Spotify,It's Wednesday,0,Spotify: It's Wednesday,0,
2,2020-09-30 10:57,Today At The Ringer,Wentz’s Struggles and the Worst Quarterback Si...,25644,Today At The Ringer: Wentz’s Struggles and th...,0,
3,2020-09-30 11:04,Lil Wayne,Uproar,194184,Lil Wayne: Uproar,0,
4,2020-09-30 11:07,Diplo,Welcome to the Party (with French Montana & Li...,181720,Diplo: Welcome to the Party (with French Mont...,0,
5,2020-09-30 11:09,Kash Doll,Ready Set (feat. Big Sean),140180,Kash Doll: Ready Set (feat. Big Sean),0,
6,2020-09-30 11:13,Big Sean,Bezerk (feat. A$AP Ferg),86820,Big Sean: Bezerk (feat. A$AP Ferg),0,
7,2020-09-30 11:13,Sufjan Stevens,Run Away With Me,26540,Sufjan Stevens: Run Away With Me,0,
17,2020-09-30 11:20,Think About It,Eternal Life Is Why All The Research Is Being ...,53090,Think About It: Eternal Life Is Why All The R...,0,
18,2020-09-30 11:30,Sufjan Stevens,Lamentations,28760,Sufjan Stevens: Lamentations,0,


In [7]:
# save your IDs from new project in Spotify Developer Dashboard
CLIENT_ID = "853ff2e002da41509999797a2bd0128f"
CLIENT_SECRET = "77aa598e076e4d4a91ecaac792b7ead0"

In [8]:
# generate access token

# authentication URL
AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST
auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': "853ff2e002da41509999797a2bd0128f",
    'client_secret': "77aa598e076e4d4a91ecaac792b7ead0",
})

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [9]:
# used for authenticating all API calls
headers = {'Authorization': 'Bearer {token}'.format(token=access_token)}

In [10]:
# base URL of all Spotify API endpoints
BASE_URL = 'https://api.spotify.com/v1/'

In [11]:
# create blank dictionary to store track URI, artist URI, and genres
dict_genre = {}

# convert track_uri column to an iterable list
track_uris = library['track_uri'].to_list()

# loop through track URIs and pull artist URI using the API,
# then use artist URI to pull genres associated with that artist
# store all these in a dictionary
for t_uri in track_uris:
    
    dict_genre[t_uri] = {'artist_uri': "", "genres":[]}
    
    r = requests.get(BASE_URL + 'tracks/' + t_uri, headers=headers)
    r = r.json()
    a_uri = r["artists"][0]['uri'].split(':')[2]
    dict_genre[t_uri]['artist_uri'] = a_uri
    
    s = requests.get(BASE_URL + 'artists/' + a_uri, headers=headers)
    s = s.json()
    dict_genre[t_uri]['genres'] = s['genres']

In [13]:
# convert dictionary into dataframe with track_uri as the first column
genre = pd.DataFrame.from_dict(dict_genre, orient='index')
genre.insert(0, 'track_uri', genre.index)
genre.reset_index(inplace=True, drop=True)

genre.head()

Unnamed: 0,track_uri,artist_uri,genres
0,6e7S73CwnsFZOvlr5lPhMG,2LmkgfMMLXdQiJQSRRoxKY,[]
1,6u3CPnFMKANYgfdiifFOiJ,3tlXnStJ1fFhdScmQeLpuG,"[dmv rap, pop, r&b, rap]"
2,6wNEJZxKoKEKEDd911PRu5,5dHt1vcEm9qb8fCyLcB3HL,"[hip hop, pop rap, rap, southern hip hop, trap..."
3,1bzM1cd6oqFozdr4wK6HdR,0c173mlxpT3dSFRgMO8XPh,"[detroit hip hop, hip hop, pop, pop rap, rap, ..."
4,6h8EXUR98HmYoe3qca9oPW,27w1NoOLMX7tJMYqcetPyG,"[chicago rap, conscious hip hop, hip hop, pop ..."


In [14]:
genre_expanded = genre.explode('genres')
genre_expanded.head()

Unnamed: 0,track_uri,artist_uri,genres
0,6e7S73CwnsFZOvlr5lPhMG,2LmkgfMMLXdQiJQSRRoxKY,
1,6u3CPnFMKANYgfdiifFOiJ,3tlXnStJ1fFhdScmQeLpuG,dmv rap
1,6u3CPnFMKANYgfdiifFOiJ,3tlXnStJ1fFhdScmQeLpuG,pop
1,6u3CPnFMKANYgfdiifFOiJ,3tlXnStJ1fFhdScmQeLpuG,r&b
1,6u3CPnFMKANYgfdiifFOiJ,3tlXnStJ1fFhdScmQeLpuG,rap


In [15]:
# save df_tableau and df_genre_expanded as csv files that we can load into Tableau
tableau.to_csv('MySpotifyDataTable.csv')
genre_expanded.to_csv('GenresExpandedTable.csv')

print('done')

done


In [None]:
import jovian
jovian.commit(filename="Spotify.ipynb")