# Spotify Personal History Analysis

Spotify API Documentation:
* [Developer Dashboard](https://developer.spotify.com/dashboard/applications)
* [Spotify API Documentation](https://developer.spotify.com/documentation/web-api/reference/)

Spotipy (Python wrapper):
* [readthedocs](https://spotipy.readthedocs.io/en/latest/)
* [Github example scripts](https://github.com/plamere/spotipy/tree/master/examples)

In [1]:
import os
import pandas as pd
import requests
import json
import spotipy
from spotipy import util
import tqdm
tqdm.tqdm.pandas()
# from concurrent.futures import ThreadPoolExecutor

  from pandas import Panel


In [2]:
from dotenv import load_dotenv
load_dotenv()

True

### spotify data exports

In [3]:
# with open("../data/spotify_archive/my_spotify_data_complete/MyData/AddedToPlaylist.json", "r") as json_file:
#     data = json.load(json_file)

In [4]:
file_names = ["StreamingHistory"+str(i)+".json" for i in range(1,6)]

streaming_history = []
for f in file_names:
    with open(f"../data/spotify_archive/my_spotify_data_201912/MyData/{f}", "rb") as json_file:
        data = json.load(json_file, encoding="utf8")
    streaming_history = streaming_history + data

In [5]:
streaming_history = pd.DataFrame(streaming_history)

In [6]:
streaming_history.head()

Unnamed: 0,endTime,artistName,trackName,msPlayed
0,2019-02-25 23:12,AOBeats,Epilogue,185517
1,2019-02-25 23:17,AOBeats,Epilogue,11643
2,2019-02-25 23:20,AOBeats,Epilogue,197160
3,2019-02-25 23:24,AOBeats,Epilogue,197160
4,2019-02-25 23:27,AOBeats,Epilogue,197160


__create df of unique songs to get spotify info__

In [7]:
unique_streams = streaming_history[["artistName","trackName"]].drop_duplicates().reset_index(drop=True)

In [8]:
unique_streams["search_str"] =  "artist:" + unique_streams["artistName"] + " " + "track:" + unique_streams["trackName"]

In [9]:
unique_streams.head()

Unnamed: 0,artistName,trackName,search_str
0,AOBeats,Epilogue,artist:AOBeats track:Epilogue
1,AOBeats,Duress,artist:AOBeats track:Duress
2,Mahalia,I Wish I Missed My Ex,artist:Mahalia track:I Wish I Missed My Ex
3,Dagny,Used To You,artist:Dagny track:Used To You
4,Prince Fox,Space (feat. Quinn XCII),artist:Prince Fox track:Space (feat. Quinn XCII)


### spotipy search

In [16]:
token = util.prompt_for_user_token(
    username=os.environ["SPOTIFY_USERNAME"], # change env var
    scope="user-top-read",
    client_id=os.environ["SPOTIFY_CLIENT_ID"],
    client_secret=os.environ["SPOTIFY_CLIENT_SECRET"],
    redirect_uri='http://localhost:5000/'
)

In [17]:
sp = spotipy.Spotify(auth=token)

In [18]:
def parse_search_results(resp):
    try:
        first_result = resp["tracks"]["items"][0]
        result_dict = {}

        artist_names = []
        for artist_list in first_result["artists"]:
            name = artist_list["name"]
            artist_names.append(name)
        result_dict["artists"] = artist_names
        
        for key in ["name", "popularity", "uri"]:
            result_dict[key] = first_result[key]
            
    except IndexError:
        result_dict = {}
    return result_dict

In [19]:
parse_search_results(sp.search(q="artist:Dagny track:Used To You",type="track",limit=1))

{'artists': ['Dagny'],
 'name': 'Used To You',
 'popularity': 43,
 'uri': 'spotify:track:7t3Ldw0z7zS0WyzlV7aJGV'}

__getting spotify track uri for all songs in streaming history__

In [20]:
def get_uri(search_str):
    resp = sp.search(q=search_str,type="track",limit=1)
    results = parse_search_results(resp)
    results["search_str"] = search_str
    return results

In [21]:
get_uri("artist:Dagny track:Used To You")

{'artists': ['Dagny'],
 'name': 'Used To You',
 'popularity': 43,
 'uri': 'spotify:track:7t3Ldw0z7zS0WyzlV7aJGV',
 'search_str': 'artist:Dagny track:Used To You'}

In [None]:
search_list = unique_streams["search_str"].values
search_results = []
for search_str in tqdm.tqdm(search_list):
    results = get_uri(search_str)
    search_results.append(results)

 48%|████▊     | 3273/6790 [09:15<09:58,  5.87it/s]  

In [None]:
search_list = unique_streams["search_str"].values

In [None]:
# with ThreadPoolExecutor(max_workers=50) as executor:
#     results = executor.map(get_uri, search_list)

__getting top tracks from spotify api__

In [34]:
top_tracks = sp.current_user_top_tracks(time_range="short_term", limit=50)

In [33]:
print(sp.current_user_top_tracks.__doc__)

 Get the current user's top tracks

            Parameters:
                - limit - the number of entities to return
                - offset - the index of the first entity to return
                - time_range - Over what time frame are the affinities computed
                  Valid-values: short_term, medium_term, long_term
        


In [35]:
top_tracks_extracted = []
for t in top_tracks["items"]:
    artist_names = []
    for a in t["artists"]:
        artist_names.append(a["name"])
    top_tracks_extracted.append([t["uri"], t["name"], artist_names, t["popularity"], t["album"]["release_date"]])

In [36]:
top_tracks_df = pd.DataFrame(top_tracks_extracted, columns=["uri","track_name","artists","popularity","release_data"])

In [37]:
uri_list = top_tracks_df["uri"].values

In [39]:
top_tracks_df

Unnamed: 0,uri,track_name,artists,popularity,release_data
0,spotify:track:7eJMfftS33KTjuF7lTsMCx,death bed (coffee for your head) (feat. beabad...,"[Powfu, beabadoobee]",98,2020-02-08
1,spotify:track:6S0nD6Kb9XYIC7z2S98dcQ,Moon (feat. Vancouver Sleep Clinic),"[OTR, Vancouver Sleep Clinic]",51,2020-03-06
2,spotify:track:61UQpK8caxBw9DzrbSpxbx,You Are,[Tvvin],37,2019-03-06
3,spotify:track:2ptqYXyQ1QdArGjbrTzBaU,Crazier Things,[Chelsea Cutler],62,2020-01-17
4,spotify:track:6LIEY7maPOCS5HILyaZRs9,Body,[Fintan],14,2019-03-22
5,spotify:track:2SL45jubSaBTVkToMFIreZ,Right?,[McCall],23,2019-08-16
6,spotify:track:7F2aEO8pNlweq3JWJyeyig,How Does It Feel,[papichuloteej],45,2020-01-28
7,spotify:track:0D2AMu7xJhEioPG8UYlynm,lovesick. (Yoke Lore Remix),"[FINLAY, Yoke Lore]",28,2019-07-12
8,spotify:track:4xxF57q6wmLa400D3AA2u2,The Weather,[Lawrence],53,2020-02-21
9,spotify:track:0d3tkd32FV4UzyQpMsmEUD,Play God,[Refs],29,2019-06-07


__track info for top tracks__

In [26]:
audio_features = []
audio_analysis = []
for uri in tqdm.tqdm(uri_list):
    af = sp.audio_features(uri)
    aa = sp.audio_analysis(uri)
    audio_features.append(af[0])
    audio_analysis.append(aa)

100%|██████████| 50/50 [00:28<00:00,  1.75it/s]


In [27]:
af_df = pd.DataFrame(audio_features)

In [28]:
aa_df = pd.DataFrame(audio_analysis)

In [29]:
aa.keys()

dict_keys(['meta', 'track', 'bars', 'beats', 'tatums', 'sections', 'segments'])

In [30]:
top_tracks_df = top_tracks_df.merge(af_df,on="uri",how="left")
top_tracks_df = top_tracks_df.merge(a)

TypeError: Can only merge Series or DataFrame objects, a <class 'dict'> was passed

In [None]:
top_tracks_df.describe()