### Lab | API wrappers - Create your collection of songs & audio features

### 1. Getting started

#### 1. Import Libraries

In [1]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from random import randint
from time import sleep
from pandas import json_normalize

#### 2. Create a log_in file

In [2]:
# Load the Spotify API credentials from a local text file so they never appear in the notebook.
# Every non-empty line is expected to look like "key: value" (e.g. "clientid: abc123").
# The context manager closes the file handle even if reading fails ("r" = read-only text mode).
with open("secrets.txt", "r") as secrets_file:
    # Read the whole file in one go
    string = secrets_file.read()

# Translate the lines into a dictionary
secrets_dict = {}
for line in string.splitlines():
    # Split on the FIRST ':' only, so a value that itself contains ':' is kept intact.
    key, separator, value = line.partition(':')
    if not separator:
        # Blank line, or a line that is not in "key: value" form: nothing to store.
        continue
    secrets_dict[key.strip()] = value.strip()
        

#### 3. Log into SpotiPy

In [None]:
# Build the Spotipy client; it authenticates through SpotifyClientCredentials using the
# 'clientid' / 'clientsecret' entries read into secrets_dict.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=secrets_dict['clientid'],
        client_secret=secrets_dict['clientsecret'],
    )
)

### 2. Importing the large playlist

In [None]:
# Clustering needs a large pool of songs, so request the tracks of one big playlist.
# Only the first page of results is stored here.
playlist = sp.user_playlist_tracks(user="spotify", playlist_id="5S8SJdl1BDc0ugpkEvFsIL")

In [None]:
# Show the "total" field of the response (presumably the number of tracks in the playlist).
playlist["total"]

In [None]:
def get_playlist_tracks(playlist_id, client=None, max_retries=3, pause_range=(1, 3), retry_wait=5):
    """Collect every track item of a playlist by following the paginated response.

    Parameters
    ----------
    playlist_id : str
        Spotify id of the playlist to download.
    client : object, optional
        Object exposing ``user_playlist_tracks`` and ``next``. Defaults to the
        notebook-level ``sp`` client.
    max_retries : int, optional
        Number of consecutive failures tolerated for one page before the error
        is re-raised.
    pause_range : tuple of int, optional
        Inclusive ``(low, high)`` bounds, in seconds, of the random pause taken
        after each successfully fetched page.
    retry_wait : int or float, optional
        Seconds to wait before retrying a page request that failed.

    Returns
    -------
    list
        The ``items`` entries of all pages, in page order.

    Raises
    ------
    Exception
        Whatever the client raised, once the same page has failed more than
        ``max_retries`` times in a row.
    """
    if client is None:
        client = sp  # Spotify client created in the log-in cell
    results = client.user_playlist_tracks("spotify", playlist_id)
    tracks = list(results['items'])
    failures = 0
    while results['next'] is not None:
        try:
            next_page = client.next(results)
        except Exception:
            failures += 1
            print('\nSomething failed.')
            if failures > max_retries:
                # Give up instead of looping forever on a persistent error.
                raise
            # Only `sleep` is imported from the time module, so call it directly.
            sleep(retry_wait)
            continue
        failures = 0
        if next_page is None:
            # The client reports that there is no further page.
            break
        results = next_page
        tracks.extend(results['items'])
        # Short random pause between pages to stay friendly to the API.
        sleep(randint(*pause_range))
    return tracks

In [None]:
# Download every page of the large playlist (the result is long, so it is not displayed here).
all_tracks = get_playlist_tracks(playlist_id="5S8SJdl1BDc0ugpkEvFsIL")

In [None]:
# Now I have everything

#### 1. Get specific info

In [None]:
# Get the name of the first listed artist of the first track
all_tracks[0]['track']['artists'][0]['name']

In [None]:
# Get the URL to the first song (the 'spotify' entry of its external_urls)
all_tracks[0]['track']['external_urls']['spotify']

In [None]:
# Get the title of the first song
all_tracks[0]["track"]["name"]

In [None]:
# Get the URI of the first song
all_tracks[0]['track']['uri']

In [None]:
# Get the popularity of the second song (index 1)
all_tracks[1]['track']['popularity']

#### 2. Get the artist name, title, popularity, url and uri into a dataframe

In [None]:
# Build the song table: one row per playlist entry with artist, title, popularity, uri and url.
# The rows are collected in a list first and turned into a DataFrame in one step;
# growing a DataFrame row by row is quadratic, and DataFrame.append was removed in pandas 2.0.
song_columns = ['artist', 'song', 'popularity', 'uri', 'url']
song_records = []
for item in all_tracks:
    track_info = item.get('track')
    if not track_info:
        # Entry without track data: there is nothing to extract, so skip it.
        continue
    artists = track_info.get('artists') or [{}]
    song_records.append({
        'artist': artists[0].get('name'),      # first listed artist only
        'song': track_info['name'],
        'popularity': track_info['popularity'],
        'uri': track_info['uri'],
        'url': track_info['external_urls'],    # still a dict here; flattened in a later cell
    })

df = pd.DataFrame(song_records, columns=song_columns)

# Rich display of the first rows instead of printing the whole frame as text.
df.head()

In [None]:
#df

In [None]:
# Replace each external_urls dict with its 'spotify' link.
# Picking the key directly keeps this a single column even if the dict carries other keys,
# and leaves values that are already plain strings untouched when the cell is re-run.
df['url'] = df['url'].map(lambda urls: urls.get('spotify') if isinstance(urls, dict) else urls)

In [None]:
#df

In [None]:
# Save the song table to df.csv (index=False: the row index is not written as a column)
df.to_csv('df.csv', index=False)

### 3. Get the audio features

In [None]:
# Preview the first two rows of the song table
df.head(2)

In [None]:
# URI of the first song (row label 0), used to try out a single audio-features request
song_uri=df['uri'][0]

In [None]:
# Request the audio features for that one song to inspect the shape of the response
sp.audio_features(song_uri)

In [None]:
# Create a copy of the big dataset

In [None]:
# Work on an independent copy: a plain `test_big = df` would only create a second name for
# the same object, so any later change to test_big would silently change df as well.
test_big = df.copy()
test_big.shape

##### 1. Get the audio features in a list

In [None]:
# -- Step 1
# Fetch the audio features in batches instead of issuing one request per track.
# Every entry appended to audio_features is still a one-element list ([features] or [None]),
# the same shape a single-URI call returns, so the following cells work unchanged.
AUDIO_FEATURES_BATCH_SIZE = 100  # assumed per-request id limit of the endpoint; see Spotify Web API docs
audio_features = []
track_uris = list(test_big["uri"])
for start in range(0, len(track_uris), AUDIO_FEATURES_BATCH_SIZE):
    batch_features = sp.audio_features(track_uris[start:start + AUDIO_FEATURES_BATCH_SIZE])
    audio_features.extend([features] for features in batch_features)

In [None]:
# Number of collected audio-feature entries (one per uri in test_big)
len(audio_features)

##### 2. Convert the audio features list into a dataframe

In [None]:
# -- Step 2 Convert into a dataframe
# Each element of audio_features is a list of feature dicts (one per requested uri);
# presumably an entry is None when no features were returned for a track.
# Only real dicts are kept, so no stray all-empty `0` column is created, and the frame is
# built in one step instead of concatenating one tiny DataFrame per track.
feature_positions = []
feature_rows = []
for position, features in enumerate(audio_features):
    for feature_dict in features or []:
        if feature_dict:
            feature_positions.append(position)  # keep the position in audio_features as index
            feature_rows.append(feature_dict)
df_features_full = pd.DataFrame(feature_rows, index=feature_positions)


In [None]:
# Display the audio-features table
df_features_full

##### 3. Merge test_big and df_features_full into one dataframe with an inner join (keep only the rows whose uri appears in both)

In [None]:
# Display the song table that forms the left side of the merge
test_big

In [None]:
# Combine the songs with their audio features; the inner join on the shared "uri" column
# keeps only rows whose uri is present in both tables.
df_data = test_big.merge(df_features_full, how='inner', on="uri")
df_data

In [None]:
# Save everything as a csv file

In [None]:
# Write the merged songs + audio features table to df_data.csv without the row index
df_data.to_csv("df_data.csv", index = False)

In [None]:
# audio_features = []
# for uri in df["uri"]:
#     try:
#         audio_features.append(sp.audio_features(uri))
#     except:
#         print('\nSomething failed.')
#         time.sleep(10)

In [None]:
# Write the song table to test_big.csv without the row index
test_big.to_csv("test_big.csv", index = False)

In [3]:
# Reload the merged dataset from disk, so the API download above does not need to be repeated
df_data = pd.read_csv('df_data.csv')
df_data

Unnamed: 0,artist,song,popularity,uri,url,audio_features,danceability,energy,key,loudness,...,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature,0
0,Taylor Swift,...Ready For It?,0,spotify:track:7zgqtptZvhf8GEmdsM2vp2,https://open.spotify.com/track/7zgqtptZvhf8GEm...,"[{'danceability': 0.615, 'energy': 0.779, 'key...",0.615,0.779,2.0,-6.454,...,0.1550,0.453,160.000,audio_features,7zgqtptZvhf8GEmdsM2vp2,https://api.spotify.com/v1/tracks/7zgqtptZvhf8...,https://api.spotify.com/v1/audio-analysis/7zgq...,208198.0,4.0,
1,Thomas Rhett,Life Changes,63,spotify:track:4Vxu50qVrQcycjRyJQaZLC,https://open.spotify.com/track/4Vxu50qVrQcycjR...,"[{'danceability': 0.687, 'energy': 0.845, 'key...",0.687,0.845,7.0,-4.370,...,0.0452,0.809,87.972,audio_features,4Vxu50qVrQcycjRyJQaZLC,https://api.spotify.com/v1/tracks/4Vxu50qVrQcy...,https://api.spotify.com/v1/audio-analysis/4Vxu...,190227.0,4.0,
2,Bruno Mars,24K Magic,81,spotify:track:6b8Be6ljOzmkOmFslEb23P,https://open.spotify.com/track/6b8Be6ljOzmkOmF...,"[{'danceability': 0.818, 'energy': 0.803, 'key...",0.818,0.803,1.0,-4.282,...,0.1530,0.632,106.970,audio_features,6b8Be6ljOzmkOmFslEb23P,https://api.spotify.com/v1/tracks/6b8Be6ljOzmk...,https://api.spotify.com/v1/audio-analysis/6b8B...,225983.0,4.0,
3,Ed Sheeran,Galway Girl,80,spotify:track:0afhq8XCExXpqazXczTSve,https://open.spotify.com/track/0afhq8XCExXpqaz...,"[{'danceability': 0.624, 'energy': 0.876, 'key...",0.624,0.876,9.0,-3.374,...,0.3270,0.781,99.943,audio_features,0afhq8XCExXpqazXczTSve,https://api.spotify.com/v1/tracks/0afhq8XCExXp...,https://api.spotify.com/v1/audio-analysis/0afh...,170827.0,4.0,
4,Ed Sheeran,Photograph,86,spotify:track:1HNkqx9Ahdgi1Ixy2xkKkL,https://open.spotify.com/track/1HNkqx9Ahdgi1Ix...,"[{'danceability': 0.614, 'energy': 0.379, 'key...",0.614,0.379,4.0,-10.480,...,0.0986,0.201,107.989,audio_features,1HNkqx9Ahdgi1Ixy2xkKkL,https://api.spotify.com/v1/tracks/1HNkqx9Ahdgi...,https://api.spotify.com/v1/audio-analysis/1HNk...,258987.0,4.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9990,Elton John,Funeral For A Friend / Love Lies Bleeding,0,spotify:track:4UFlPCB4THnQ9TlPHqIQow,https://open.spotify.com/track/4UFlPCB4THnQ9Tl...,"[{'danceability': 0.41, 'energy': 0.761, 'key'...",0.410,0.761,9.0,-8.507,...,0.2470,0.193,138.712,audio_features,4UFlPCB4THnQ9TlPHqIQow,https://api.spotify.com/v1/tracks/4UFlPCB4THnQ...,https://api.spotify.com/v1/audio-analysis/4UFl...,666572.0,4.0,
9991,Supertramp,Fool's Overture,55,spotify:track:5pSSEkT0963muzzIjsVkrs,https://open.spotify.com/track/5pSSEkT0963muzz...,"[{'danceability': 0.406, 'energy': 0.306, 'key...",0.406,0.306,3.0,-10.482,...,0.0727,0.073,135.272,audio_features,5pSSEkT0963muzzIjsVkrs,https://api.spotify.com/v1/tracks/5pSSEkT0963m...,https://api.spotify.com/v1/audio-analysis/5pSS...,652560.0,4.0,
9992,Yes,Heart of the Sunrise - 2003 Remaster,47,spotify:track:7gC6Rbllqf1yXNC02e5jz2,https://open.spotify.com/track/7gC6Rbllqf1yXNC...,"[{'danceability': 0.362, 'energy': 0.507, 'key...",0.362,0.507,1.0,-11.229,...,0.1130,0.456,146.641,audio_features,7gC6Rbllqf1yXNC02e5jz2,https://api.spotify.com/v1/tracks/7gC6Rbllqf1y...,https://api.spotify.com/v1/audio-analysis/7gC6...,634440.0,3.0,
9993,Paul McCartney,Venus And Mars / Rock Show / Jet - Live / Rema...,0,spotify:track:6Ff77WXC58MkhLE5A1qgY1,https://open.spotify.com/track/6Ff77WXC58MkhLE...,"[{'danceability': 0.331, 'energy': 0.733, 'key...",0.331,0.733,2.0,-8.671,...,0.9470,0.380,128.512,audio_features,6Ff77WXC58MkhLE5A1qgY1,https://api.spotify.com/v1/tracks/6Ff77WXC58Mk...,https://api.spotify.com/v1/audio-analysis/6Ff7...,620747.0,4.0,


# Appendix

In [None]:
# test_big = test_big.assign(audio_features = audio_features)
# test_big

In [None]:
# IGNORE IT
# ## -- Step 2
# test = test.assign(audio_features = audio_features)
# test

##### Get a small dataframe to try if things work

In [None]:
# Draw a reproducible 1% sample of the distinct song titles (random_state=25) and keep
# every row of df whose title is in that sample.
sampled_titles = df.song.drop_duplicates().sample(frac=0.01, random_state=25)
title_is_sampled = df.song.isin(sampled_titles)
df_small = df.loc[title_is_sampled]

In [None]:
# Display the small sample
df_small

In [None]:
# # We have to make the function resilient to errors in case the connection breaks
# def get_features_delayed(uri):
#     # First we try to get the features
#     try:
#         sleep(randint(1,1)) # respectful nap
#         features = sp.audio_features(uri)
#     # if it doesnt work we try again, for this we make the function recursive
#     except:
#         print('Error occured while getting feature names')
#         features = get_features_delayed(uri)
#     # Finally we return the features
#     return features


# # NOTES:
# # Save a file for every 200 songs

In [None]:
# df_small['features'] = df_small['uri'].apply(get_features_delayed)

In [None]:
# Display the sample again (it only has a 'features' column if the apply cell above was run)
df_small

In [None]:
# Flatten the feature entry of the second sampled row (iloc[1]) into columns.
# The 'features' column is only created by the commented-out apply cell above, so the lookup
# is guarded: without the guard this cell raises KeyError on a fresh Restart & Run All.
json_normalize(df_small['features'].iloc[1]) if 'features' in df_small.columns else None

In [None]:
# Display the small sample once more
df_small

In [None]:
# `test` is a second name for df_small (no copy is made); it is used for the trial run below
test = df_small

In [None]:
# SEEMS TO WORK
# -- Step 1
# One audio-features request per uri of the small sample; the responses are kept in order.
audio_features_small = [sp.audio_features(uri) for uri in test["uri"]]

In [None]:
## Step 2 -- Turn the collected responses into one feature table
# Each element of audio_features_small is a list of feature dicts; presumably an entry is
# None when no features were returned for a track. Only real dicts are kept, so no stray
# all-empty `0` column appears, and the frame is built in one step instead of concatenating
# one tiny DataFrame per track.
small_positions = []
small_rows = []
for position, features in enumerate(audio_features_small):
    for feature_dict in features or []:
        if feature_dict:
            small_positions.append(position)  # keep the position in the response list as index
            small_rows.append(feature_dict)
df_test = pd.DataFrame(small_rows, index=small_positions)

In [None]:
# Attach the audio features to the sampled songs; the inner join on the shared "uri" column
# keeps only rows whose uri is present in both tables.
df_data_small = test.merge(df_test, how='inner', on="uri")
df_data_small

In [None]:
# Write the merged sample to data_small.csv without the row index
df_data_small.to_csv('data_small.csv', index=False)

In [None]:
# ## -- Step 2
# test = test.assign(audio_features = audio_features)
# test

In [None]:
# audio_features = []
# for i in df['uri']:
#     audio_features.append(sp.audio_features(i))

In [None]:
#audio_features

In [None]:
# for i in df['uri']:
#     audio_features = sp.audio_features(i)
#     df = df.append({'audio_features':audio_features},ignore_index=False)

In [None]:
# for i in range(len(df_small['features'])):
#     json_normalize(df_small['features'].iloc[i])

In [None]:
# df_small_try = df_small
# df_small_try

In [None]:
# def flatten_features(df):
#     featurelist = []
#     for i in range(len(df['features'])):
#         try:
#             featurelist.append([
#             df['features'][i][0]['danceability'],
#             df['features'][i][0]['energy'],
#             df['features'][i][0]['key'],
#             df['features'][i][0]['loudness'],
#             df['features'][i][0]['mode'],
#             df['features'][i][0]['speechiness'],
#             df['features'][i][0]['acousticness'],
#             df['features'][i][0]['instrumentalness'],
#             df['features'][i][0]['liveness'],
#             df['features'][i][0]['valence'],
#             df['features'][i][0]['tempo']
#             ])
#         except:
#             featurelist.append([0,0,0,0,0,0,0,0,0,0,0])
#     featureframe = pd.DataFrame(featurelist,columns = ['danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo'] )
#     df = pd.concat([df,featureframe],axis = 1)
#     df = df.drop('features',axis = 1)
#     return df

In [None]:
# df_clean = flatten_features(df_small_try)

In [None]:
#json_normalize(df_small_try['features'], ['danceability', 'energy'])

In [None]:
#df_small_test['features'] = json_normalize(df_small['features'], ['danceability'])

In [None]:
# def get_audio_features(uri):
#     for uri in df["uri"]:
#         audio_features = sp.audio_features(uri)
#         df2 = df.append({'audio_features':audio_features}, ignore_index=True)
#         while

In [None]:
# 'danceability': 0.615,
#   'energy': 0.779,
#   'key': 2,
#   'loudness': -6.454,
#   'mode': 1,
#   'speechiness': 0.135,
#   'acousticness': 0.0665,
#   'instrumentalness': 0,
#   'liveness': 0.155,
#   'valence': 0.453,
#   'tempo': 160.0,

In [None]:
# feature_dict = {}

In [None]:
# BASE_URL = 'https://api.spotify.com/v1/'

In [None]:
# for t_uri in track_uris:
    
#     feature_dict[t_uri] = {'danceability': 0,
#                            'energy': 0,
#                            'key': 0,
#                            'loudness': 0,
#                            'mode': 0,
#                            'speechiness':0,
#                            'acousticness': 0.0665,
#                            'instrumentalness': 0,
#                            'liveness': 0.155,
#                            'valence': 0.453,
#                            'tempo': 0}
    
# #     r = all_tracks.get(BASE_URL + 'tracks/' + t_uri, headers=headers)
# #     r = r.json()
# #     feature_dict[t_uri]['popularity'] = r['popularity']
    
#     s = sp.audio_features(t_uri)
#     s = json_normalize(s)
#     feature_dict[t_uri]['danceability'] = s['danceability']
#     feature_dict[t_uri]['energy'] = s['energy']
#     feature_dict[t_uri]['speechiness'] = s['speechiness']
#     feature_dict[t_uri]['instrumentalness'] = s['instrumentalness']
#     feature_dict[t_uri]['tempo'] = s['tempo']