# Top 50 Tracks and Audio Features

The first step is to log in to Spotify, which requires a token for authorization.
The scope 'user-top-read' is used to obtain the user's top artists and tracks.

In [3]:
# List of user's top 50 tracks
import os
import sys

import spotipy
import spotipy.util as util

# The Spotify username may be supplied through the SPOTIFY_USERNAME environment
# variable; when it is unset, the first command-line argument is used as before.
# NOTE(review): inside a Jupyter kernel sys.argv[1] is likely a launcher flag
# rather than a username -- confirm, and prefer the environment variable there.
username = os.environ.get('SPOTIFY_USERNAME')
if not username:
    if len(sys.argv) > 1:
        username = sys.argv[1]
    else:
        print("Usage: %s username" % (sys.argv[0],))
        sys.exit()

scope = 'user-top-read'

# App credentials are read from the environment so that no secret is stored in
# the notebook. Any client secret that was previously committed here should be
# rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
# The redirect URI is not a secret; it falls back to the value used previously.
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://example.com/callback/')

if not client_id or not client_secret:
    raise RuntimeError(
        "Set the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment "
        "variables before running this cell."
    )

token = util.prompt_for_user_token(
    username,
    scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
)

## Obtaining Data
Spotipy is used to obtain the current top tracks for the signed-in user. Each request is limited to 50 tracks, which is the most Spotify returns in a single call. For each time period (short, medium, and long term) there are three lists: the artist name, the track name, and the track id.

In [4]:
# One list per field (artist name, track name, track id) for each time range.
# They are re-created on every run, so re-executing the cell does not append
# to stale data.
short_artist_name = []
short_track_name = []
short_track_id = []

medium_artist_name = []
medium_track_name = []
medium_track_id = []

long_artist_name = []
long_track_name = []
long_track_id = []

# Maps each Spotify time range to the three lists it fills.
track_lists_by_range = {
    'short_term': (short_artist_name, short_track_name, short_track_id),
    'medium_term': (medium_artist_name, medium_track_name, medium_track_id),
    'long_term': (long_artist_name, long_track_name, long_track_id),
}

if token:
    sp = spotipy.Spotify(auth=token)
    sp.trace = False
    ranges = ['short_term', 'medium_term', 'long_term']
    # The loop variable is deliberately not named `range`: at notebook level
    # that would rebind the builtin to a string for every later cell.
    for time_range in ranges:
        results = sp.current_user_top_tracks(time_range=time_range, limit=50)
        artist_names, track_names, track_ids = track_lists_by_range[time_range]
        for item in results['items']:
            # Only the first listed artist of each track is recorded.
            artist_names.append(item['artists'][0]['name'])
            track_names.append(item['name'])
            track_ids.append(item['id'])

else:
    print("Can't get token for", username)

## Using Pandas 
We store the lists in a data frame for each time period. Then we clean the data by dropping rows that repeat the same artist and track name. This can occur because an artist may have two versions of the same song.

In [5]:
import pandas as pd

# Rows that repeat the same (artist, track name) pair count as duplicates.
dedup_keys = ['artist_name', 'track_name']

# Build one frame per time range and drop repeated pairs straight away.
df_short_tracks = pd.DataFrame({
    'artist_name': short_artist_name,
    'track_name': short_track_name,
    'track_id': short_track_id,
}).drop_duplicates(subset=dedup_keys)

df_medium_tracks = pd.DataFrame({
    'artist_name': medium_artist_name,
    'track_name': medium_track_name,
    'track_id': medium_track_id,
}).drop_duplicates(subset=dedup_keys)

df_long_tracks = pd.DataFrame({
    'artist_name': long_artist_name,
    'track_name': long_track_name,
    'track_id': long_track_id,
}).drop_duplicates(subset=dedup_keys)

We can now see the top tracks for each time period.

In [6]:
# Preview the first rows of the de-duplicated short-term top tracks.
df_short_tracks.head()

Unnamed: 0,artist_name,track_name,track_id
0,Lana Del Rey,Norman fucking Rockwell,3RIgHHpnFKj5Rni1shokDj
1,Childish Gambino,Redbone,0wXuerDYiBnERgIpbb3JBR
2,Ruel,Dazed & Confused,2pyjbGTpJCPjMYwCbdymiF
3,Jon Waltz,Sportscar,7aS9BSdc8HlNvSAEdALb18
4,88rising,Shouldn't Couldn't Wouldn't,16ox7ZM0ozbzBOTjFecYuY


In [7]:
# Preview the first rows of the de-duplicated medium-term top tracks.
df_medium_tracks.head()

Unnamed: 0,artist_name,track_name,track_id
0,Dominic Fike,Phone Numbers,3f9Mzvd3URfbbIJBX4pz9Z
1,JPEGMAFIA,Free The Frail,5r7OKhOQl2vM8SZnCXSQk1
2,ILLENIUM,Good Things Fall Apart (with Jon Bellion),6pooRNiLyYpxZeIA5kJ5EX
3,Daniel Caesar,SUPERPOSITION,45PxuJqJBnPXZKLxoo9Apj
4,Rich Brian,100 Degrees,2ZDpSQfBdgkooeXw6oj3Uz


In [8]:
# Preview the first rows of the de-duplicated long-term top tracks.
df_long_tracks.head()

Unnamed: 0,artist_name,track_name,track_id
0,BROCKHAMPTON,GUMMY,42tBlHWL3VfDkUM2iWcc5p
1,Kevin Abstract,Peach,5JRMqkR82k2fdDEAim9SCN
2,BROCKHAMPTON,SWEET,2DgMxFMUQRPthW4ROhjen1
3,Dominic Fike,Phone Numbers,3f9Mzvd3URfbbIJBX4pz9Z
4,BROCKHAMPTON,GOLD,7HRv1sYuwgoea1m0JRvChV


## Obtaining Audio Features
To use spotipy for audio features, we need to put in the track id for each song. We can then get the list of audio features and store it into a data frame.

In [9]:
# Request audio features for the track ids of each time range, in the order
# short, medium, long.
short_term_features, medium_term_features, long_term_features = (
    sp.audio_features(tracks_frame['track_id'])
    for tracks_frame in (df_short_tracks, df_medium_tracks, df_long_tracks)
)

In [10]:
# Turn each list of audio-feature records into a data frame, one column per
# feature key. The audio-features endpoint can return a null (None) entry for
# a track it has no features for; those entries are skipped because a None
# mixed into the list of dicts breaks DataFrame construction.
df_short_audio_features = pd.DataFrame(
    [features for features in short_term_features if features is not None]
)
df_medium_audio_features = pd.DataFrame(
    [features for features in medium_term_features if features is not None]
)
df_long_audio_features = pd.DataFrame(
    [features for features in long_term_features if features is not None]
)

## Merging Data
To analyze the songs, the only elements we will look at are acousticness, valence, liveness, instrumentalness, energy, loudness, tempo, danceability, and speechiness, so we can drop the other columns. However, we will keep the id column and rename it to 'track_id' so the audio features data frame and the tracks data frame can be merged.

In [11]:
# Audio-feature columns that are not needed for the analysis.
columns_to_drop = ['analysis_url','track_href','type','uri','key','mode','duration_ms','time_signature']

# Drop the unused columns and rename 'id' to 'track_id' so the frames share a
# join key with the track frames. errors='ignore' keeps the cell re-runnable:
# on a second execution the columns are already gone, and without it the drop
# would raise KeyError. The rename is likewise a no-op once 'id' is renamed.
df_short_audio_features = (
    df_short_audio_features
    .drop(columns=columns_to_drop, errors='ignore')
    .rename(columns={'id': 'track_id'})
)
df_medium_audio_features = (
    df_medium_audio_features
    .drop(columns=columns_to_drop, errors='ignore')
    .rename(columns={'id': 'track_id'})
)
df_long_audio_features = (
    df_long_audio_features
    .drop(columns=columns_to_drop, errors='ignore')
    .rename(columns={'id': 'track_id'})
)

# Join track metadata with its audio features on the shared track id.
df_short = pd.merge(df_short_tracks, df_short_audio_features, on='track_id')
df_medium = pd.merge(df_medium_tracks, df_medium_audio_features, on='track_id')
df_long = pd.merge(df_long_tracks, df_long_audio_features, on='track_id')

Now we can check if the merge was successful.

In [12]:
# Preview the first rows of the merged short-term tracks and audio features.
df_short.head()

Unnamed: 0,artist_name,track_name,track_id,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence
0,Lana Del Rey,Norman fucking Rockwell,3RIgHHpnFKj5Rni1shokDj,0.967,0.218,0.215,0.0847,0.0948,-12.49,0.0368,76.74,0.138
1,Childish Gambino,Redbone,0wXuerDYiBnERgIpbb3JBR,0.167,0.743,0.347,0.00951,0.103,-11.174,0.121,160.143,0.572
2,Ruel,Dazed & Confused,2pyjbGTpJCPjMYwCbdymiF,0.102,0.688,0.5,0.0,0.355,-7.411,0.138,127.906,0.418
3,Jon Waltz,Sportscar,7aS9BSdc8HlNvSAEdALb18,0.194,0.757,0.505,0.0138,0.147,-10.244,0.0626,132.422,0.596
4,88rising,Shouldn't Couldn't Wouldn't,16ox7ZM0ozbzBOTjFecYuY,0.41,0.454,0.379,3e-06,0.0837,-8.199,0.132,169.055,0.275


In [13]:
# Preview the first rows of the merged medium-term tracks and audio features.
df_medium.head()

Unnamed: 0,artist_name,track_name,track_id,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence
0,Dominic Fike,Phone Numbers,3f9Mzvd3URfbbIJBX4pz9Z,0.239,0.884,0.539,0.0,0.0805,-6.389,0.208,85.008,0.776
1,JPEGMAFIA,Free The Frail,5r7OKhOQl2vM8SZnCXSQk1,0.515,0.553,0.509,0.0,0.154,-7.955,0.353,153.674,0.264
2,ILLENIUM,Good Things Fall Apart (with Jon Bellion),6pooRNiLyYpxZeIA5kJ5EX,0.0128,0.643,0.594,0.0,0.118,-5.453,0.0318,144.033,0.442
3,Daniel Caesar,SUPERPOSITION,45PxuJqJBnPXZKLxoo9Apj,0.911,0.589,0.279,0.000128,0.087,-11.209,0.0408,115.656,0.333
4,Rich Brian,100 Degrees,2ZDpSQfBdgkooeXw6oj3Uz,0.118,0.756,0.648,0.0,0.515,-5.287,0.0731,80.979,0.657


In [14]:
# Preview the first rows of the merged long-term tracks and audio features.
df_long.head()

Unnamed: 0,artist_name,track_name,track_id,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence
0,BROCKHAMPTON,GUMMY,42tBlHWL3VfDkUM2iWcc5p,0.198,0.687,0.672,0.0,0.175,-5.975,0.0573,80.035,0.792
1,Kevin Abstract,Peach,5JRMqkR82k2fdDEAim9SCN,0.683,0.646,0.681,5e-06,0.107,-5.847,0.0764,131.896,0.421
2,BROCKHAMPTON,SWEET,2DgMxFMUQRPthW4ROhjen1,0.543,0.718,0.628,0.0,0.116,-6.007,0.223,92.107,0.722
3,Dominic Fike,Phone Numbers,3f9Mzvd3URfbbIJBX4pz9Z,0.239,0.884,0.539,0.0,0.0805,-6.389,0.208,85.008,0.776
4,BROCKHAMPTON,GOLD,7HRv1sYuwgoea1m0JRvChV,0.0716,0.808,0.647,0.000466,0.205,-6.947,0.166,110.036,0.29


The data seems to be fine and we can convert it into csv files for analysis using other programs.

In [15]:
# Write each merged frame to its own CSV file in the working directory,
# using to_csv's default options.
csv_exports = {
    'SpotifyAudioFeatures12022019ShortTerm.csv': df_short,
    'SpotifyAudioFeatures12022019MediumTerm.csv': df_medium,
    'SpotifyAudioFeatures12022019LongTerm.csv': df_long,
}
for csv_path, merged_frame in csv_exports.items():
    merged_frame.to_csv(csv_path)