In [1]:
import warnings

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm.notebook import tqdm

from config import CLIENT_ID, CLIENT_SECRET, PLAY_LIST, USER, DIRECT_URI

In [2]:
# Build the API client from the app's client id/secret (requests_timeout=10).
credentials_manager = SpotifyClientCredentials(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
)
spotify = spotipy.Spotify(
    client_credentials_manager=credentials_manager,
    requests_timeout=10,
)

# Artist link; among the visible cells it is only referenced by the
# commented-out per-country experiment further down.
the_strokes_uri = 'https://open.spotify.com/artist/0epOFNiUfyON9EYx7Tpr6V'

# Getting All Track Data

In [3]:
results_all_tracks = spotify.playlist_tracks('https://open.spotify.com/playlist/1cyjmzQGadZ5vrGMjUijgI?si=26e84ab01fc749f3')

# playlist_tracks returns one page of results (see the spotipy docs for the page
# size limit). Follow the 'next' links so a playlist that outgrows a single page
# is not silently truncated; every page's items are appended to the first response.
page = results_all_tracks
while page['next']:
    page = spotify.next(page)
    results_all_tracks['items'].extend(page['items'])

In [4]:
def get_track_statistics(tracks):
    """Collect album name, audio features and audio analysis for each playlist track.

    Parameters
    ----------
    tracks : dict
        A playlist-tracks response. Only ``tracks['items']`` is read; each item's
        ``'track'`` entry supplies ``name``, ``id`` and ``album['name']``.

    Returns
    -------
    dict
        ``{track name: {'album': ...} | audio features | audio analysis 'track' section}``.
        On key clashes the analysis value wins over the features value, which wins
        over ``'album'``. Tracks that share a name overwrite one another, because
        the result is keyed by name.

    Notes
    -----
    Items without a track payload or track id, and tracks for which
    ``audio_features`` returns no data, are skipped (with a warning) instead of
    aborting the whole run with a ``TypeError``.
    """
    stats_by_name = {}
    skipped = []
    for item in tqdm(tracks['items']):
        track = item.get('track')
        # Nothing to look up without a track payload or an id.
        if not track or not track.get('id'):
            skipped.append(track.get('name') if track else None)
            continue

        track_id = track['id']
        features = spotify.audio_features(track_id)[0]
        # audio_features yields None in place of a feature dict when it has no
        # data for the id; merging None with `|` would raise TypeError.
        if features is None:
            skipped.append(track['name'])
            continue

        analysis = spotify.audio_analysis(track_id)['track']
        stats_by_name[track['name']] = {'album': track['album']['name']} | features | analysis

    if skipped:
        warnings.warn(f"Skipped {len(skipped)} playlist item(s) without usable audio data: {skipped}")
    return stats_by_name

In [5]:
# Audio features & analysis data for every track in the playlist response
# (global data, not split by country), keyed by track name.
# NOTE: get_track_statistics queries the Spotify API per track, so this cell is slow to re-run.
all_tracks = get_track_statistics(results_all_tracks)

  0%|          | 0/91 [00:00<?, ?it/s]

# Cleaning Data

In [7]:
# removing unnecessary columns
UNNEEDED_COLUMNS = (
    # fingerprint / raw analysis strings
    'codestring',
    'echoprintstring',
    'synchstring',
    'rhythmstring',
    'sample_md5',
    # analysis window and sampling metadata
    'offset_seconds',
    'window_seconds',
    'analysis_sample_rate',
    'analysis_channels',
    # confidence scores
    'tempo_confidence',
    'time_signature_confidence',
    'key_confidence',
    'mode_confidence',
    # analyzer version tags
    'code_version',
    'echoprint_version',
    'synch_version',
    'rhythm_version',
    # identifiers and links
    'type',
    'id',
    'uri',
    'track_href',
    'analysis_url',
)

for track_stats in all_tracks.values():
    for column in UNNEEDED_COLUMNS:
        # pop() with a default keeps the cell idempotent: re-running it, or a
        # track that lacks one of these fields, no longer raises KeyError.
        track_stats.pop(column, None)

In [8]:
# list the track names (the dictionary keys) to sanity-check what was collected
all_tracks.keys()

dict_keys(['The Modern Age - Rough Trade Version', 'Last Nite - Rough Trade Version - The Modern Age B-Side', 'Hard To Explain', 'New York City Cops (Hard to Explain B-Side)', 'Last Nite', 'When It Started (Last Nite B-Side)', 'Someday', 'Alone, Together - Home Recording - Someday B-Side', 'Is This It - Home Recording - Someday B-Side', '12:51', 'The Way It Is - Home Recording - 12:51 B-Side', 'Reptilia', 'Modern Girls & Old Fashion Men - Reptilia B-Side', 'The End Has No End', 'Clampdown - The End Has No End B-Side / Live at Alexandra Palace, London, UK - Dec. 5, 2003', 'Juicebox', 'Hawaii - Juicebox B-Side', 'Heart In a Cage', 'I\'ll Try Anything Once ("You Only Live Once" demo) - Heart In a Cage B-Side', 'You Only Live Once', 'Mercy Mercy Me (The Ecology) - You Only Live Once B-Side', 'The Adults Are Talking', 'Selfless', 'Brooklyn Bridge To Chorus', 'Bad Decisions', 'Eternal Summer', 'At The Door', 'Why Are Sundays So Depressing', 'Not The Same Anymore', 'Ode To The Mets', 'Is This

# Getting Top 10 Tracks Data & Popularity

In [None]:
# TODO: fetch the top 10 tracks and their popularity (not implemented yet)

# Getting Track Data By Country
### (After testing, this unfortunately doesn't work: the data is the same for every country; it just filters out songs that aren't available in that country's Spotify market.)

In [None]:
# # getting top_tracks for every country where Spotify is available
# top_tracks_all_countries = {}
# for country_code in tqdm(spotify.available_markets()['markets']):
#     top_tracks_all_countries[country_code] = spotify.artist_top_tracks(the_strokes_uri, country = country_code)

# def get_top_tracks_by_country(tracks):
#     """looping through each country and creating simple dictionary of country name: top 10 tracks in order and popularity"""
#     result_dict_ = {}
#     for country in tqdm(list(tracks.keys())):
#         top_10_list = []
#         for track in tracks[country]['tracks']:
#             top_10_list.append(track['name'])
#         result_dict_[country] = top_10_list
#     return result_dict_

# top_tracks_by_country = get_top_tracks_by_country(top_tracks_all_countries)
# top_tracks_by_country

In [12]:
# One row per track, one column per feature. Transposing the dict-of-dicts frame
# leaves every column as dtype `object`, so infer_objects() is applied to give
# the numeric columns real numeric dtypes (needed for e.g. .describe() / .corr()).
df_all_tracks = pd.DataFrame(all_tracks).T.infer_objects()
df_all_tracks

Unnamed: 0,album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,num_samples,duration,end_of_fade_in,start_of_fade_out
The Modern Age - Rough Trade Version,The Singles - Volume 01,0.493,0.787,2,-7.712,1,0.0334,0.0224,0.00227,0.253,0.88,160.12,192854,4,4252423,192.85365,0.44122,183.91946
Last Nite - Rough Trade Version - The Modern Age B-Side,The Singles - Volume 01,0.399,0.745,0,-7.497,1,0.0329,0.0438,0,0.112,0.514,207.923,198549,4,4378011,198.54926,0.37655,196.12735
Hard To Explain,Is This It,0.408,0.666,7,-6.286,1,0.0578,0.000455,0.862,0.0889,0.666,82.523,224333,4,4946550,224.33333,0.38707,224.33333
New York City Cops (Hard to Explain B-Side),The Singles - Volume 01,0.52,0.842,0,-7.664,1,0.0352,0.0633,0.000045,0.384,0.512,107.958,209505,4,4619587,209.50508,0.2146,202.58249
Last Nite,Is This It,0.624,0.899,0,-5.709,1,0.0296,0.0221,0.000137,0.0975,0.79,104.061,193373,4,4263882,193.37334,0.45805,191.33823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Call It Fate, Call It Karma",Comedown Machine,0.544,0.241,4,-14.779,0,0.03,0.983,0.774,0.101,0.359,109.437,204773,4,4515252,204.77333,0.0,193.6312
Drag Queen,Future Present Past,0.554,0.546,2,-4.681,0,0.0444,0.0803,0.328,0.102,0.507,124.115,273483,4,6030305,273.48322,0.14512,261.67728
OBLIVIUS,Future Present Past,0.457,0.898,8,-4.35,1,0.0503,0.0885,0.69,0.21,0.434,102.979,299158,4,6596429,299.15778,0.0,290.49905
Threat of Joy,Future Present Past,0.594,0.728,0,-6.917,1,0.0281,0.216,0.0881,0.124,0.687,137.97,264662,4,5835797,264.662,0.17424,252.05841


In [13]:
# spot-check a single row, looked up by track name (the frame's index)
df_all_tracks.loc['Someday']

album                Is This It
danceability              0.539
energy                    0.521
key                           9
loudness                  -7.46
mode                          1
speechiness              0.0299
acousticness            0.00425
instrumentalness              0
liveness                  0.367
valence                   0.679
tempo                   106.035
duration_ms              183440
time_signature                4
num_samples             4044852
duration                 183.44
end_of_fade_in          0.38308
start_of_fade_out        183.44
Name: Someday, dtype: object

In [14]:
# Export the track table twice: as column-oriented JSON and as an Excel workbook.
json_path, excel_path = 'src/all_tracks.json', 'src/all_tracks.xlsx'
df_all_tracks.to_json(json_path, orient='columns')
df_all_tracks.to_excel(excel_path)

potential visualizations:

group by album, top tracks vs others

correlation between rankings and audio features

audio features by song

map by country - popularity, top tracks