In [2]:
import warnings

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm.notebook import tqdm

from config import CLIENT_ID, CLIENT_SECRET, PLAY_LIST, USER, DIRECT_URI

In [3]:
# Spotify Web API client authenticated with the app's client credentials
# (read from config.py); every request times out after 10 seconds.
spotify = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
    ),
    requests_timeout=10,
)

# Artist page for The Strokes, passed to the artist endpoints below.
the_strokes_uri = 'https://open.spotify.com/artist/0epOFNiUfyON9EYx7Tpr6V'

# Getting All Track Data

In [4]:
# Fetch the playlist's tracks. playlist_tracks() returns a single page of
# items, so follow the 'next' links and merge every page into the first
# response; otherwise a playlist longer than one page is silently truncated.
results_all_tracks = spotify.playlist_tracks('https://open.spotify.com/playlist/1cyjmzQGadZ5vrGMjUijgI?si=26e84ab01fc749f3')
playlist_page = results_all_tracks
while playlist_page['next']:
    playlist_page = spotify.next(playlist_page)
    results_all_tracks['items'].extend(playlist_page['items'])

In [7]:
def get_track_statistics(tracks):
    """Collect audio features and audio analysis for every track of a playlist.

    Parameters
    ----------
    tracks : dict
        Playlist response such as the one returned by
        ``spotify.playlist_tracks``. Only ``tracks['items']`` is read; each
        item is expected to hold a ``'track'`` object with ``'name'``,
        ``'id'`` and ``'album'``.

    Returns
    -------
    dict
        Maps track name -> ``{'Album': ...}`` merged with the track's audio
        features and the ``'track'`` section of its audio analysis (on key
        clashes the analysis value wins). Tracks sharing the same name
        overwrite each other; the last one is kept.

    Notes
    -----
    Playlist items without a track object or id, and tracks for which no
    audio features are returned, are skipped with a warning instead of
    aborting the whole run with a ``TypeError``.
    """
    stats_by_name = {}
    for item in tqdm(tracks['items']):
        track = item.get('track')
        # Playlist items can carry a null track (or a track without an id);
        # there is nothing to look up for those.
        if not track or not track.get('id'):
            warnings.warn('Skipping playlist item without a track id')
            continue

        track_id = track['id']
        features = spotify.audio_features(track_id)[0]
        # audio_features yields None for ids it has no data for; merging None
        # with `|` would raise TypeError.
        if features is None:
            warnings.warn(f"Skipping {track['name']!r}: no audio features available")
            continue

        analysis = spotify.audio_analysis(track_id)['track']
        stats_by_name[track['name']] = {'Album': track['album']['name']} | features | analysis
    return stats_by_name

In [14]:
# Audio features & analysis data for every track in the playlist response.
# get_track_statistics() queries the Spotify API once per playlist item for
# features and once for analysis, so this cell takes a while to run.
all_tracks = get_track_statistics(results_all_tracks)

  0%|          | 0/91 [00:00<?, ?it/s]

In [15]:
# Removing unnecessary columns.
# Fields of the merged features/analysis dicts that are not kept for the
# final dataset.
UNUSED_TRACK_FIELDS = (
    'codestring',
    'echoprintstring',
    'synchstring',
    'rhythmstring',
    'sample_md5',
    'offset_seconds',
    'window_seconds',
    'analysis_sample_rate',
    'analysis_channels',
    'tempo_confidence',
    'time_signature_confidence',
    'key_confidence',
    'mode_confidence',
    'code_version',
    'echoprint_version',
    'synch_version',
    'rhythm_version',
    'type',
    'id',
    'uri',
    'track_href',
    'analysis_url',
    'end_of_fade_in',
    'start_of_fade_out',
    'duration_ms',
    'num_samples',
)

for track_stats in all_tracks.values():
    for field in UNUSED_TRACK_FIELDS:
        # pop() with a default keeps this cell idempotent: re-running it (or
        # hitting a track that lacks a field) no longer raises KeyError the
        # way `del` did.
        track_stats.pop(field, None)

In [16]:
# Inspect which track names were collected (the keys of the statistics dict).
all_tracks.keys()

dict_keys(['The Modern Age - Rough Trade Version', 'Last Nite - Rough Trade Version - The Modern Age B-Side', 'Hard To Explain', 'New York City Cops (Hard to Explain B-Side)', 'Last Nite', 'When It Started (Last Nite B-Side)', 'Someday', 'Alone, Together - Home Recording - Someday B-Side', 'Is This It - Home Recording - Someday B-Side', '12:51', 'The Way It Is - Home Recording - 12:51 B-Side', 'Reptilia', 'Modern Girls & Old Fashion Men - Reptilia B-Side', 'The End Has No End', 'Clampdown - The End Has No End B-Side / Live at Alexandra Palace, London, UK - Dec. 5, 2003', 'Juicebox', 'Hawaii - Juicebox B-Side', 'Heart In a Cage', 'I\'ll Try Anything Once ("You Only Live Once" demo) - Heart In a Cage B-Side', 'You Only Live Once', 'Mercy Mercy Me (The Ecology) - You Only Live Once B-Side', 'The Adults Are Talking', 'Selfless', 'Brooklyn Bridge To Chorus', 'Bad Decisions', 'Eternal Summer', 'At The Door', 'Why Are Sundays So Depressing', 'Not The Same Anymore', 'Ode To The Mets', 'Is This

# Getting Top 10 Tracks Data & Popularity

In [30]:
# Fetch the artist's top tracks. No country is passed here, so spotipy's
# default market applies.
results_top_tracks = spotify.artist_top_tracks(the_strokes_uri)

In [52]:
# Peek at the raw response to see which fields each top track carries
# (the output is long).
results_top_tracks['tracks']

[{'album': {'album_type': 'album',
   'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/0epOFNiUfyON9EYx7Tpr6V'},
     'href': 'https://api.spotify.com/v1/artists/0epOFNiUfyON9EYx7Tpr6V',
     'id': '0epOFNiUfyON9EYx7Tpr6V',
     'name': 'The Strokes',
     'type': 'artist',
     'uri': 'spotify:artist:0epOFNiUfyON9EYx7Tpr6V'}],
   'external_urls': {'spotify': 'https://open.spotify.com/album/2xkZV2Hl1Omi8rk2D7t5lN'},
   'href': 'https://api.spotify.com/v1/albums/2xkZV2Hl1Omi8rk2D7t5lN',
   'id': '2xkZV2Hl1Omi8rk2D7t5lN',
   'images': [{'height': 640,
     'url': 'https://i.scdn.co/image/ab67616d0000b273bfa99afb5ef0d26d5064b23b',
     'width': 640},
    {'height': 300,
     'url': 'https://i.scdn.co/image/ab67616d00001e02bfa99afb5ef0d26d5064b23b',
     'width': 300},
    {'height': 64,
     'url': 'https://i.scdn.co/image/ab67616d00004851bfa99afb5ef0d26d5064b23b',
     'width': 64}],
   'is_playable': True,
   'name': 'The New Abnormal',
   'release_date': '2020

In [54]:
def get_top_track_statistics(tracks):
    """Collect audio features, audio analysis and popularity for top tracks.

    Parameters
    ----------
    tracks : dict
        Top-tracks response such as the one returned by
        ``spotify.artist_top_tracks``. Only ``tracks['tracks']`` is read;
        each entry is expected to hold ``'name'``, ``'id'``, ``'album'`` and
        ``'popularity'``.

    Returns
    -------
    dict
        Maps track name -> ``{'Album': ...}`` merged with the track's audio
        features, the ``'track'`` section of its audio analysis and
        ``{'Popularity': ...}`` (later sources win on key clashes). Tracks
        sharing the same name overwrite each other; the last one is kept.

    Notes
    -----
    Tracks for which no audio features are returned are skipped with a
    warning instead of aborting the whole run with a ``TypeError``.
    """
    stats_by_name = {}
    for track in tqdm(tracks['tracks']):
        track_id = track['id']
        features = spotify.audio_features(track_id)[0]
        # audio_features yields None for ids it has no data for; merging None
        # with `|` would raise TypeError.
        if features is None:
            warnings.warn(f"Skipping {track['name']!r}: no audio features available")
            continue

        analysis = spotify.audio_analysis(track_id)['track']
        stats_by_name[track['name']] = (
            {'Album': track['album']['name']}
            | features
            | analysis
            | {'Popularity': track['popularity']}
        )
    return stats_by_name

In [55]:
# Audio features, analysis data and popularity for the artist's top tracks.
top_tracks = get_top_track_statistics(results_top_tracks)

  0%|          | 0/10 [00:00<?, ?it/s]

In [56]:
# Removing unnecessary columns.
# Fields of the merged features/analysis dicts that are not kept for the
# final dataset.
UNUSED_TRACK_FIELDS = (
    'codestring',
    'echoprintstring',
    'synchstring',
    'rhythmstring',
    'sample_md5',
    'offset_seconds',
    'window_seconds',
    'analysis_sample_rate',
    'analysis_channels',
    'tempo_confidence',
    'time_signature_confidence',
    'key_confidence',
    'mode_confidence',
    'code_version',
    'echoprint_version',
    'synch_version',
    'rhythm_version',
    'type',
    'id',
    'uri',
    'track_href',
    'analysis_url',
    'end_of_fade_in',
    'start_of_fade_out',
    'duration_ms',
    'num_samples',
)

for track_stats in top_tracks.values():
    for field in UNUSED_TRACK_FIELDS:
        # pop() with a default keeps this cell idempotent: re-running it (or
        # hitting a track that lacks a field) no longer raises KeyError the
        # way `del` did.
        track_stats.pop(field, None)

### ~~Getting Track Data By Country~~
(After testing, this unfortunately doesn't work: the result is the same data for every country, only with the songs that aren't available in that country's Spotify market filtered out.)

In [9]:
# # getting top_tracks for every country where Spotify is available
# top_tracks_all_countries = {}
# for country_code in tqdm(spotify.available_markets()['markets']):
#     top_tracks_all_countries[country_code] = spotify.artist_top_tracks(the_strokes_uri, country = country_code)

# def get_top_tracks_by_country(tracks):
#     """looping through each country and creating simple dictionary of country name: top 10 tracks in order and popularity"""
#     result_dict_ = {}
#     for country in tqdm(list(tracks.keys())):
#         top_10_list = []
#         for track in tracks[country]['tracks']:
#             top_10_list.append(track['name'])
#         result_dict_[country] = top_10_list
#     return result_dict_

# top_tracks_by_country = get_top_tracks_by_country(top_tracks_all_countries)
# top_tracks_by_country

# Conversion to Dataframe

In [62]:
# One row per track, one column per statistic. from_dict(orient='index')
# builds the same track-per-row layout as pd.DataFrame(d).T, but the columns
# are assembled directly, so numeric columns get numeric dtypes instead of
# every column ending up as object dtype after the transpose.
df_all_tracks = pd.DataFrame.from_dict(all_tracks, orient='index')
df_top_tracks = pd.DataFrame.from_dict(top_tracks, orient='index')

In [64]:
# Spot-check a single track's row to confirm which columns were kept.
df_all_tracks.loc['Someday']

Album               Is This It
danceability             0.539
energy                   0.521
key                          9
loudness                 -7.46
mode                         1
speechiness             0.0299
acousticness           0.00425
instrumentalness             0
liveness                 0.367
valence                  0.679
tempo                  106.035
time_signature               4
duration                183.44
Name: Someday, dtype: object

In [61]:
# Persist both frames as JSON (column-oriented) and as Excel workbooks
# under src/.
exports = (
    (df_all_tracks, 'src/all_tracks.json', 'src/all_tracks.xlsx'),
    (df_top_tracks, 'src/top_tracks.json', 'src/top_tracks.xlsx'),
)
for frame, json_path, excel_path in exports:
    frame.to_json(json_path, orient='columns')
    frame.to_excel(excel_path)

Potential visualizations:
- group by album, top tracks vs. others
- correlation between rankings and audio features
- audio features by song
- map by country: popularity, top tracks