In [139]:
import numpy as np
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import keyring
import time

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## Setup Spotipy credentials and query wrapper

In [140]:
client_credentials_manager = SpotifyClientCredentials(client_id=keyring.get_password('spotify', 'cid'),
                                                      client_secret=keyring.get_password('spotify', 'secret') )
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

## Get sample artists data

In [141]:
artist_list = ['Manila Grey']
artist_id_list = ['7KC9q5wx0bxMD5ABgLCoEd']
track_id = '4oQa2jrXd3dqbMGNC7oyqf'

# artist_list = ['Flow G']
# artist_id_list = ['3PDFGpPl4ienSn5Vwisfrh']

# artist_list = ['Kiyo']
# artist_id_list = ['6gcteR920pLEynlHzjSRYd']

# artist_list = ['XXXTENTACION']
# artist_id_list = ['15UsOTVnJzReFVN1VCnxy4']

In [142]:
# View sp.track output
# sp.artist(artists[0])

## Read consolidated spotify daily charts

In [143]:
index = 0
track_list = []
track_dict = {}
# track_list = ['4oQa2jrXd3dqbMGNC7oyqf','612k0JGUvwPwfXoldaEqs8','51C5DGEFEGVxvwdI4HHcHf','3SftRVoGzAfsD0zk6bOB02','4JqgJSyI4kg8M0RYOPk8wL']           

for x in range(0,len(artist_list)):
    results = sp.search(q=str(artist_list[x]), limit=50)
    for i, t in enumerate(results['tracks']['items']):
        track_list.append(str(t['id'].strip( 'u' )))
        track_dict[str(t['id'].strip( 'u' ))] = t['name']
        print("Adding ",t['id'],t['name'])

print(track_dict)

Adding  3SG0qa5JRxmMIee93u9E6I Urong Sulong
Adding  2kRjoaqiyeS0Faus5wKztH Ikaw Lang
Adding  0DCzMvkONMvWDiOtVzyRUX Dear Breeze
Adding  1HDqBV6gWeqFStJHLmPjh4 ふたりの夏物語 NEVER ENDING SUMMER
Adding  4Fi00h3wo3P8gmhAHT1qX4 Bang! (feat. Hayley Kiyoko) - AhhHaa Remix
Adding  1UcrhQg0HiRmlOHhIkEFQR Nandito Na
Adding  7tQ2P3dP76bj1aSEfe8StO Dantay
Adding  44Iv2pSC8nb5lXqJK37GxM Polaroid
Adding  3JNfGfLZMYZtXpL7q4zU0Z Urong Sulong - Band Version
Adding  53KBJe90rbY3NDa8UMjl8G G
Adding  22p7tgi6cFVBaH7vAbgcUY River's Island
Adding  4RvJ5sXFvWt8n9HHt7yBRD Summer Suspicion
Adding  6PMobmlh8yrpEjFjioPFGy Kiyomizu-Dera
Adding  5Apf3vHBoTfRBTRhrndVXO Midnight Down Town
Adding  7oFpSsfjl8ElUesHJPT8Cv Missed Calls (feat. Hayley Kiyoko)
Adding  0Fak0BCePEYQzfApLLKPIc Pantalan, Pt. 1
Adding  3dNjUFt6EFU4Gq6Q5vfJqf Girls Like Girls
Adding  6aYRLFKPuIt399nJdro93G Scramble Cross
Adding  6GbgHcskEO8vk9Rlrw908V ガラスのPALM TREE
Adding  3YdrHVyjuChT3koerYnC80 Iskripted
Adding  38LrhW3eQSt3cK88dkvl5e Misty Night Cr

In [144]:
track_df = pd.DataFrame([track_dict]).T
track_df = track_df.reset_index()
track_df.columns = ['track_id', 'track_name']
track_df.head()

Unnamed: 0,track_id,track_name
0,3SG0qa5JRxmMIee93u9E6I,Urong Sulong
1,2kRjoaqiyeS0Faus5wKztH,Ikaw Lang
2,0DCzMvkONMvWDiOtVzyRUX,Dear Breeze
3,1HDqBV6gWeqFStJHLmPjh4,ふたりの夏物語 NEVER ENDING SUMMER
4,4Fi00h3wo3P8gmhAHT1qX4,Bang! (feat. Hayley Kiyoko) - AhhHaa Remix


## Get data of unique tracks in charts 

In [145]:
def get_track_data(t_id):                    
    track_data = sp.track(t_id)
    track_features = sp.audio_features(t_id)
    
    #get only main(first) artist
    td_list = [t_id,\
               track_data['name'],\
               track_data['artists'][0]['id'],\
               track_data['artists'][0]['name'],\
               track_data['album']['uri'].split(":")[2],\
               track_data['duration_ms'],\
               track_data['album']['release_date'],\
               track_data['popularity']]
    data = pd.DataFrame([td_list], columns = ['track_id','track_name','artist_id','artist_name','album_id','duration','release_date','popularity'])

    relevant_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',\
                     'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']  
    
    tf_data = pd.DataFrame(track_features)
    tf_data = tf_data[relevant_cols]
    
    data = pd.concat([data, tf_data], axis=1)
    return data


In [146]:
# get_track_data(track_id)

In [147]:
df_list=[]

for i,track_id in enumerate(track_list):
    print('[%d/%d] Fetching track data for %s... ' % 
            (i+1,len(track_list),track_df[track_df['track_id']==track_id]['track_name']), end = " ") 
    try:
        track_data = get_track_data(track_id) 
        df_list.append(track_data)
        print('done!')
    except:
        print('[%d/%d] Failed to fetch %s... ' % 
            (i+1,len(track_list),track_df[track_df['track_id']==track_id]['track_name']), end = " ") 
    
    #sleep for 100 secs per 100 requests to avoid being blocked
    if (i % 100 == 0)&(i > 0):
        time.sleep(5)

[1/50] Fetching track data for 0    Urong Sulong
Name: track_name, dtype: object...  done!
[2/50] Fetching track data for 1    Ikaw Lang
Name: track_name, dtype: object...  done!
[3/50] Fetching track data for 2    Dear Breeze
Name: track_name, dtype: object...  done!
[4/50] Fetching track data for 3    ふたりの夏物語 NEVER ENDING SUMMER
Name: track_name, dtype: object...  done!
[5/50] Fetching track data for 4    Bang! (feat. Hayley Kiyoko) - AhhHaa Remix
Name: track_name, dtype: object...  done!
[6/50] Fetching track data for 5    Nandito Na
Name: track_name, dtype: object...  done!
[7/50] Fetching track data for 6    Dantay
Name: track_name, dtype: object...  done!
[8/50] Fetching track data for 7    Polaroid
Name: track_name, dtype: object...  done!
[9/50] Fetching track data for 8    Urong Sulong - Band Version
Name: track_name, dtype: object...  done!
[10/50] Fetching track data for 9    G
Name: track_name, dtype: object...  done!
[11/50] Fetching track data for 10    River's Island
Nam

In [148]:
tracks_data_df = pd.concat(df_list)
tracks_data_df = tracks_data_df[tracks_data_df['artist_id'].isin(artist_id_list)]
tracks_data_df.head()

Unnamed: 0,track_id,track_name,artist_id,artist_name,album_id,duration,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2kRjoaqiyeS0Faus5wKztH,Ikaw Lang,6gcteR920pLEynlHzjSRYd,Kiyo,3EMkSD7V6DOoEAwk9pnOfa,241658,2019-05-10,61,0.705,0.499,7,-8.554,1,0.0714,0.317,1.6e-05,0.131,0.5,79.992
0,1UcrhQg0HiRmlOHhIkEFQR,Nandito Na,6gcteR920pLEynlHzjSRYd,Kiyo,52iwum3yPBYLWiZQMqxIU9,213681,2020-04-20,55,0.65,0.22,4,-16.59,1,0.166,0.835,0.0568,0.133,0.332,82.005
0,7tQ2P3dP76bj1aSEfe8StO,Dantay,6gcteR920pLEynlHzjSRYd,Kiyo,02qNpe3vtTVHzGytclgmWe,292414,2020-02-14,54,0.386,0.36,2,-16.437,1,0.315,0.812,0.00195,0.0936,0.141,71.576
0,53KBJe90rbY3NDa8UMjl8G,G,6gcteR920pLEynlHzjSRYd,Kiyo,7G4Cmxnxt1cSBLXphFEE8m,289567,2019-01-03,52,0.554,0.619,2,-9.26,0,0.12,0.499,0.000133,0.0865,0.26,82.971
0,0Fak0BCePEYQzfApLLKPIc,"Pantalan, Pt. 1",6gcteR920pLEynlHzjSRYd,Kiyo,4556ESN6fyzN3mLRq3qr8j,270602,2019-08-10,50,0.649,0.517,10,-11.934,1,0.255,0.653,0.0,0.0922,0.607,89.766


In [149]:
# tracks_data_df.to_csv('data/seed_tracks_flow_g.csv', index=False, encoding='utf-8')

In [150]:
# tracks_data_df.describe()

## Get data of unique artists in charts 

In [151]:
artist_df = tracks_data_df[['artist_id','artist_name']].drop_duplicates()
artist_df.head()

Unnamed: 0,artist_id,artist_name
0,6gcteR920pLEynlHzjSRYd,Kiyo


In [152]:
def get_artist_data(a_id):
       
    artist_data = sp.artist(a_id)

    ad_list = [a_id,\
               artist_data['name'],\
               artist_data['followers']['total'],\
               artist_data['genres'],\
               artist_data['popularity']]
    data = pd.DataFrame([ad_list], columns = ['artist_id','artist_name','total_followers','genres','popularity'])

    return data

In [153]:
get_artist_data(artist_id_list[0])

Unnamed: 0,artist_id,artist_name,total_followers,genres,popularity
0,6gcteR920pLEynlHzjSRYd,Kiyo,127797,"[pinoy hip hop, pinoy indie, tagalog rap]",59


In [154]:
artist_list = artist_df['artist_id'].values
df_list=[]

for i,artist_id in enumerate(artist_list):
    print('[%d/%d] Fetching artist data for %s... ' % 
          (i+1,len(artist_list),artist_df[artist_df['artist_id']==artist_id]['artist_name'].values[0]), end = " ") 
    artist_data = get_artist_data(artist_id) 
    df_list.append(artist_data)
    print('done!')
    
    #sleep for 100 secs per 100 requests to avoid being blocked
    if (i % 100 == 0)& (i > 0):
        time.sleep(5)   

[1/1] Fetching artist data for Kiyo...  done!


In [155]:
artist_data_df = pd.concat(df_list)
artist_data_df

Unnamed: 0,artist_id,artist_name,total_followers,genres,popularity
0,6gcteR920pLEynlHzjSRYd,Kiyo,127797,"[pinoy hip hop, pinoy indie, tagalog rap]",59


In [156]:
# artist_data_df.to_csv('data/seed_artists.csv', index=False, encoding='utf-8')

## Merge track and artist data

In [157]:
track_artist_df = tracks_data_df.merge(artist_data_df, on=['artist_id','artist_name'])
track_artist_df = track_artist_df.rename(columns={'popularity_x': 'track_popularity', 'popularity_y': 'artist_popularity'})
track_artist_df.head(3)

Unnamed: 0,track_id,track_name,artist_id,artist_name,album_id,duration,release_date,track_popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,total_followers,genres,artist_popularity
0,2kRjoaqiyeS0Faus5wKztH,Ikaw Lang,6gcteR920pLEynlHzjSRYd,Kiyo,3EMkSD7V6DOoEAwk9pnOfa,241658,2019-05-10,61,0.705,0.499,7,-8.554,1,0.0714,0.317,1.6e-05,0.131,0.5,79.992,127797,"[pinoy hip hop, pinoy indie, tagalog rap]",59
1,1UcrhQg0HiRmlOHhIkEFQR,Nandito Na,6gcteR920pLEynlHzjSRYd,Kiyo,52iwum3yPBYLWiZQMqxIU9,213681,2020-04-20,55,0.65,0.22,4,-16.59,1,0.166,0.835,0.0568,0.133,0.332,82.005,127797,"[pinoy hip hop, pinoy indie, tagalog rap]",59
2,7tQ2P3dP76bj1aSEfe8StO,Dantay,6gcteR920pLEynlHzjSRYd,Kiyo,02qNpe3vtTVHzGytclgmWe,292414,2020-02-14,54,0.386,0.36,2,-16.437,1,0.315,0.812,0.00195,0.0936,0.141,71.576,127797,"[pinoy hip hop, pinoy indie, tagalog rap]",59


In [158]:
track_artist_df.to_csv('data/seed_tracks_kiyo.csv', index=False, encoding='utf-8')

## Resources
- Spotify API reference manual https://developer.spotify.com/documentation/web-api/reference/search/search/