In [175]:
import numpy as np
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import keyring
import time

## 0. Setup Spotipy credentials and query wrapper

In [176]:
client_credentials_manager = SpotifyClientCredentials(client_id=keyring.get_password('spotify', 'cid'),
                                                      client_secret=keyring.get_password('spotify', 'secret') )
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)


Set keyword

In [177]:
KEYWORD = 'Nyoy Volante'

## 1. Search Artist for keyword

##### View the structure of a search query

In [178]:
results = sp.search(q=KEYWORD, type='artist', market = 'PH')
[r['name'] for r in results['artists']['items']]

['Nyoy Volante', 'Nyoy Volante with Mannos']

In [179]:
results['artists'].keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [180]:
results['artists']['items'][0].keys()

dict_keys(['external_urls', 'followers', 'genres', 'href', 'id', 'images', 'name', 'popularity', 'type', 'uri'])

In [181]:
artist_id = results['artists']['items'][0]['id']

## 2. Get Albums of an Artist Data

##### View the structure of a albums of an artist query

***

In [182]:
results = sp.artist_albums(artist_id)
albums = results['items']
while results['next']:
    results = sp.next(results)
    albums.extend(results['items'])

album_id = []
album_name = []
album_numtracks = []
album_id.extend([p['href'].split('/')[5] for p in albums])
album_name.extend([p['name'] for p in albums])
album_numtracks.extend([p['total_tracks'] for p in albums])

In [183]:
album_df = pd.DataFrame()
album_df['album_id'] = album_id
album_df['album_name'] = album_name
album_df['album_numtracks'] = album_numtracks
album_df

Unnamed: 0,album_id,album_name,album_numtracks
0,5jzfPxlD5UD4Aw3cEnelcC,Tuloy Pa Rin,12
1,4oif6qMFEOF3E6uf0bvDOV,Tuloy Pa Rin (Minus One Versions),12
2,4aW4RCeWwmde6JSVjDUgaD,In You (International Version),10
3,1maIgsJ7htTqCDrhe8aucE,Now Hear This,11
4,2A48roBpFpFjatWtksmsQE,Heartstrings,12
5,4EQ7a4ztLVN5OUgxJJVIxY,Nyoy Volante,11
6,6Mo7cq7JRqSBZVDqXvwSel,Huling Sayaw,1
7,53Vzv0bR02zAV9S2bsCyFc,I Wanna Kiss You,1
8,0lPcGBzpPjoMAS62iExieD,Awit Ko Na Naisulat Dahil Sa Kagandahan Mo (Mi...,1
9,5S0ZDnuNJpjlN9YiHYpsCW,Awit Ko Na Naisulat Dahil Sa Kagandahan Mo,1


## 3. Get Tracks from a Playlist

##### View the structure of a playlist_tracks query

In [184]:
track = sp.album_tracks('6BIadH4xk29Synkb6Rh8WR')

In [185]:
track

{'href': 'https://api.spotify.com/v1/albums/6BIadH4xk29Synkb6Rh8WR/tracks?offset=0&limit=50',
 'items': [{'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4yBBNmdvVaoPEnr2lt14q7'},
     'href': 'https://api.spotify.com/v1/artists/4yBBNmdvVaoPEnr2lt14q7',
     'id': '4yBBNmdvVaoPEnr2lt14q7',
     'name': 'Faust',
     'type': 'artist',
     'uri': 'spotify:artist:4yBBNmdvVaoPEnr2lt14q7'}],
   'available_markets': ['AD',
    'AE',
    'AL',
    'AR',
    'AT',
    'AU',
    'BA',
    'BE',
    'BG',
    'BH',
    'BO',
    'BR',
    'BY',
    'CA',
    'CH',
    'CL',
    'CO',
    'CR',
    'CY',
    'CZ',
    'DE',
    'DK',
    'DO',
    'DZ',
    'EC',
    'EE',
    'EG',
    'ES',
    'FI',
    'FR',
    'GB',
    'GR',
    'GT',
    'HK',
    'HN',
    'HR',
    'HU',
    'ID',
    'IE',
    'IL',
    'IN',
    'IS',
    'IT',
    'JO',
    'JP',
    'KR',
    'KW',
    'KZ',
    'LB',
    'LI',
    'LT',
    'LU',
    'LV',
    'MA',
    'MC',
    'MD',
 

In [186]:
track.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [187]:
track['items']

[{'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4yBBNmdvVaoPEnr2lt14q7'},
    'href': 'https://api.spotify.com/v1/artists/4yBBNmdvVaoPEnr2lt14q7',
    'id': '4yBBNmdvVaoPEnr2lt14q7',
    'name': 'Faust',
    'type': 'artist',
    'uri': 'spotify:artist:4yBBNmdvVaoPEnr2lt14q7'}],
  'available_markets': ['AD',
   'AE',
   'AL',
   'AR',
   'AT',
   'AU',
   'BA',
   'BE',
   'BG',
   'BH',
   'BO',
   'BR',
   'BY',
   'CA',
   'CH',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DE',
   'DK',
   'DO',
   'DZ',
   'EC',
   'EE',
   'EG',
   'ES',
   'FI',
   'FR',
   'GB',
   'GR',
   'GT',
   'HK',
   'HN',
   'HR',
   'HU',
   'ID',
   'IE',
   'IL',
   'IN',
   'IS',
   'IT',
   'JO',
   'JP',
   'KR',
   'KW',
   'KZ',
   'LB',
   'LI',
   'LT',
   'LU',
   'LV',
   'MA',
   'MC',
   'MD',
   'ME',
   'MK',
   'MT',
   'MX',
   'MY',
   'NI',
   'NL',
   'NO',
   'NZ',
   'OM',
   'PA',
   'PE',
   'PH',
   'PL',
   'PS',
   'PT',
   'PY',
   'QA',
   '

***

In [188]:
def get_relevant_track_data(tracks_data, album_id, album_name):
    try:
        relevant_track_data = { key: tracks_data[key] for key in ['id','artists','name','duration_ms'] }
        relevant_track_data['artist_id']=[artist['id'] for artist in relevant_track_data['artists'] ]
        relevant_track_data['artist_name']=[artist['name']for artist in relevant_track_data['artists'] ]
        relevant_track_data['num_artists']=len([artist['id'] for artist in relevant_track_data['artists']]) 
        relevant_track_data['album_id']=album_id
        relevant_track_data['album_name']=album_name
        
        relevant_track_data.pop('artists', None)
        return relevant_track_data
    except:
        return 

In [189]:
#album_tracks
all_track_data = []

for _,p_id,p_name, p_numtracks in album_df[['album_id','album_name','album_numtracks']].to_records():
    print("Fetching data for album = %s, with total tracks: %d" % (p_name,p_numtracks))
    n_fetches = p_numtracks // 100
    
    album_track_data = []
    #get tracks in batches of 100
    for n in np.arange(n_fetches+1):
        track_data = sp.album_tracks(p_id, offset=n*100)
        album_track_data.extend([get_relevant_track_data(item, p_id,p_name) for item in track_data['items']])
        
    all_track_data.extend(album_track_data)

Fetching data for album = Tuloy Pa Rin, with total tracks: 12
Fetching data for album = Tuloy Pa Rin (Minus One Versions), with total tracks: 12
Fetching data for album = In You (International Version), with total tracks: 10
Fetching data for album = Now Hear This, with total tracks: 11
Fetching data for album = Heartstrings, with total tracks: 12
Fetching data for album = Nyoy Volante, with total tracks: 11
Fetching data for album = Huling Sayaw, with total tracks: 1
Fetching data for album = I Wanna Kiss You, with total tracks: 1
Fetching data for album = Awit Ko Na Naisulat Dahil Sa Kagandahan Mo (Minus One), with total tracks: 1
Fetching data for album = Awit Ko Na Naisulat Dahil Sa Kagandahan Mo, with total tracks: 1
Fetching data for album = Pieces of My Life, with total tracks: 1
Fetching data for album = Chosen, with total tracks: 1
Fetching data for album = God Gave Me You, with total tracks: 1
Fetching data for album = PhilPop 2020: Music Breaking Borders, with total tracks: 

In [190]:
for n,a in enumerate(all_track_data):
    try:
        len(a)
    except:
        print(n)

In [191]:
tracks_df = pd.DataFrame([data for data in all_track_data if data is not None])
tracks_df = tracks_df.rename(columns={'id':'track_id'})
tracks_df['artist_id'] = tracks_df.apply(lambda x: x['artist_id'][0] if x['num_artists']==1 else x['artist_id'], axis=1)
tracks_df['artist_name'] = tracks_df.apply(lambda x: x['artist_name'][0] if x['num_artists']==1 else x['artist_name'], axis=1)
tracks_df.head()

Unnamed: 0,track_id,name,duration_ms,artist_id,artist_name,num_artists,album_id,album_name
0,2pAFgfaNVioQGhC0jhtcsG,Ipagpatawad Mo,239333,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,1,5jzfPxlD5UD4Aw3cEnelcC,Tuloy Pa Rin
1,33pHgWjuAXvmE6Z9b51avt,Bakit Ba Ganyan,312293,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,1,5jzfPxlD5UD4Aw3cEnelcC,Tuloy Pa Rin
2,3O7vNSKiSa5uWpaxjC4712,Pikit,391080,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,1,5jzfPxlD5UD4Aw3cEnelcC,Tuloy Pa Rin
3,2PTQBDNpLnjix9Dsa33d8l,Tuloy Pa Rin,236453,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,1,5jzfPxlD5UD4Aw3cEnelcC,Tuloy Pa Rin
4,23kb0nomnO71oLqw7Okj3T,Magkasuyo Buong Gabi,235226,"[3pTaNQJ7TAU4yL7KEZp5sF, 5xJfSowQoxaxY4DCVLTfr5]","[Nyoy Volante, Sabrina]",2,5jzfPxlD5UD4Aw3cEnelcC,Tuloy Pa Rin


In [192]:
len(tracks_df)

512

In [193]:
len(tracks_df['track_id'].unique())

512

In [194]:
tracks_df.to_csv("data/"+KEYWORD+"_album_tracks.csv",encoding='utf=8',index=False)

## 4. Get Tracks from a Album

In [195]:
def get_track_data(t_id, playlist_id,playlist_name):                    
    track_data = sp.track(t_id)
    track_features = sp.audio_features(t_id)
    
    #get only main(first) artist
    td_list = [t_id,\
               track_data['name'],\
               track_data['artists'][0]['id'],\
               track_data['artists'][0]['name'],\
               track_data['album']['uri'].split(":")[2],\
               track_data['duration_ms'],\
               track_data['album']['release_date'],\
               track_data['popularity']]
    data = pd.DataFrame([td_list], columns = ['track_id','track_name','artist_id','artist_name','album_id','duration','release_date','popularity'])

    relevant_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',\
                     'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']  
    
    tf_data = pd.DataFrame(track_features)
    tf_data = tf_data[relevant_cols]
    #tag with source playlist
    tf_data['playlist_id'] = playlist_id
    tf_data['playlist_name'] = playlist_name
    
    data = pd.concat([data, tf_data], axis=1)
    return data


In [196]:
downloaded_track_data = []

In [197]:
track_list = tracks_df['track_id'].values
playlist_name_list = tracks_df['track_id'].values
playlist_id_list = tracks_df['track_id'].values
df_list=[]

for i,track_id in enumerate(track_list):
    try:
        if track_id not in downloaded_track_data:
            print('[%d/%d] Fetching track data for %s... ' % 
                  (i+1,len(track_list),tracks_df[tracks_df['track_id']==track_id]['name'].values[0]), end = " ") 
            track_data = get_track_data(track_id, playlist_id_list[i],playlist_name_list[i]) 
            df_list.append(track_data)
            downloaded_track_data.append(track_id)
            print('done!')
    except:
        continue
    else:
        continue
    
    #sleep for 60 secs per 100 requests to avoid being blocked
    if (i % 100 == 0)&(i > 0):
        time.sleep(20)    

[1/512] Fetching track data for Ipagpatawad Mo...  done!
[2/512] Fetching track data for Bakit Ba Ganyan...  done!
[3/512] Fetching track data for Pikit...  done!
[4/512] Fetching track data for Tuloy Pa Rin...  done!
[5/512] Fetching track data for Magkasuyo Buong Gabi...  done!
[6/512] Fetching track data for Nandito Ako...  done!
[7/512] Fetching track data for Sa Isip Ko...  done!
[8/512] Fetching track data for Kamakailan Lang...  done!
[9/512] Fetching track data for Sana Dalawa Ang Puso Ko...  done!
[10/512] Fetching track data for Miss Kita...  done!
[11/512] Fetching track data for Basta't Kasama Kita...  done!
[12/512] Fetching track data for Bukas Na Lang Kita Mamahalin...  done!
[13/512] Fetching track data for Ipagpatawad Mo - Instrumental...  done!
[14/512] Fetching track data for Bakit Ba Ganyan - Instrumental...  done!
[15/512] Fetching track data for Pikit - Instrumental...  done!
[16/512] Fetching track data for Tuloy Pa Rin - Instrumental...  done!
[17/512] Fetching 

[134/512] Fetching track data for God Gave Me You - Acoustic...  done!
[135/512] Fetching track data for Fix You - Unplugged...  done!
[136/512] Fetching track data for Maybe This Time - Acoustic...  done!
[137/512] Fetching track data for Caught In That Feeling - Acoustic...  done!
[138/512] Fetching track data for I Believe I Can Fly - Acoustic...  done!
[139/512] Fetching track data for Lord Patawad - Acoustic...  done!
[140/512] Fetching track data for Make You Feel My Love - Acoustic...  done!
[141/512] Fetching track data for Tinatangi...  done!
[142/512] Fetching track data for Binibini Sa MRT...  done!
[143/512] Fetching track data for Kahon...  done!
[144/512] Fetching track data for Pabili Po...  done!
[145/512] Fetching track data for Dumadagundong...  done!
[146/512] Fetching track data for Friday Night...  done!
[147/512] Fetching track data for Lahat...  done!
[148/512] Fetching track data for Nobody but You...  done!
[149/512] Fetching track data for Di Na Muli...  done!

[263/512] Fetching track data for When I Fall in Love - Minus One...  done!
[264/512] Fetching track data for Paano - Minus One...  done!
[265/512] Fetching track data for I Finally Found Someone - Minus One...  done!
[266/512] Fetching track data for Kahit Maputi Na Ang Buhok Ko - Minus One...  done!
[267/512] Fetching track data for Lord Patawad...  done!
[268/512] Fetching track data for Biglang Liko...  done!
[269/512] Fetching track data for Tao Lang...  done!
[270/512] Fetching track data for Dota O Ako...  done!
[271/512] Fetching track data for Paakyat Ka Pa Lang Pababa Na Ako...  done!
[272/512] Fetching track data for Tuloy Pa Rin...  done!
[273/512] Fetching track data for God Gave Me You...  done!
[274/512] Fetching track data for It Takes A Man And A Woman...  done!
[275/512] Fetching track data for Dota O Ako - Part 2...  done!
[276/512] Fetching track data for Lord Patawad...  done!
[277/512] Fetching track data for Biglang Liko...  done!
[278/512] Fetching track data fo

[399/512] Fetching track data for If I Let You Go...  done!
[400/512] Fetching track data for It Must Have Been Love...  done!
[401/512] Fetching track data for Runaway...  done!
[402/512] Fetching track data for Falling Away...  done!
[403/512] Fetching track data for PiNK (Pag-ibig Na Kaya?)...  done!
[404/512] Fetching track data for Tabing Ilog...  done!
[405/512] Fetching track data for Kung Puede Lang - Rock Version...  done!
[406/512] Fetching track data for I'm Your Friend...  done!
[407/512] Fetching track data for Himala ay Nawala...  done!
[408/512] Fetching track data for My Cherie Amour...  done!
[409/512] Fetching track data for Right Here Waiting...  done!
[410/512] Fetching track data for You And I...  done!
[411/512] Fetching track data for Grateful...  done!
[412/512] Fetching track data for Unbelievable...  done!
[413/512] Fetching track data for This Used To Be My Playground...  done!
[414/512] Fetching track data for Fake Plastic Trees...  done!
[415/512] Fetching 

In [198]:
tracks_data_df = pd.concat(df_list)
tracks_data_df.head()

Unnamed: 0,track_id,track_name,artist_id,artist_name,album_id,duration,release_date,popularity,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,playlist_id,playlist_name
0,2pAFgfaNVioQGhC0jhtcsG,Ipagpatawad Mo,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,5jzfPxlD5UD4Aw3cEnelcC,239333,2012-01-01,29,0.622,0.522,...,-9.099,0,0.0323,0.672,0.000184,0.102,0.375,139.938,2pAFgfaNVioQGhC0jhtcsG,2pAFgfaNVioQGhC0jhtcsG
0,33pHgWjuAXvmE6Z9b51avt,Bakit Ba Ganyan,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,5jzfPxlD5UD4Aw3cEnelcC,312293,2012-01-01,25,0.579,0.292,...,-8.784,1,0.0298,0.86,0.00911,0.129,0.245,140.024,33pHgWjuAXvmE6Z9b51avt,33pHgWjuAXvmE6Z9b51avt
0,3O7vNSKiSa5uWpaxjC4712,Pikit,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,5jzfPxlD5UD4Aw3cEnelcC,391080,2012-01-01,11,0.651,0.365,...,-8.716,1,0.0307,0.825,0.0181,0.11,0.151,140.09,3O7vNSKiSa5uWpaxjC4712,3O7vNSKiSa5uWpaxjC4712
0,2PTQBDNpLnjix9Dsa33d8l,Tuloy Pa Rin,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,5jzfPxlD5UD4Aw3cEnelcC,236453,2012-01-01,42,0.8,0.536,...,-7.013,1,0.0359,0.721,0.00741,0.0928,0.724,110.1,2PTQBDNpLnjix9Dsa33d8l,2PTQBDNpLnjix9Dsa33d8l
0,23kb0nomnO71oLqw7Okj3T,Magkasuyo Buong Gabi,3pTaNQJ7TAU4yL7KEZp5sF,Nyoy Volante,5jzfPxlD5UD4Aw3cEnelcC,235226,2012-01-01,21,0.524,0.43,...,-8.917,1,0.053,0.847,0.00145,0.138,0.533,108.331,23kb0nomnO71oLqw7Okj3T,23kb0nomnO71oLqw7Okj3T


In [199]:
tracks_data_df.to_csv("data/"+KEYWORD+"_album_tracks_data.csv", index=False, encoding='utf-8')

In [200]:
df_list

[                 track_id      track_name               artist_id  \
 0  2pAFgfaNVioQGhC0jhtcsG  Ipagpatawad Mo  3pTaNQJ7TAU4yL7KEZp5sF   
 
     artist_name                album_id  duration release_date  popularity  \
 0  Nyoy Volante  5jzfPxlD5UD4Aw3cEnelcC    239333   2012-01-01          29   
 
    danceability  energy  ...  loudness  mode  speechiness  acousticness  \
 0         0.622   0.522  ...    -9.099     0       0.0323         0.672   
 
    instrumentalness  liveness  valence    tempo             playlist_id  \
 0          0.000184     0.102    0.375  139.938  2pAFgfaNVioQGhC0jhtcsG   
 
             playlist_name  
 0  2pAFgfaNVioQGhC0jhtcsG  
 
 [1 rows x 21 columns],
                  track_id       track_name               artist_id  \
 0  33pHgWjuAXvmE6Z9b51avt  Bakit Ba Ganyan  3pTaNQJ7TAU4yL7KEZp5sF   
 
     artist_name                album_id  duration release_date  popularity  \
 0  Nyoy Volante  5jzfPxlD5UD4Aw3cEnelcC    312293   2012-01-01          25   
 
  