In [1]:
import numpy as np
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import keyring
import time

In [2]:
# pip install spotipy

## 0. Setup Spotipy credentials and query wrapper

In [3]:
client_credentials_manager = SpotifyClientCredentials(client_id=keyring.get_password('spotify', 'cid'),
                                                      client_secret=keyring.get_password('spotify', 'secret') )
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

Set keyword

In [4]:
KEYWORD = 'Folk pop philippines'

## 1. Search for the top N playlists for keyword

##### View the structure of a search query

In [5]:
results = sp.search(q=KEYWORD, type='playlist', market='PH')

In [6]:
[r['name'] for r in results['playlists']['items']]

['Country Songs Philippines 2020 🏡',
 'Indie Filipino Bands & Artists - Filipino Heritage Month',
 'Folk Pop Philippines 2020🌴',
 'East and Southeast Asian Oldies (1950s-1980s)',
 'CHILL MUSIC WITH BEN&BEN POPULAR SONG']

In [7]:
results['playlists'].keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [8]:
results['playlists']['items'][0].keys()

dict_keys(['collaborative', 'description', 'external_urls', 'href', 'id', 'images', 'name', 'owner', 'primary_color', 'public', 'snapshot_id', 'tracks', 'type', 'uri'])

***

In [9]:
playlist_ids = []
playlist_names = []
playlist_numtracks = []

N = 100
#get playlist in batches of 50
for n in np.arange(N//50):
    offset= 50*n
    print("Getting batch %d of search results for keyword: %s ..." % (n,KEYWORD), end='' )
    results = sp.search(q=KEYWORD, type='playlist' , market='PH', offset = offset, limit=50)
    playlist_ids.extend([p['href'].split('/')[5] for p in results['playlists']['items']])
    playlist_names.extend([p['name'] for p in results['playlists']['items']])
    playlist_numtracks.extend([p['tracks']['total'] for p in results['playlists']['items']])
    print("  DONE!")

Getting batch 0 of search results for keyword: Folk pop philippines ...  DONE!
Getting batch 1 of search results for keyword: Folk pop philippines ...  DONE!


In [10]:
playlist_names

['Country Songs Philippines 2020 🏡',
 'Indie Filipino Bands & Artists - Filipino Heritage Month',
 'Folk Pop Philippines 2020🌴',
 'East and Southeast Asian Oldies (1950s-1980s)',
 'CHILL MUSIC WITH BEN&BEN POPULAR SONG']

## 2. Get Playlist Data

##### View the structure of a playlist query

In [13]:
playlist = sp.playlist('37i9dQZF1DX4olOMiqFeqU')

In [14]:
playlist

{'collaborative': False,
 'description': '100% Pinoy hits from your favorite artists!',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/37i9dQZF1DX4olOMiqFeqU'},
 'followers': {'href': None, 'total': 1991464},
 'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4olOMiqFeqU?additional_types=track',
 'id': '37i9dQZF1DX4olOMiqFeqU',
 'images': [{'height': None,
   'url': 'https://i.scdn.co/image/ab67706f00000003686604cfc2467f94dfcd8e3a',
   'width': None}],
 'name': 'OPM Favorites',
 'owner': {'display_name': 'Spotify',
  'external_urls': {'spotify': 'https://open.spotify.com/user/spotify'},
  'href': 'https://api.spotify.com/v1/users/spotify',
  'id': 'spotify',
  'type': 'user',
  'uri': 'spotify:user:spotify'},
 'primary_color': None,
 'public': False,
 'snapshot_id': 'MTYwMjUwNzEzMiwwMDAwMDAwMGYyZGQ1NmM3ZGJiZGQ5MmQ5MTI5ZTg4MWYyMWIzNWY1',
 'tracks': {'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4olOMiqFeqU/tracks?offset=0&limit=100&additional_types=tr

In [15]:
playlist.keys()

dict_keys(['collaborative', 'description', 'external_urls', 'followers', 'href', 'id', 'images', 'name', 'owner', 'primary_color', 'public', 'snapshot_id', 'tracks', 'type', 'uri'])

***

In [23]:
playlist_lookup = []
for n,p_id in enumerate(playlist_ids):
    print("Getting playlist data for playlist %s :..." % (playlist_names[n]), end='' )
    playlist = sp.playlist(p_id)
    try:
        print("Random")
        relevant_playlist_data = { key: playlist[key] for key in ['followers','owner']}
        relevant_playlist_data['playlist_id'] = p_id
        relevant_playlist_data['playlist_name'] = playlist_names[n]
        relevant_playlist_data['playlist_total_tracks'] = playlist_numtracks[n]
        relevant_playlist_data['owner_id'] = playlist['owner']['id']
        relevant_playlist_data['owner_name'] = playlist['owner']['display_name']
        relevant_playlist_data['total_followers'] = playlist['followers']['total']
        relevant_playlist_data.pop('owner', None)
        relevant_playlist_data.pop('followers', None)
        playlist_lookup.append(relevant_playlist_data)
        print("   DONE")
    except:
        print("   Aborted")
        continue
    

Getting playlist data for playlist Country Songs Philippines 2020 🏡 :...Random
   DONE
Getting playlist data for playlist Indie Filipino Bands & Artists - Filipino Heritage Month :...Random
   DONE
Getting playlist data for playlist Folk Pop Philippines 2020🌴 :...Random
   DONE
Getting playlist data for playlist East and Southeast Asian Oldies (1950s-1980s) :...Random
   DONE
Getting playlist data for playlist CHILL MUSIC WITH BEN&BEN POPULAR SONG :...Random
   DONE


In [24]:
playlist_df = pd.DataFrame(playlist_lookup)
playlist_df =playlist_df.sort_values('total_followers',ascending=False)
playlist_df 

Unnamed: 0,playlist_id,playlist_name,playlist_total_tracks,owner_id,owner_name,total_followers
1,2yTYhOSAUxXMd216jqPSYa,Indie Filipino Bands & Artists - Filipino Heri...,140,1260299844,Conrad Merced,244
3,68Onq0hDNGQbTu4OnAugyi,East and Southeast Asian Oldies (1950s-1980s),9997,1269581343,Eric Brightwell,97
4,1eg8S1ZNs7iKsKOzqObZPG,CHILL MUSIC WITH BEN&BEN POPULAR SONG,31,187osukm2yqjnmjhwsy3ji7mn,Zai,44
2,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴,59,jyhra99st5nhex8kfo4um2dh7,buntala,28
0,6BVyav6bQG6tOZQyAM2q33,Country Songs Philippines 2020 🏡,47,jyhra99st5nhex8kfo4um2dh7,buntala,16


In [25]:
#playlist name must contain the keyword
playlist_df = playlist_df[playlist_df['playlist_name'].str.lower().str.contains(KEYWORD.lower())]

In [26]:
playlist_df.to_csv("data/"+KEYWORD+"_playlist_data.csv",encoding='utf=8',index=False)

## 3. Get Tracks from a Playlist

##### View the structure of a playlist_tracks query

In [27]:
track = sp.playlist_tracks('37i9dQZF1DX4olOMiqFeqU')

In [28]:
track

{'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4olOMiqFeqU/tracks?offset=0&limit=100&additional_types=track',
 'items': [{'added_at': '2020-10-05T08:50:05Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'},
    'href': 'https://api.spotify.com/v1/users/',
    'id': '',
    'type': 'user',
    'uri': 'spotify:user:'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/7374lH6kwx9uQATYQ9H3Cp'},
       'href': 'https://api.spotify.com/v1/artists/7374lH6kwx9uQATYQ9H3Cp',
       'id': '7374lH6kwx9uQATYQ9H3Cp',
       'name': 'Eraserheads',
       'type': 'artist',
       'uri': 'spotify:artist:7374lH6kwx9uQATYQ9H3Cp'}],
     'available_markets': ['AD',
      'AE',
      'AL',
      'AR',
      'AT',
      'BA',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'BY',
      'CA',
      'CH',
      'CL',
      'CO',

In [29]:
track.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [30]:
track['items'][0].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

***

In [31]:
#get only top 20 followed playlists
playlist_df = playlist_df.head(20)

In [32]:
def get_relevant_track_data(tracks_data, playlist_id, playlist_name):
    try:
        relevant_track_data = { key: tracks_data['track'][key] for key in ['id','artists','name','popularity','duration_ms'] }
        relevant_track_data['artist_id']=[artist['id'] for artist in relevant_track_data['artists'] ]
        relevant_track_data['artist_name']=[artist['name']for artist in relevant_track_data['artists'] ]
        relevant_track_data['num_artists']=len([artist['id'] for artist in relevant_track_data['artists']]) 
        relevant_track_data['playlist_id']=playlist_id
        relevant_track_data['playlist_name']=playlist_name
        
        relevant_track_data.pop('artists', None)
        return relevant_track_data
    except:
        return 

In [33]:
#playlist_tracks
all_track_data = []

for _,p_id,p_name, p_numtracks in playlist_df[['playlist_id','playlist_name','playlist_total_tracks']].to_records():
    print("Fetching data for playlist = %s, with total tracks: %d" % (p_name,p_numtracks))
    n_fetches = p_numtracks // 100
    
    playlist_track_data = []
    #get tracks in batches of 100
    for n in np.arange(n_fetches+1):
        track_data = sp.playlist_tracks(p_id, offset=n*100)
        playlist_track_data.extend([get_relevant_track_data(item, p_id,p_name) for item in track_data['items']])
        
    all_track_data.extend(playlist_track_data)

Fetching data for playlist = Folk Pop Philippines 2020🌴, with total tracks: 59


In [34]:
for n,a in enumerate(all_track_data):
    try:
        len(a)
    except:
        print(n)

In [35]:
tracks_df = pd.DataFrame([data for data in all_track_data if data is not None])
tracks_df = tracks_df.rename(columns={'id':'track_id'})
tracks_df['artist_id'] = tracks_df.apply(lambda x: x['artist_id'][0] if x['num_artists']==1 else x['artist_id'], axis=1)
tracks_df['artist_name'] = tracks_df.apply(lambda x: x['artist_name'][0] if x['num_artists']==1 else x['artist_name'], axis=1)
tracks_df.head()

Unnamed: 0,track_id,name,popularity,duration_ms,artist_id,artist_name,num_artists,playlist_id,playlist_name
0,3WUEs51GpcvlgU7lehLgLh,Kathang Isip,71,318814,4DAcJXcjX0zlQAZAPAx4Zb,Ben&Ben,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
1,3vUqB1JscVAbi89WhhyGxg,Mauli,45,217718,"[007MmXwT1HwcXwuyROgNJb, 2Iy98jb5fPkl4jreGUbnA0]","[Fred Engay, Mei Teves]",2,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
2,4R2kfaDFhslZEMJqAFNpdd,cardigan,86,239560,06HL4z0CvFAxyc27GXpf02,Taylor Swift,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
3,0oug9t7drnkDuDpiDatvei,paruparo,42,227263,3BQ7MYNeB0wMPtHYPWvu1C,syd hartha,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
4,5KoE6YC2zs6Xo9irRTkFhx,Tides,39,279003,5TPt1cZ847Fr9CWd9vAm7s,The Ransom Collective,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴


In [36]:
len(tracks_df)

59

In [37]:
len(tracks_df['track_id'].unique())

59

In [38]:
tracks_df.to_csv("data/"+KEYWORD+"_playlist_tracks.csv",encoding='utf=8',index=False)

## 4. Get Tracks from a Playlist

In [39]:
tracks_df = pd.read_csv("data/"+KEYWORD+"_playlist_tracks.csv")
tracks_df.head()

Unnamed: 0,track_id,name,popularity,duration_ms,artist_id,artist_name,num_artists,playlist_id,playlist_name
0,3WUEs51GpcvlgU7lehLgLh,Kathang Isip,71,318814,4DAcJXcjX0zlQAZAPAx4Zb,Ben&Ben,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
1,3vUqB1JscVAbi89WhhyGxg,Mauli,45,217718,"['007MmXwT1HwcXwuyROgNJb', '2Iy98jb5fPkl4jreGU...","['Fred Engay', 'Mei Teves']",2,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
2,4R2kfaDFhslZEMJqAFNpdd,cardigan,86,239560,06HL4z0CvFAxyc27GXpf02,Taylor Swift,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
3,0oug9t7drnkDuDpiDatvei,paruparo,42,227263,3BQ7MYNeB0wMPtHYPWvu1C,syd hartha,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
4,5KoE6YC2zs6Xo9irRTkFhx,Tides,39,279003,5TPt1cZ847Fr9CWd9vAm7s,The Ransom Collective,1,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴


In [40]:
tracks_df.shape

(59, 9)

In [41]:
#remove track duplicates
tracks_df = tracks_df.drop_duplicates(subset='track_id')
tracks_df.shape

(59, 9)

In [42]:
def get_track_data(t_id, playlist_id,playlist_name):                    
    track_data = sp.track(t_id)
    track_features = sp.audio_features(t_id)
    
    #get only main(first) artist
    td_list = [t_id,\
               track_data['name'],\
               track_data['artists'][0]['id'],\
               track_data['artists'][0]['name'],\
               track_data['album']['uri'].split(":")[2],\
               track_data['duration_ms'],\
               track_data['album']['release_date'],\
               track_data['popularity']]
    data = pd.DataFrame([td_list], columns = ['track_id','track_name','artist_id','artist_name','album_id','duration','release_date','popularity'])

    relevant_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',\
                     'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']  
    
    tf_data = pd.DataFrame(track_features)
    tf_data = tf_data[relevant_cols]
    #tag with source playlist
    tf_data['playlist_id'] = playlist_id
    tf_data['playlist_name'] = playlist_name
    
    data = pd.concat([data, tf_data], axis=1)
    return data


In [51]:
downloaded_track_data = []
df_list = []

In [52]:
track_list = tracks_df['track_id'].values
playlist_name_list = tracks_df['playlist_name'].values
playlist_id_list = tracks_df['playlist_id'].values

for i,track_id in enumerate(track_list):
    try:
        if track_id not in downloaded_track_data:
            print('[%d/%d] Fetching track data for %s... ' % 
                  (i+1,len(track_list),tracks_df[tracks_df['track_id']==track_id]['name'].values[0]), end = " ") 
            track_data = get_track_data(track_id, playlist_id_list[i],playlist_name_list[i]) 
            df_list.append(track_data)
            downloaded_track_data.append(track_id)
            print('done!')
    except:
        continue
    else:
        continue
    
    #sleep for 60 secs per 100 requests to avoid being blocked
    if (i % 100 == 0)&(i > 0):
        time.sleep(10)    

[1/59] Fetching track data for Kathang Isip...  done!
[2/59] Fetching track data for Mauli...  done!
[3/59] Fetching track data for cardigan...  done!
[4/59] Fetching track data for paruparo...  done!
[5/59] Fetching track data for Tides...  done!
[6/59] Fetching track data for Tenerife Sea...  done!
[7/59] Fetching track data for Doors...  done!
[8/59] Fetching track data for Mabuti Pa Sila...  done!
[9/59] Fetching track data for 93 Million Miles...  done!
[10/59] Fetching track data for Nakikinig Ka Ba Sa Akin...  done!
[11/59] Fetching track data for Tagu-Taguan...  done!
[12/59] Fetching track data for Pagtingin...  done!
[13/59] Fetching track data for Dampi...  done!
[14/59] Fetching track data for Multiplayer...  done!
[15/59] Fetching track data for Ride Home...  done!
[16/59] Fetching track data for Hayaan Mo Na...  done!
[17/59] Fetching track data for puydi ba...  done!
[18/59] Fetching track data for Bawat Daan...  done!
[19/59] Fetching track data for Settled...  done!
[2

In [53]:
df_list

[                 track_id    track_name               artist_id artist_name  \
 0  3WUEs51GpcvlgU7lehLgLh  Kathang Isip  4DAcJXcjX0zlQAZAPAx4Zb     Ben&Ben   
 
                  album_id  duration release_date  popularity  danceability  \
 0  4nQxblVnnFPehg9ujzdJ8L    318814   2017-06-23          71         0.491   
 
    energy  ...  loudness  mode  speechiness  acousticness  instrumentalness  \
 0   0.314  ...     -8.58     1       0.0301         0.776          0.000005   
 
    liveness  valence    tempo             playlist_id  \
 0     0.152    0.295  126.921  0RzgbYyq7YLdvH1uK6pOhz   
 
                 playlist_name  
 0  Folk Pop Philippines 2020🌴  
 
 [1 rows x 21 columns],
                  track_id track_name               artist_id artist_name  \
 0  3vUqB1JscVAbi89WhhyGxg      Mauli  007MmXwT1HwcXwuyROgNJb  Fred Engay   
 
                  album_id  duration release_date  popularity  danceability  \
 0  4vRlXeVY0dyeOin2VolT3t    217718   2020-03-02          45         0

In [54]:
tracks_data_df = pd.concat(df_list)
tracks_data_df.head()

Unnamed: 0,track_id,track_name,artist_id,artist_name,album_id,duration,release_date,popularity,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,playlist_id,playlist_name
0,3WUEs51GpcvlgU7lehLgLh,Kathang Isip,4DAcJXcjX0zlQAZAPAx4Zb,Ben&Ben,4nQxblVnnFPehg9ujzdJ8L,318814,2017-06-23,71,0.491,0.314,...,-8.58,1,0.0301,0.776,5e-06,0.152,0.295,126.921,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
0,3vUqB1JscVAbi89WhhyGxg,Mauli,007MmXwT1HwcXwuyROgNJb,Fred Engay,4vRlXeVY0dyeOin2VolT3t,217718,2020-03-02,45,0.269,0.152,...,-12.19,1,0.0319,0.921,0.0,0.137,0.369,171.38,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
0,4R2kfaDFhslZEMJqAFNpdd,cardigan,06HL4z0CvFAxyc27GXpf02,Taylor Swift,2fenSS68JI1h4Fo296JfGr,239560,2020-07-24,86,0.613,0.581,...,-8.588,0,0.0424,0.537,0.000345,0.25,0.551,130.033,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
0,0oug9t7drnkDuDpiDatvei,paruparo,3BQ7MYNeB0wMPtHYPWvu1C,syd hartha,5yMcGhC4JxjcK3K0H8FRLD,227263,2019-10-31,42,0.49,0.368,...,-9.631,1,0.0531,0.385,0.00152,0.109,0.619,98.896,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴
0,5KoE6YC2zs6Xo9irRTkFhx,Tides,5TPt1cZ847Fr9CWd9vAm7s,The Ransom Collective,3LWiguQVKE7gAP1PsAyrwv,279003,2017-05-20,39,0.414,0.75,...,-7.594,1,0.0275,0.012,0.00154,0.0632,0.299,90.054,0RzgbYyq7YLdvH1uK6pOhz,Folk Pop Philippines 2020🌴


In [55]:
tracks_data_df.to_csv("data/"+KEYWORD+"_playlist_tracks_data.csv", index=False, encoding='utf-8')