In [140]:
import numpy as np
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import keyring

## Set up Spotipy credentials and query wrapper

In [132]:
# Build the Spotify client: the client id and secret are looked up through
# `keyring` instead of being written into the notebook.
client_credentials_manager = SpotifyClientCredentials(
    client_id=keyring.get_password('spotify', 'cid'),
    client_secret=keyring.get_password('spotify', 'secret'),
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


## Set keyword

In [218]:
# Search term used as the query (`q`) of the playlist searches in this notebook.
KEYWORD = 'OPM'

## View structure of a search query

In [220]:
# Run a single playlist search for KEYWORD in the PH market to inspect the response.
results = sp.search(q=KEYWORD, type='playlist' , market='PH')

In [None]:
# Show the raw search response.
results

In [222]:
# Top-level fields of the `playlists` section of the search response.
results['playlists'].keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [224]:
# Fields available on a single playlist entry of the search results.
results['playlists']['items'][0].keys()

dict_keys(['collaborative', 'description', 'external_urls', 'href', 'id', 'images', 'name', 'owner', 'primary_color', 'public', 'snapshot_id', 'tracks', 'type', 'uri'])

## Get top 200 playlists for keyword

In [133]:
# Collect id, name and track count of the top search hits for KEYWORD,
# requesting the results one page at a time.
N_PAGES = 4      # 4 pages x 50 results = top 200 playlists
PAGE_SIZE = 50   # number of playlists requested per search call

playlist_ids = []
playlist_names = []
playlist_numtracks = []
for page in range(N_PAGES):
    results = sp.search(q=KEYWORD, type='playlist', market='PH',
                        offset=PAGE_SIZE * page, limit=PAGE_SIZE)
    for p in results['playlists']['items']:
        # Guard against empty (null) entries in a result page, which would
        # otherwise abort the whole collection with a TypeError.
        if p is None:
            continue
        # Each item exposes its id directly, so there is no need to parse it
        # out of the `href` URL by position.
        playlist_ids.append(p['id'])
        playlist_names.append(p['name'])
        playlist_numtracks.append(p['tracks']['total'])

In [195]:
# List the names of the playlists collected by the search.
playlist_names

['OPM Favorites',
 'OPM Says Chillax',
 "OPM Bands 90's - 00's",
 'OPM Chill Songs 2020 ☕',
 "OPM: Hits of the '00s",
 "OPM: Hits of the '90s",
 "OPM Lovesongs 80's 90's 20's",
 "OPM: Hits of the '80s",
 'OPM Classics',
 'OPM Rising',
 'OPM 2020 Playlist',
 'OPM 2000s (Batang 90s)',
 '',
 'OPM Bands Greatest Hits',
 'OPM HUGOT SONG 2020',
 'OPM Acoustic Hits 2020 💛',
 'OPM Workday Marathon',
 'OPM Hits 2019 -2020',
 'OPM Love songs 80s and 90s Ballads',
 'OPM Love Songs & Covers',
 "OPM TAGALOG LOVE SONGS 80's and 90's",
 'OPM Sad Songs☹️',
 'OPM Songs 2020 - OPM Love Songs Tagalog Playlist 2020 (New Filipino Song)',
 "OPM Rock & Alternative 90's - 2010's",
 'OPM Songs for Sleeping 😴',
 'OPM Inuman Sessions',
 'OPM Alternative Rock Hits',
 "OPM 70's 80's Tagalog",
 'Acoustically OPM',
 'OPM Rock Songs',
 'Top Hits Philippines',
 'Acoustic OPM',
 'Tatak Pinoy',
 'Men of OPM',
 'OPM Hits 2020 to 2021',
 'Top OPM Artists of 2018',
 'Women of OPM',
 'Christmas OPM',
 'Top OPM Artists of 20

## View structure of a playlist query

In [216]:
# Fetch the full playlist object for one hard-coded playlist id to inspect its structure.
playlist = sp.playlist('37i9dQZF1DX4olOMiqFeqU')

In [None]:
# Show the raw playlist response.
playlist

In [217]:
# Top-level fields of a playlist object.
playlist.keys()

dict_keys(['collaborative', 'description', 'external_urls', 'followers', 'href', 'id', 'images', 'name', 'owner', 'primary_color', 'public', 'snapshot_id', 'tracks', 'type', 'uri'])

## Get Playlist Data

In [196]:
# Look up owner and follower details for every playlist found by the search
# and store one flat record per playlist.
playlist_lookup = []
for p_id, p_name, p_numtracks in zip(playlist_ids, playlist_names, playlist_numtracks):
    playlist = sp.playlist(p_id)
    try:
        # Key order here defines the column order of the resulting DataFrame.
        playlist_lookup.append({
            'playlist_id': p_id,
            'playlist_names': p_name,
            'playlist_total_tracks': p_numtracks,
            'owner_id': playlist['owner']['id'],
            'owner_name': playlist['owner']['display_name'],
            'total_followers': playlist['followers']['total'],
        })
    except (KeyError, TypeError) as err:
        # Best effort: a playlist whose response lacks the expected fields is
        # skipped, but reported instead of being dropped silently.
        print("Skipping playlist %s: unexpected response (%r)" % (p_id, err))

In [197]:
# Tabulate the collected playlist records, ordered by follower count (descending).
playlist_df = (
    pd.DataFrame(playlist_lookup)
    .sort_values('total_followers', ascending=False)
)
playlist_df

Unnamed: 0,playlist_id,playlist_names,playlist_total_tracks,owner_id,owner_name,total_followers
30,37i9dQZF1DXcZQSjptOQtk,Top Hits Philippines,52,spotify,Spotify,3863056
0,37i9dQZF1DX4olOMiqFeqU,OPM Favorites,50,spotify,Spotify,1983721
32,37i9dQZF1DX0iFfuXuP4Pm,Tatak Pinoy,60,spotify,Spotify,1286428
1,37i9dQZF1DX7Jerj8LqApV,OPM Says Chillax,40,spotify,Spotify,1160522
91,37i9dQZF1DWVjrTQlvqTHF,New Music Friday Philippines,106,spotify,Spotify,635808
...,...,...,...,...,...,...
103,58zdc3mv6rEnuKYIb0fJTO,Sad Songs Philippines 2020 😢,34,jyhra99st5nhex8kfo4um2dh7,buntala,180
171,5byY4odoEhAJ3mCN4mveTp,Tagalog opm bands 2020,42,31uybtfdnnuu33ugdqrddpsmrd24,Eris Cruz,157
123,5BYDziaKqhF2FvJR1h97MB,Opm acoustic covers,51,12177596513,Feb Afan Antolin,140
141,14vB1SjYBR33KunKaXco0E,OPM Christmas songs,62,0i1ck3bbu7s2w6r0m7mz8xl4x,0i1ck3bbu7s2w6r0m7mz8xl4x,114


In [213]:
# Save the playlist table without the index column. 'utf-8' is the canonical
# codec name; the earlier 'utf=8' spelling was a typo.
playlist_df.to_csv("playlist_data.csv", encoding='utf-8', index=False)

## View structure of a playlist_tracks query

In [225]:
# Fetch the tracks of one hard-coded playlist id to inspect the response structure.
track = sp.playlist_tracks('37i9dQZF1DX4olOMiqFeqU')

In [228]:
# Show the raw playlist-tracks response.
track

{'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4olOMiqFeqU/tracks?offset=0&limit=100',
 'items': [{'added_at': '2020-06-15T10:31:00Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'},
    'href': 'https://api.spotify.com/v1/users/',
    'id': '',
    'type': 'user',
    'uri': 'spotify:user:'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/7374lH6kwx9uQATYQ9H3Cp'},
       'href': 'https://api.spotify.com/v1/artists/7374lH6kwx9uQATYQ9H3Cp',
       'id': '7374lH6kwx9uQATYQ9H3Cp',
       'name': 'Eraserheads',
       'type': 'artist',
       'uri': 'spotify:artist:7374lH6kwx9uQATYQ9H3Cp'}],
     'available_markets': ['AD',
      'AE',
      'AL',
      'AR',
      'AT',
      'BA',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'BY',
      'CA',
      'CH',
      'CL',
      'CO',
      'CR',
      'CY'

In [226]:
# Top-level fields of the playlist-tracks response.
track.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [227]:
# Fields available on a single item of the playlist's track list.
track['items'][0].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

## Get Tracks Data

In [198]:
def get_relevant_track_data(tracks_data, playlist_id):
    """Flatten one playlist item into a record of the track fields kept for analysis.

    Parameters
    ----------
    tracks_data : dict
        One entry of the ``items`` list of a playlist-tracks response; the
        track object is read from its ``'track'`` key.
    playlist_id : str
        Id of the playlist the item came from; stored unchanged in the record.

    Returns
    -------
    dict or None
        A record with the keys ``id``, ``name``, ``popularity``,
        ``duration_ms``, ``artist_id`` (list), ``artist_name`` (list),
        ``num_artists`` and ``playlist_id``, in that order. ``None`` is
        returned when the item does not have the expected structure, e.g. the
        ``'track'`` value is ``None`` or a required field is missing.
    """
    try:
        track = tracks_data['track']
        artists = track['artists']
        artist_ids = [artist['id'] for artist in artists]
        artist_names = [artist['name'] for artist in artists]
        # Key order here defines the column order of the resulting DataFrame.
        return {
            'id': track['id'],
            'name': track['name'],
            'popularity': track['popularity'],
            'duration_ms': track['duration_ms'],
            'artist_id': artist_ids,
            'artist_name': artist_names,
            'num_artists': len(artist_ids),
            'playlist_id': playlist_id,
        }
    except (KeyError, TypeError):
        # Only malformed items are treated as "no data"; anything else
        # (including KeyboardInterrupt) is allowed to propagate.
        return None

In [199]:
# Fetch the track records of every collected playlist.
# Pages are followed through the response's `next` link rather than computed
# from the track total reported by the earlier search: that total may be out
# of date by now, and an exact multiple of the page size would otherwise
# trigger one extra, empty request.
all_track_data = []

for p_name, p_id, p_numtracks in zip(playlist_names, playlist_ids, playlist_numtracks):
    print("Fetching data for playlist %s with total tracks: %d" % (p_name, p_numtracks))

    playlist_track_data = []

    track_data = sp.playlist_tracks(p_id)
    while track_data:
        # Items that cannot be parsed come back as None and are kept here;
        # they are filtered out when the DataFrame is built.
        playlist_track_data.extend(
            get_relevant_track_data(item, p_id) for item in track_data['items']
        )
        # `sp.next` returns None once there is no further page.
        track_data = sp.next(track_data)

    all_track_data.extend(playlist_track_data)

Fetching data for playlist OPM Favorites with total tracks: 50
Fetching data for playlist OPM Says Chillax with total tracks: 40
Fetching data for playlist OPM Bands 90's - 00's with total tracks: 348
Fetching data for playlist OPM Chill Songs 2020 ☕ with total tracks: 49
Fetching data for playlist OPM: Hits of the '00s with total tracks: 40
Fetching data for playlist OPM: Hits of the '90s with total tracks: 35
Fetching data for playlist OPM Lovesongs 80's 90's 20's with total tracks: 285
Fetching data for playlist OPM: Hits of the '80s with total tracks: 40
Fetching data for playlist OPM Classics with total tracks: 267
Fetching data for playlist OPM Rising with total tracks: 80
Fetching data for playlist OPM 2020 Playlist with total tracks: 40
Fetching data for playlist OPM 2000s (Batang 90s) with total tracks: 148
Fetching data for playlist  with total tracks: 47
Fetching data for playlist OPM Bands Greatest Hits with total tracks: 66
Fetching data for playlist OPM HUGOT SONG 2020 wi

Fetching data for playlist Senti - OPM with total tracks: 175
Fetching data for playlist SENTI OPM HITS with total tracks: 225
Fetching data for playlist Locals (OPM) 🇵🇭 with total tracks: 217
Fetching data for playlist Filipino OPM songs with total tracks: 202
Fetching data for playlist Opm 2020 lovesong with total tracks: 77
Fetching data for playlist Wish107.5 OPM mix  with total tracks: 106
Fetching data for playlist Nina, MYMP, Freestyle & OPM's with total tracks: 169
Fetching data for playlist Mellow OPM with total tracks: 102
Fetching data for playlist This Is Sponge Cola with total tracks: 37
Fetching data for playlist Opm acoustic covers with total tracks: 51
Fetching data for playlist OPM Jukebox Hits📀📀📀 with total tracks: 76
Fetching data for playlist alternative rock #opm🇵🇭🇵🇭 with total tracks: 141
Fetching data for playlist NEW OPM Pinoy Covers 🌺 2019-2020 with total tracks: 69
Fetching data for playlist OPM Tiktok Hits 2020! 💃 with total tracks: 40
Fetching data for playl

In [204]:
# Number of fields stored in the first track record.
len(all_track_data[0])

8

In [205]:
# Total number of collected entries (None placeholders for unparsable items included).
len(all_track_data)

19547

In [207]:
# Print the positions of entries for which no track record could be built
# (get_relevant_track_data returned None).
for n, a in enumerate(all_track_data):
    if a is None:
        print(n)

1675
7199


In [211]:
def _unwrap_single_artist(row, column):
    """Return the lone element of row[column] for single-artist tracks, else the list itself."""
    if row['num_artists'] == 1:
        return row[column][0]
    return row[column]


# Build the track table from the parsed records only (None placeholders are dropped).
valid_track_records = [data for data in all_track_data if data is not None]
tracks_df = pd.DataFrame(valid_track_records).rename(columns={'id': 'track_id'})

# Single-artist tracks get a plain value instead of a one-element list.
for artist_column in ('artist_id', 'artist_name'):
    tracks_df[artist_column] = tracks_df.apply(_unwrap_single_artist, axis=1, column=artist_column)

tracks_df.head()

Unnamed: 0,track_id,name,popularity,duration_ms,artist_id,artist_name,num_artists,playlist_id
0,6SHSvo6OGgpneKfsxFGhhk,Ligaya,56,271466,7374lH6kwx9uQATYQ9H3Cp,Eraserheads,1,37i9dQZF1DX4olOMiqFeqU
1,4T7CM071eEoHudiDShZlCJ,Tala,64,245053,6aiCKnIN68hohzU3ZzNq48,Sarah Geronimo,1,37i9dQZF1DX4olOMiqFeqU
2,58grXgbCj7t5ulr0TGLIr9,Kahit Ayaw Mo Na,66,243015,5HIZU0JzM0AgfItVOm4E08,This Band,1,37i9dQZF1DX4olOMiqFeqU
3,00mBzIWv5gHOYxwuEJXjOG,Sa Ngalan Ng Pag-Ibig,67,285138,4qFxP3qN9GsnZDOkAE6x2m,December Avenue,1,37i9dQZF1DX4olOMiqFeqU
4,3U7TJUjNz7pgPSfgeGQjsP,Forevermore,59,294716,4BNWanhw4AjSXjBm9L1Jzy,Juris,1,37i9dQZF1DX4olOMiqFeqU


In [214]:
# Save the track table without the index column. 'utf-8' is the canonical
# codec name; the earlier 'utf=8' spelling was a typo.
tracks_df.to_csv("playlist_tracks.csv", encoding='utf-8', index=False)