In [1]:
import numpy as np
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import keyring
import time

## 0. Setup Spotipy credentials and query wrapper

In [2]:
client_credentials_manager = SpotifyClientCredentials(client_id=keyring.get_password('spotify', 'cid'),
                                                      client_secret=keyring.get_password('spotify', 'secret') )
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)


Set keyword

In [3]:
KEYWORD = 'electronic songs'

## 1. Search for the top N playlists for keyword

##### View the structure of a search query

In [4]:
results = sp.search(q=KEYWORD, type='playlist', market='PH')

In [5]:
results

{'playlists': {'href': 'https://api.spotify.com/v1/search?query=electronic+songs&type=playlist&market=PH&offset=0&limit=10',
  'items': [{'collaborative': False,
    'description': 'Top 80s &amp; 90s Dance Songs. Classic Dance Music Hits &amp; Euro Dance from Eighties &amp; Nineties',
    'external_urls': {'spotify': 'https://open.spotify.com/playlist/5RFz5RUGbPCaS0yObBrD2z'},
    'href': 'https://api.spotify.com/v1/playlists/5RFz5RUGbPCaS0yObBrD2z',
    'id': '5RFz5RUGbPCaS0yObBrD2z',
    'images': [{'height': None,
      'url': 'https://i.scdn.co/image/ab67706c0000bebbe84f0c696e21602e15eb6208',
      'width': None}],
    'name': "Dance Music Hits 80s 90s | Best Dance Electronic Songs of the 80's & 90's",
    'owner': {'display_name': 'Listanauta',
     'external_urls': {'spotify': 'https://open.spotify.com/user/listanauta'},
     'href': 'https://api.spotify.com/v1/users/listanauta',
     'id': 'listanauta',
     'type': 'user',
     'uri': 'spotify:user:listanauta'},
    'primary_co

In [6]:
[r['name'] for r in results['playlists']['items']]

["Dance Music Hits 80s 90s | Best Dance Electronic Songs of the 80's & 90's",
 'Gym Workout Music. Hits of EDM Dance & Electronic Songs',
 'Top 50 Summer Electronic Songs of All Time (updated in July 2020)',
 'Best Electronic Songs of All Time - Most Popular Electronic Music (Updated in 2020)',
 'Billboard Hot Dance/Electronic Songs',
 'Electronic Songs',
 'Instrumental Electronic Songs',
 'Hot Dance/Electronic Songs 2015',
 'Running Songs 2020 ',
 'Chillout 2020']

In [7]:
[r['description'] for r in results['playlists']['items']]

['Top 80s &amp; 90s Dance Songs. Classic Dance Music Hits &amp; Euro Dance from Eighties &amp; Nineties',
 'Best Songs for Gym Workout Motivation. Electro Dance Music, Electronic Pop, Dance &amp; More Music Hits',
 'In the next year, you will be able to find this playlist with the next title: Top 50 Summer Electronic Songs of All Time (updated in July 2021)',
 'In the next year, you will be able to find this playlist with the next title: Best Electronic Songs of All Time - Most Popular Electronic Music (Updated in 2021)',
 '',
 '',
 '',
 'Musica electronica del año 2015',
 'Running Music Hits - Songs to run to - Run Music - Running playlist - Electronic Running - Motivation Mix - Treino',
 'Chill Out Songs and relaxing music. 🌞Including Deep House, Chill Music, Tropical Sounds and Electronic Music 😎– Electronica, Relaxing Songs, Chill Mix, Lounge Music, Beach Music, Cafe del mar, Buddha Bar, Hotel Costes, Nikki Beach. Follow us on <a href="https://wad.lnk.to/instagram">Instagram</a>!']

In [8]:
results['playlists'].keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [9]:
results['playlists']['items'][0].keys()

dict_keys(['collaborative', 'description', 'external_urls', 'href', 'id', 'images', 'name', 'owner', 'primary_color', 'public', 'snapshot_id', 'tracks', 'type', 'uri'])

In [10]:
results['playlists']['items'][0]['tracks']['total']

60

***

In [11]:
N=100
N//50

2

In [12]:
playlist_ids = []
playlist_names = []
playlist_numtracks = []

N = 100
#get playlist in batches of 50
for n in np.arange(N//50):
    offset= 50*n
    print("Getting batch %d of search results for keyword: %s ..." % (n,KEYWORD), end='' )
    results = sp.search(q=KEYWORD, type='playlist' , market='PH', offset = offset, limit=50)
    playlist_ids.extend([p['href'].split('/')[5] for p in results['playlists']['items']])
    playlist_names.extend([p['name'] for p in results['playlists']['items']])
    playlist_numtracks.extend([p['tracks']['total'] for p in results['playlists']['items']])
    print("  DONE!")

Getting batch 0 of search results for keyword: electronic songs ...  DONE!
Getting batch 1 of search results for keyword: electronic songs ...  DONE!


In [13]:
playlist_names

["Dance Music Hits 80s 90s | Best Dance Electronic Songs of the 80's & 90's",
 'Top 100 Most Popular Electronic Songs of all Time',
 'Gym Workout Music. Hits of EDM Dance & Electronic Songs',
 'Best Electronic Songs of All Time - Most Popular Electronic Music (Updated in 2020)',
 'Top 50 Summer Electronic Songs of All Time (updated in July 2020)',
 'Billboard Hot Dance/Electronic Songs',
 'Electronic Songs',
 'Instrumental Electronic Songs',
 'Hot Dance/Electronic Songs 2015',
 'Running Songs 2020 ',
 'Chillout 2020',
 "BPM - Today's Dance Hits",
 'EDM Remixes of Popular Songs',
 'Sex Songs 🔞🔥 : Alt Pop / Dark Pop / Electronic / Bedroom Jam / Sexy Vibes ',
 'Electronic: Study No Words',
 'La Mejor Música Electrónica Pop, Trap | Tomorrowland 2020 | 3L3CTRONICA',
 '👑 Best Remixes of Popular Songs 2020 👑',
 'Future Bass Hits ⚡ Electrónica 2020 ⚡ Chill Music 2020',
 'The Climb 170-175 BPM',
 'Chill Electronic Music 2019',
 'Newest Electronic / House Songs 2020',
 'Secret Electronica',
 'Su

## 2. Get Playlist Data

##### View the structure of a playlist query

In [14]:
playlist = sp.playlist('37i9dQZF1DX4olOMiqFeqU')

In [15]:
playlist

{'collaborative': False,
 'description': '100% Pinoy hits from your favorite artists!',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/37i9dQZF1DX4olOMiqFeqU'},
 'followers': {'href': None, 'total': 1991844},
 'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4olOMiqFeqU?additional_types=track',
 'id': '37i9dQZF1DX4olOMiqFeqU',
 'images': [{'height': None,
   'url': 'https://i.scdn.co/image/ab67706f00000003686604cfc2467f94dfcd8e3a',
   'width': None}],
 'name': 'OPM Favorites',
 'owner': {'display_name': 'Spotify',
  'external_urls': {'spotify': 'https://open.spotify.com/user/spotify'},
  'href': 'https://api.spotify.com/v1/users/spotify',
  'id': 'spotify',
  'type': 'user',
  'uri': 'spotify:user:spotify'},
 'primary_color': None,
 'public': False,
 'snapshot_id': 'MTYwMjU5MTA0OSwwMDAwMDAwMGYyZGQ1NmM3ZGJiZGQ5MmQ5MTI5ZTg4MWYyMWIzNWY1',
 'tracks': {'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4olOMiqFeqU/tracks?offset=0&limit=100&additional_types=tr

In [16]:
playlist.keys()

dict_keys(['collaborative', 'description', 'external_urls', 'followers', 'href', 'id', 'images', 'name', 'owner', 'primary_color', 'public', 'snapshot_id', 'tracks', 'type', 'uri'])

***

In [17]:
playlist_lookup = []
for n,p_id in enumerate(playlist_ids):
    print("Getting playlist data for playlist %s :..." % (playlist_names[n]), end='' )
    playlist = sp.playlist(p_id)
    try:
        relevant_playlist_data = { key: playlist[key] for key in ['followers','owner']}
        relevant_playlist_data['playlist_id'] = p_id
        relevant_playlist_data['playlist_name'] = playlist_names[n]
        relevant_playlist_data['playlist_total_tracks'] = playlist_numtracks[n]
        relevant_playlist_data['owner_id'] = playlist['owner']['id']
        relevant_playlist_data['owner_name'] = playlist['owner']['display_name']
        relevant_playlist_data['total_followers'] = playlist['followers']['total']
        relevant_playlist_data.pop('owner', None)
        relevant_playlist_data.pop('followers', None)
        playlist_lookup.append(relevant_playlist_data)
        print("   DONE")
    except:
        print("   Aborted")
        continue
    

Getting playlist data for playlist Dance Music Hits 80s 90s | Best Dance Electronic Songs of the 80's & 90's :...   DONE
Getting playlist data for playlist Top 100 Most Popular Electronic Songs of all Time :...   DONE
Getting playlist data for playlist Gym Workout Music. Hits of EDM Dance & Electronic Songs :...   DONE
Getting playlist data for playlist Best Electronic Songs of All Time - Most Popular Electronic Music (Updated in 2020) :...   DONE
Getting playlist data for playlist Top 50 Summer Electronic Songs of All Time (updated in July 2020) :...   DONE
Getting playlist data for playlist Billboard Hot Dance/Electronic Songs :...   DONE
Getting playlist data for playlist Electronic Songs :...   DONE
Getting playlist data for playlist Instrumental Electronic Songs :...   DONE
Getting playlist data for playlist Hot Dance/Electronic Songs 2015 :...   DONE
Getting playlist data for playlist Running Songs 2020  :...   DONE
Getting playlist data for playlist Chillout 2020 :...   DONE
Get

In [18]:
playlist_df = pd.DataFrame(playlist_lookup)
playlist_df = playlist_df.sort_values('total_followers',ascending=False)
playlist_df 

Unnamed: 0,playlist_id,playlist_name,playlist_total_tracks,owner_id,owner_name,total_followers
10,7ozIozDp260fjNOZy1yzRG,Chillout 2020,383,wearediamond,We Are Diamond,678972
24,2oTrz1dYy2l970ptp4KZta,Chillout,168,electroposé,Electro Posé,215903
9,4cgeOaRCHDkVDQPaDrRQFR,Running Songs 2020,136,43xzlwfo0tft7hn5r5e6dksrd,LoudKult,158524
11,37i9dQZF1DX7SEhw42DW5b,BPM - Today's Dance Hits,70,spotify,Spotify,79531
18,37i9dQZF1DWV3VLITCZusq,The Climb 170-175 BPM,51,spotify,Spotify,74183
...,...,...,...,...,...,...
84,3govTtbYOtkwPYg10SkDcJ,EDM Tiktok Songs: Tiktok Dance Songs (Electron...,55,vmopbed9tgj5wx6ctug9fig1a,Tiktok Hits,100
8,18ScG1ZPz4XnGlzJWksQ1K,Hot Dance/Electronic Songs 2015,98,antoniox_x,antoniox_x,68
56,41VlRPCYUtsKacWX6fTffn,Clean EDM Workout 2020 (Clean EDM Songs 2020) ...,54,ks5lgsyb7h25y431umnumwpcs,FITNESS ROUTINE,53
1,5sTpFnFtyMwawbqsFfCwAt,Top 100 Most Popular Electronic Songs of all Time,114,krcghojgx24xhteqsbj990fb5,MaxiF,33


In [19]:
#playlist name must contain the keyword
#fix keyword matching 
KEYWORD_regex = KEYWORD.split()
playlist_df = playlist_df[playlist_df['playlist_name'].str.lower().str.contains('&'.join(KEYWORD_regex).lower())]

In [20]:
playlist_df.to_csv("data/"+KEYWORD+"_playlist_data.csv",encoding='utf=8',index=False)

## 3. Get Tracks from a Playlist

##### View the structure of a playlist_tracks query

In [21]:
track = sp.playlist_tracks('37i9dQZF1DX4olOMiqFeqU')

In [22]:
track

{'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX4olOMiqFeqU/tracks?offset=0&limit=100&additional_types=track',
 'items': [{'added_at': '2020-10-05T08:50:05Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'},
    'href': 'https://api.spotify.com/v1/users/',
    'id': '',
    'type': 'user',
    'uri': 'spotify:user:'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/7374lH6kwx9uQATYQ9H3Cp'},
       'href': 'https://api.spotify.com/v1/artists/7374lH6kwx9uQATYQ9H3Cp',
       'id': '7374lH6kwx9uQATYQ9H3Cp',
       'name': 'Eraserheads',
       'type': 'artist',
       'uri': 'spotify:artist:7374lH6kwx9uQATYQ9H3Cp'}],
     'available_markets': ['AD',
      'AE',
      'AL',
      'AR',
      'AT',
      'BA',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'BY',
      'CA',
      'CH',
      'CL',
      'CO',

In [23]:
track.keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [24]:
track['items'][0].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

***

In [25]:
#get only top 20 followed playlists
playlist_df = playlist_df.head(20)

In [26]:
def get_relevant_track_data(tracks_data, playlist_id, playlist_name):
    try:
        relevant_track_data = { key: tracks_data['track'][key] for key in ['id','artists','name','popularity','duration_ms'] }
        relevant_track_data['artist_id']=[artist['id'] for artist in relevant_track_data['artists'] ]
        relevant_track_data['artist_name']=[artist['name']for artist in relevant_track_data['artists'] ]
        relevant_track_data['num_artists']=len([artist['id'] for artist in relevant_track_data['artists']]) 
        relevant_track_data['playlist_id']=playlist_id
        relevant_track_data['playlist_name']=playlist_name
        
        relevant_track_data.pop('artists', None)
        return relevant_track_data
    except:
        return 

In [27]:
#playlist_tracks
all_track_data = []

for _,p_id,p_name, p_numtracks in playlist_df[['playlist_id','playlist_name','playlist_total_tracks']].to_records():
    print("Fetching data for playlist = %s, with total tracks: %d" % (p_name,p_numtracks))
    n_fetches = p_numtracks // 100
    
    playlist_track_data = []
    #get tracks in batches of 100
    for n in np.arange(n_fetches+1):
        track_data = sp.playlist_tracks(p_id, offset=n*100)
        playlist_track_data.extend([get_relevant_track_data(item, p_id,p_name) for item in track_data['items']])
        
    all_track_data.extend(playlist_track_data)

In [31]:
for n,a in enumerate(all_track_data):
    try:
        len(a)
    except:
        print(n)

In [33]:
tracks_df = pd.DataFrame([data for data in all_track_data if data is not None])
tracks_df = tracks_df.rename(columns={'id':'track_id'})
tracks_df['artist_id'] = tracks_df.apply(lambda x: x['artist_id'][0] if x['num_artists']==1 else x['artist_id'], axis=1)
tracks_df['artist_name'] = tracks_df.apply(lambda x: x['artist_name'][0] if x['num_artists']==1 else x['artist_name'], axis=1)
tracks_df.head()

ValueError: Wrong number of items passed 0, placement implies 1

In [None]:
len(tracks_df)

In [None]:
len(tracks_df['track_id'].unique())

In [None]:
tracks_df.to_csv("data/"+KEYWORD+"_playlist_tracks.csv",encoding='utf=8',index=False)

## 4. Get Tracks Data from a list of Tracks

In [None]:
tracks_df = pd.read_csv("data/"+KEYWORD+"_playlist_tracks.csv")
tracks_df.head()

In [None]:
tracks_df.shape

In [None]:
#remove track duplicates
tracks_df = tracks_df.drop_duplicates(subset='track_id')
tracks_df.shape

In [None]:
def get_track_data(t_id, playlist_id,playlist_name):                    
    track_data = sp.track(t_id)
    track_features = sp.audio_features(t_id)
    
    #get only main(first) artist
    td_list = [t_id,\
               track_data['name'],\
               track_data['artists'][0]['id'],\
               track_data['artists'][0]['name'],\
               track_data['album']['uri'].split(":")[2],\
               track_data['duration_ms'],\
               track_data['album']['release_date'],\
               track_data['popularity']]
    data = pd.DataFrame([td_list], columns = ['track_id','track_name','artist_id','artist_name','album_id','duration','release_date','popularity'])

    relevant_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',\
                     'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']  
    
    tf_data = pd.DataFrame(track_features)
    tf_data = tf_data[relevant_cols]
    #tag with source playlist
    tf_data['playlist_id'] = playlist_id
    tf_data['playlist_name'] = playlist_name
    
    data = pd.concat([data, tf_data], axis=1)
    return data


In [None]:
downloaded_track_data = []

In [None]:
track_list = tracks_df['track_id'].values
playlist_name_list = tracks_df['playlist_name'].values
playlist_id_list = tracks_df['playlist_id'].values
df_list=[]

for i,track_id in enumerate(track_list):
    try:
        if track_id not in downloaded_track_data:
            print('[%d/%d] Fetching track data for %s... ' % 
                  (i+1,len(track_list),tracks_df[tracks_df['track_id']==track_id]['name'].values[0]), end = " ") 
            track_data = get_track_data(track_id, playlist_id_list[i],playlist_name_list[i]) 
            df_list.append(track_data)
            downloaded_track_data.append(track_id)
            print('done!')
    except:
        continue
    else:
        continue
    
    #sleep for 60 secs per 100 requests to avoid being blocked
    if (i % 100 == 0)&(i > 0):
        time.sleep(5)    

In [None]:
tracks_data_df = pd.concat(df_list)
tracks_data_df.head()

In [None]:
tracks_data_df.to_csv("data/"+KEYWORD+"_playlist_tracks_data.csv", index=False, encoding='utf-8')