In [109]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [110]:
import sys
import spotipy
import spotipy.util as util
import json
import time
import urllib.request

import pandas as pd

In [111]:
def generate_token():
    with open('configs.secret') as f:
        data = json.load(f)

    #scope = 'user-library-read'
    scope = None
    username = data['username']
    client_id = data['client_id']
    client_secret = data['client_secret']
    redirect_uri = data['redirect_uri']

    token = util.prompt_for_user_token(username, scope, client_id, client_secret, redirect_uri)

    if not token:
        raise RuntimeError("Can't get token for", username)
    
    return token

In [112]:
token = generate_token()
sp = spotipy.Spotify(auth=token)

In [113]:
playlists = []
num_playlists = 2000
limit = 50

search_term = 'banger'.lower()
isBanger = True

for i in range(0, int(num_playlists / limit)):
    results = sp.search(search_term, limit=limit, offset=limit*i, type='playlist', market='us')
    filtered_playlists = filter(lambda x: search_term in x['name'].lower(), results['playlists']['items'])
    playlists.extend(filtered_playlists)
    time.sleep(0.1)

print('Total filtered playlists', len(playlists))

Total filtered playlists 1893


In [114]:
tracks = {}

# Loop through playlists
for playlist in playlists:
    
    # A track can be added to a playlist several times. Only count one occurance per playlist
    
    playlist_track_deduper = []
    # If more than 100 songs in playlist, requires multiple queries to get all songs
    next = True
    offset = 0
    while (next):
        try:
            results = sp.playlist_tracks(playlist['id'],offset=offset)
        # Takes long enough token can expire, refresh token
        except Exception as excpt:
            print(excpt)
            token = generate_token()
            sp = spotipy.Spotify(auth=token)
            results = sp.playlist_tracks(playlist['id'],offset=offset)
            
        #print(playlist['uri'],len(results['items']))
        for item in results['items']:
            track = item['track']
            
            if not track:
                print(item)
                continue
            
            # Ignore tracks that are not on Spotify
            if track['is_local']:
                continue
                
            track_id = track['id']
            
            # Only count a single occurance of a track per playlist
            if track_id in playlist_track_deduper:
                continue
            playlist_track_deduper.append(track_id)
            
            # increment occurances if present
            if track_id in tracks:
                tracks[track_id]['banger_occurances'] = tracks[track_id]['banger_occurances'] + 1
            
            # else create new track entry
            else:        
                track.pop('album', None)
                track.pop('available_markets', None)
                track['banger_occurances'] = 1
                tracks[track_id] = track
                
        offset += 100
        next = results['next']
        time.sleep(0.1)

print(len(tracks))
print(max(tracks, key=lambda x: tracks[x]['banger_occurances']))

{'added_at': '2017-11-17T12:50:26Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/rickkve'}, 'href': 'https://api.spotify.com/v1/users/rickkve', 'id': 'rickkve', 'type': 'user', 'uri': 'spotify:user:rickkve'}, 'is_local': False, 'primary_color': None, 'track': None, 'video_thumbnail': {'url': None}}
http status: 401, code:-1 - https://api.spotify.com/v1/playlists/0Cv97tEImJ5OUIBTmZGApI/tracks?limit=100&offset=200:
 The access token expired
{'added_at': '2018-07-27T04:28:25Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/flamingdiscopanda'}, 'href': 'https://api.spotify.com/v1/users/flamingdiscopanda', 'id': 'flamingdiscopanda', 'type': 'user', 'uri': 'spotify:user:flamingdiscopanda'}, 'is_local': False, 'primary_color': None, 'track': None, 'video_thumbnail': {'url': None}}
{'added_at': '2019-03-01T02:44:20Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/dreww_skii'}, 'href': 'https://api.spotify.com/v1

In [115]:
filtered_tracks = {k: v for k, v in tracks.items() if v['banger_occurances'] > 10}
print(len(tracks),len(filtered_tracks))


138889 6556


In [116]:
top_track = max(filtered_tracks, key=lambda x: filtered_tracks[x]['banger_occurances'])
max_occurances = filtered_tracks[top_track]['banger_occurances']

print(top_track, max_occurances)

2xLMifQCjDGFmkHkpNLD9h 680


In [117]:
for track_id, track_details in filtered_tracks.items():
    
    artists = track_details.pop('artists', None)
    if artists:
        artist_name_string = ''
        artist_id_string = ''
        for artist in artists:
            artist_name_string = artist_name_string + artist['name'] + ', '
            artist_id_string = artist_id_string + artist['id'] + ', '
        artist_name_string = artist_name_string[0:-2]
        artist_id_string = artist_id_string[0:-2]
        track_details['artist.name'] = artist_name_string
        track_details['artist.id'] = artist_id_string
    
    external_ids = track_details.pop('external_ids', None)
    if (external_ids):
        for src, src_id in external_ids.items():
            key = 'external_ids.' + src
            track_details[key] = src_id
    
    track_details.pop('external_urls', None)
    track_details['isBanger'] = isBanger
    filtered_tracks[track_id] = track_details

In [118]:
track_attributes_to_remove = ['codestring', 'code_version', 'echoprintstring', 'echoprint_version', 'synchstring', 'synch_version', 'rhythmstring', 'rhythm_version']
if False:
    # analysis: track, sections, segments,
    for track_id, track_details in filtered_tracks.items():
        try:
            results = sp.audio_analysis(track_id)

        # Takes long enough token can expire, refresh token
        except Exception as excpt:
            print(excpt)
            token = generate_token()
            sp = spotipy.Spotify(auth=token)
            results = sp.audio_analysis(track_id)

        track_info = results['track']
        for key in track_attributes_to_remove:
            track_info.pop(key, None)
        tmp_track_details = {**track_details, **track_info}
        tmp_track_details['sections'] = results['sections']

        filtered_tracks[track_id] = tmp_track_details

In [119]:
track_ids = list(filtered_tracks.keys())
num_tracks = len(track_ids)
num_slices = int(num_tracks / 50)
for i in range(num_slices):
    try:
        results = sp.audio_features(track_ids[i * 50 : i * 50 + 50])
    
    # Takes long enough token can expire, refresh token
    except Exception as excpt:
        token = generate_token()
        sp = spotipy.Spotify(auth=token)
        results = sp.audio_features(track_ids[i * 50 : i * 50 + 50])
    for j in range(50):
        track_id = track_ids[i * 50 + j]
        track_details = filtered_tracks[track_id]
        if (track_details and results[j]):
            tmp_track_details = {**track_details, **results[j]}
        filtered_tracks[track_id] = tmp_track_details
    time.sleep(0.1)
        

In [None]:
# artist.hotttnesss,artist.id,artist.name,artist_mbtags,artist_mbtags_count,bars_confidence,bars_start,beats_confidence,beats_start,duration,end_of_fade_in,familiarity,key,key_confidence,latitude,location,longitude,loudness,mode,mode_confidence,release.id,release.name,similar,song.hotttnesss,song.id,start_of_fade_out,tatums_confidence,tatums_start,tempo,terms,terms_freq,time_signature,time_signature_confidence,title,year


In [120]:
#print(set(map(lambda x: (x[1]['name'],x[1]['artists']['names'],x[1]['banger_occurances']), list(filtered_tracks.items()))))
print(list(map(lambda x: x[1]['name'], list(filtered_tracks.items()))))



In [121]:
df = pd.DataFrame(filtered_tracks).transpose()
df.index.name = 'song.id'
df.to_csv(search_term + '_dataset.csv')