In [1]:
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Load the raw credentials text into `string`. The `with` block guarantees the
# file handle is closed even if read() raises.
with open("secrets/secrets.txt", "r") as secrets_file:
    string = secrets_file.read()

In [3]:
# Parse the "key:value" lines of the secrets text into a dict.
secrets_dict = {}
for line in string.splitlines():
    # Split on the FIRST colon only, so a value that itself contains ':' is
    # kept intact instead of being truncated.
    key, separator, value = line.partition(':')
    if not separator:
        # Blank line or line without a key/value separator: nothing to store.
        continue
    # Strip stray spaces / carriage returns that would otherwise end up inside
    # the credential strings.
    secrets_dict[key.strip()] = value.strip()

In [4]:
# Create the SpotiPy client; the client id and secret are taken from the
# parsed secrets_dict.
credentials_manager = SpotifyClientCredentials(
    client_id=secrets_dict['clientid'],
    client_secret=secrets_dict['clientsecret'],
)
sp = spotipy.Spotify(auth_manager=credentials_manager)

In [5]:
def get_name_artists_from_track(track):
    """Return one ``(track name, artist name)`` tuple per artist of ``track``.

    Parameters
    ----------
    track : dict or None
        Mapping with a ``"name"`` key and an ``"artists"`` list whose entries
        each have a ``"name"`` key. ``None`` (or an empty mapping) is accepted
        because playlist items can carry no track object.

    Returns
    -------
    list of tuple
        ``[(track_name, artist_name), ...]``; empty when ``track`` is missing.
    """
    # A missing track used to raise TypeError and abort the whole playlist scan.
    if not track:
        return []
    return [(track["name"], artist["name"]) for artist in track["artists"]]

def flatten_to_artist(input_list):
    """Flatten a list of lists of pairs into a flat list of each pair's second element.

    With the ``(track name, artist name)`` pairs built in this notebook, the
    result is the artist names in encounter order (duplicates kept).
    """
    artists = []
    for pairs in input_list:
        for pair in pairs:
            artists.append(pair[1])
    return artists

def get_name_artist_from_playlist_item(playlist_item):
    """Return the ``(track name, artist name)`` pairs for one playlist item.

    Reads the item's ``'track'`` entry and delegates to
    ``get_name_artists_from_track``.
    """
    track = playlist_item['track']
    return get_name_artists_from_track(track)

In [6]:
from random import randint
from time import sleep

def get_playlist_tracks(playlist_id):
    """Return every item of a playlist, following pagination to the last page.

    Parameters
    ----------
    playlist_id : str
        Playlist id handed to ``sp.user_playlist_tracks`` (the user argument
        is fixed to ``"spotify"``).

    Returns
    -------
    list
        The ``'items'`` entries of all result pages, in page order.
    """
    results = sp.user_playlist_tracks("spotify", playlist_id)
    # Copy so that extending below never mutates the first page's own list.
    tracks = list(results['items'])
    while results['next'] is not None:
        results = sp.next(results)
        # extend() appends in place instead of rebuilding the list per page.
        tracks.extend(results['items'])
        # Optional throttle if requests get rate-limited:
        # sleep(randint(100,1000)/1000)
    return tracks

In [7]:
# Pick the playlist to load; the alternative ids are kept for reference.
playlist_id = '05Tli1k3x5YQEvNZyOcR4Z'  # wixderwoche
# playlist_id = '4rnleEAOdmFAbRcNCgZMpY'  # jan
# playlist_id = '5S8SJdl1BDc0ugpkEvFsIL'  # willis orr (10,000 songs)
mytracks = get_playlist_tracks(playlist_id)

In [8]:
# (track name, artist name) pairs for every playlist item, one sub-list per item.
mytracks_artists = [get_name_artist_from_playlist_item(mytrack) for mytrack in mytracks]

# Flat list of artist names, duplicates included.
my_artists = flatten_to_artist(mytracks_artists)

# Drop duplicate artists while keeping first-seen order. A plain set() returns
# the names in a different order on each kernel start (string hashing is
# randomized), which would make the fetch order and the counter-based CSV
# names further down irreproducible between runs.
my_artists_unique = list(dict.fromkeys(my_artists))

display(len(my_artists_unique))


288

In [9]:
import json

In [10]:
# For every unique artist: search up to 20 tracks, fetch their audio features,
# and checkpoint the accumulated table to CSV after each batch of artists.
counter = 0
batch_size = 250

# Rows are collected in two index-aligned lists and turned into a DataFrame
# once per checkpoint. This replaces the per-track pd.concat (quadratic) and
# the `if counter == 1` initialisation, which re-assigned `songs` for every
# track of the first artist (keeping only its last track) and left `songs`
# undefined whenever the first artist produced no rows.
song_rows = []     # track_name / artist / id / preview for each track
feature_rows = []  # audio-features dict for the track at the same position

for i in range(0, len(my_artists_unique), batch_size):
    start = i
    stop = i + batch_size

    for my_artist in my_artists_unique[start:stop]:
        print(f'fetching {counter:5.0f}:  ', my_artist)
        counter += 1

        try:
            tr = sp.search(q=f'artist:{my_artist}', type='track', limit=20)['tracks']['items']
            if not tr:
                continue

            # One audio-features request per artist instead of one per track.
            features = sp.audio_features([t['uri'] for t in tr]) or []

            for t, feature in zip(tr, features):
                if feature is None:
                    # No audio features available for this track: skip it
                    # rather than aborting the artist's remaining tracks.
                    continue
                song = {
                    'track_name': t['name'],
                    'artist': my_artist,
                    'id': t['id'],
                    'preview': t['preview_url'],
                }
                # Append both rows only after the dict was built successfully
                # so the two lists can never get out of step.
                song_rows.append(song)
                feature_rows.append(feature)
        except Exception as err:
            # Best effort: report the failure and move on to the next artist.
            # Unlike a bare `except:`, this lets KeyboardInterrupt through.
            print('something went wrong', repr(err))
            continue

    if not song_rows:
        print('no songs collected yet, skipping csv export')
        continue

    # Same column layout as before: metadata columns followed by the
    # normalized audio-features columns.
    songs = pd.concat([pd.DataFrame(song_rows), pd.json_normalize(feature_rows)], axis=1)
    songs.to_csv(f'csv_data/willis_{counter}.csv', index=False)


fetching     0:   Panda Dub
dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
https://api.spotify.com/v1/tracks/3DX8wXmuWNVrUN0QfcLKWZ
dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
https://api.spotify.com/v1/tracks/7oR7EG6JnUw70rX4ORSaO8
dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])
https://api.spotify.com/v1/tracks/4bO8c34CdaCstrES2zxF2v
dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 