In [1]:
import pandas as pd
import random
import time

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Read the raw credentials file into `string`. The context manager guarantees
# the handle is closed even if reading fails part-way.
with open("../secrets/secrets.txt", "r") as secrets_file:
    string = secrets_file.read()

In [3]:
# Parse the "key:value" lines read from the secrets file into a dict.
secrets_dict = {}
for line in string.splitlines():
    # Split on the FIRST colon only, so values that themselves contain ':'
    # are kept whole instead of being truncated.
    key, sep, value = line.partition(':')
    if not sep:
        # Blank or malformed line without a separator: skip it rather than crash.
        continue
    # Strip surrounding whitespace so "clientid: abc" yields the value "abc".
    secrets_dict[key.strip()] = value.strip()

In [4]:
# Create the shared Spotify client, authenticated with the application's
# client id / client secret taken from the parsed secrets.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=secrets_dict['clientid'],
        client_secret=secrets_dict['clientsecret'],
    )
)

In [5]:
def get_track_with_uri(category, search, track):
    """Turn one track record into a single-row DataFrame.

    Parameters:
        category: column name under which the search term is stored.
        search: the search term that produced this track.
        track: mapping with at least the keys 'name', 'id' and 'preview_url'.

    Returns:
        A one-row DataFrame with the columns 'track_name', <category>, 'id'
        and 'preview'.
    """
    record = {
        'track_name': track['name'],
        category: search,
        'id': track['id'],
        'preview': track['preview_url'],
    }
    return pd.DataFrame([record])

In [6]:
def get_songs_with_features(category, search, limit, off, max_retries=5):
    """Search Spotify for tracks and attach their audio features.

    Runs the query ``{category}:{search}`` through the module-level ``sp``
    client, retrying failed requests with an exponentially growing delay, and
    joins the basic track info column-wise with the result of
    ``sp.audio_features``. Progress and errors are appended to
    'spotilogger.log'.

    Parameters:
        category: search field used in the query (also the column name that
            stores the search term).
        search: search term.
        limit: number of tracks to request.
        off: offset passed to the search request.
        max_retries: maximum number of search attempts.

    Returns:
        A DataFrame with one row per track (track info + audio features), or
        None when the search returned no tracks.

    Raises:
        RuntimeError: when every search attempt failed. Callers that catch
            ``Exception`` to skip a failed request keep working.
    """
    # The context manager closes the log on every exit path (early return,
    # exception, normal return).
    with open('spotilogger.log', 'a') as logfile:
        tracks = None
        last_error = None

        for attempt in range(max_retries):
            try:
                # Random pause of 0.5-2 s before every request.
                time.sleep(random.randint(500, 2000) / 1000)
                tracks = sp.search(q=f'{category}:{search}', type='track',
                                   limit=limit, offset=off)['tracks']['items']
            except Exception as err:  # not bare: Ctrl-C must still interrupt
                last_error = err
                logfile.write(f'\t### error recieving data from the API ###\n')
                if attempt < max_retries - 1:
                    delay = 0.1 * (2 ** attempt)
                    logfile.write(f'\t### waiting for {delay} sec ###\n')
                    time.sleep(delay)
                else:
                    logfile.write(f'\t### max retries of {max_retries} used, will skip request ###\n')
            else:
                break

        if tracks is None:
            # Fail explicitly instead of tripping over an unbound variable.
            raise RuntimeError(
                f'search {category}:{search} failed after {max_retries} attempts'
            ) from last_error

        if len(tracks) == 0:
            logfile.write(f'\t### no entries ###\n')
            return None

        # Build all single-row frames first and concatenate once; growing a
        # DataFrame inside the loop is quadratic.
        track_frames = [get_track_with_uri(category, search, track) for track in tracks]
        track_uris = [track['uri'] for track in tracks]

        songs_wo_features = pd.concat(track_frames, axis=0).reset_index(drop=True)
        features = pd.json_normalize(sp.audio_features(track_uris)).reset_index(drop=True)
        # Both frames carry a fresh 0..n-1 index, so rows line up positionally.
        songs_with_features = pd.concat([songs_wo_features, features], axis=1)

        logfile.write(f'\t{len(tracks)} songs recieved\n')

    return songs_with_features

In [7]:
def get_songs_from_category(category, searchlist, limit=50, offset=0, savepoints=100):
    """Collect songs with audio features for every search term and save them as CSV.

    Each term is passed to ``get_songs_with_features``; failures are logged to
    'spotilogger.log' and skipped. The accumulated result is written to
    ``YOUR_SONG_DB_{category}_{what}_off{offset}.csv`` after every
    ``savepoints`` handled searches and once more at the end.

    Parameters:
        category: search field, also the name of the column holding the term.
        searchlist: iterable of search terms; a single string is treated as a
            one-element list.
        limit: number of tracks requested per search.
        offset: offset passed to every search.
        savepoints: write an intermediate CSV every this many handled
            searches (0 disables intermediate saves).

    Returns:
        A DataFrame with the rows of all successful searches; an empty
        DataFrame (and no CSV) when nothing could be collected.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(searchlist, str):
        searchlist = [searchlist]
    else:
        searchlist = list(searchlist)

    count = len(searchlist)
    # File name tag: the term itself for a single search, else the number of terms.
    what = searchlist[0] if count == 1 else count
    csv_path = f'YOUR_SONG_DB_{category}_{what}_off{offset}.csv'

    frames = []
    x = 0  # number of searches handled without an exception
    for search in searchlist:
        with open('spotilogger.log', 'a') as logfile:
            logfile.write(f'{category}{x:6.0f}: {search}\n')

        try:
            result = get_songs_with_features(category, search, limit, offset)
        except Exception as e:
            with open('spotilogger.log', 'a') as logfile:
                logfile.write(f'### something went wrong at datahandling: {e} ###\n')
            continue

        x += 1
        # Each frame already carries its own search term in the `category`
        # column, so the column is not rewritten here: doing so on the
        # accumulated frame would relabel all earlier rows with the latest term.
        if result is not None and len(result) > 0:
            frames.append(result)

        if frames and savepoints and x % savepoints == 0:
            pd.concat(frames, axis=0).to_csv(csv_path)

    if not frames:
        # Nothing collected (empty input, only failures, or only empty results).
        return pd.DataFrame()

    songdb = pd.concat(frames, axis=0)
    songdb.to_csv(csv_path)
    return songdb


In [8]:
# c = input('In which category do you want to search?')
# s = input('What do you want to search there?')
# get_songs_from_category(c, s.split(','))

# Supported categories: album, artist, track, year, genre
# Example: data = get_songs_from_category('artist', ['Metallica'], 50, 0, 100)

In [9]:
def get_arts_per_category(category, search, limit=50, offset=0):
    """Return the names of the artists found for ``{category}:{search}``.

    Queries the module-level ``sp`` client for artists, prints the search
    term and offset as a progress indicator, and returns the artist names as
    a list of strings.
    """
    query = f'{category}:{search}'
    response = sp.search(q=query, type='artist', limit=limit, offset=offset)
    found = response['artists']['items']
    print(search, offset)
    return [entry['name'] for entry in found]

In [10]:
# arts = []
# for x in range(0,500,50):
#     try:
#         arts.append([get_arts_per_category('year', i, offset=x) for i in range(1950,2024)])
#     except:
#         print('error')
#         continue

# 20 songs for each of 20 artists for each of 1380 genres:

In [None]:
# Load the genre names, one per line. Blank lines are dropped so they do not
# turn into empty searches.
with open('genres.txt', 'r') as f:
    genres = [line.rstrip('\n') for line in f]
genres = [g for g in genres if g]

# arts[offset_index][genre_index] is the list of artist names for that genre.
arts = []
for x in range(0, 50, 50):
    artists_for_offset = []
    for genre in genres:
        # Handle failures per genre: one bad request must not throw away the
        # results already collected for all other genres at this offset.
        try:
            artists_for_offset.append(get_arts_per_category('genre', genre, limit=20, offset=x))
        except Exception as e:
            print(f'error for genre {genre!r} at offset {x}: {e}')
    arts.append(artists_for_offset)

# 2min 18s

In [24]:
# Flatten the offset -> genre -> artist nesting into one list of names,
# drop duplicates, and store the result one name per line.
all_names = [
    name
    for per_offset in arts
    for per_genre in per_offset
    for name in per_genre
]
flat_list = list(set(all_names))

with open('artists.txt', 'w') as file:
    file.writelines(f'{art}\n' for art in flat_list)

8798

In [11]:
# Reload the saved artist names (one per line), removing the line breaks.
with open('artists.txt', 'r') as f:
    flat_list = [line.replace('\n', '') for line in f]

In [12]:
# Shuffle the artist list in place, then display how many artists it holds.
# NOTE(review): no random seed is set in the visible cells, so the order
# differs between runs — confirm whether reproducibility is needed.
random.shuffle(flat_list)
len(flat_list)

8798

In [13]:
def get_more_than_50_per(category, searches, start, end, batch_size, savepoints):
    """Page through search results to collect more than one batch per search term.

    Calls ``get_songs_from_category`` once for every offset in
    ``range(start, end, batch_size)`` and combines the batches.

    Parameters:
        category: search field forwarded to ``get_songs_from_category``.
        searches: list of search terms.
        start: first offset.
        end: offset at which paging stops (exclusive).
        batch_size: number of tracks per request and step between offsets.
        savepoints: forwarded to ``get_songs_from_category``.

    Returns:
        A DataFrame with the rows of all fetched batches, or None when no
        batch could be fetched.
    """
    batches = []
    for offset in range(start, end, batch_size):
        try:
            batch = get_songs_from_category(category, searches, limit=batch_size,
                                            offset=offset, savepoints=savepoints)
        except Exception as e:
            with open('spotilogger.log', 'a') as logfile:
                # Trailing newline keeps the next log entry on its own line.
                logfile.write(f'FATAL!!! {e.args}\n')
            # Stop paging on a fatal error, but keep what was already fetched.
            break
        if batch is not None and len(batch) > 0:
            batches.append(batch)

    if not batches:
        return None
    return pd.concat(batches, axis=0)
    

In [14]:
# The search argument is expected to be a list of search terms, so the single
# artist name is wrapped in a list.
get_more_than_50_per('artist', [flat_list[1]], 0, 20, 20, 10)