In [1]:
import config

import os
import numpy as np
import pandas as pd
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

from time import sleep
from random import randint

In [2]:
# Initialize Spotipy with the app credentials (client id / secret) kept in config.py
credentials_manager = SpotifyClientCredentials(client_id=config.client_id,
                                               client_secret=config.client_secret)
sp = spotipy.Spotify(auth_manager=credentials_manager)

## Get artist and track title

In [3]:
def display_artist(track_info):
    """Return all artist names of a track as a single ' / '-separated string.

    `track_info` must have an 'artists' entry: a sequence of mappings that
    each carry a 'name' key.
    """
    artist_names = (artist['name'] for artist in track_info['artists'])
    return " / ".join(artist_names)

In [4]:
def get_artist_etc_given_id(track_ids, print_status_rate = None):
    """Look up the title and artist names for each Spotify track id.

    One `sp.track` request (market 'GB') is made per id, in the order given.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids. Any iterable is accepted (list, tuple, pandas
        Series with any index); ids are visited by plain iteration.
    print_status_rate : int or None, optional
        When a non-zero integer, a progress line is printed every time the
        running record count is a multiple of it. None (default) or 0
        disables progress output.

    Returns
    -------
    pandas.DataFrame
        One row per id with columns 'title', 'artists' (list of artist
        names) and 'pp_artist' (artist names joined with ' / ').
    """
    titles = []
    artists = []
    pp_artist = []
    n = 0
    # Iterate directly instead of track_ids[i]: integer lookups on a pandas
    # Series are label-based, so positional access breaks on a slice whose
    # index does not start at 0.
    for n, trackid in enumerate(track_ids, start=1):
        # Truthiness check (rather than `is not None`) so a rate of 0 means
        # "no progress output" instead of raising ZeroDivisionError.
        if print_status_rate and n % print_status_rate == 0:
            print(f'processed {n} records')

        track_info = sp.track(trackid, market='GB')
        titles.append(track_info['name'])
        artists.append([x['name'] for x in track_info['artists']])
        pp_artist.append(display_artist(track_info))
    print(f"DONE! returning {n} records")
    return pd.DataFrame({'title': titles, 'artists': artists, 'pp_artist': pp_artist})

## load track ids from csvs with cluster labels

This is the final version of the code, copied from another notebook, with batching added.

In [6]:
## =========== load track ids AND CLUSTER INFO from csvs and fill in the artist etc info =======
# Each input csv is processed in chunks and every chunk is written to its own
# output csv, so an interrupted run keeps the chunks that were already saved.

infiles = {'A2': 'Data/nina-staging/batchA2_big_data_id_and_cluster_labels.csv',
            'C': 'Data/nina-staging/batchC_big_data_id_and_cluster_labels.csv'}
chunksize=500

for k, infile in infiles.items():
    print(f'Reading: {infile}')
    my_id_labels_df = pd.read_csv(infile, index_col=0)

    n_tracks = my_id_labels_df.shape[0]
    for s in range(0, n_tracks, chunksize):
        # Take the chunk positionally (.iloc clips at the end of the frame) and
        # reset its index to 0..len-1. pd.concat(axis=1) aligns rows on index
        # labels, and artist_etc_df is always indexed from 0; without the reset
        # every chunk after the first would be written misaligned and NaN-padded.
        chunk_df = my_id_labels_df.iloc[s : s + chunksize].reset_index(drop=True)
        artist_etc_df = get_artist_etc_given_id(chunk_df.id.to_list(),
                                               print_status_rate=200)
        my_track_db = pd.concat([chunk_df, artist_etc_df], axis=1)

        outfile = f'Data/db/big_data_batch{k}-{s}.csv'
        my_track_db.to_csv(outfile, index=False)
        print(f'Saved: {outfile}')

print('DONE')

Reading: Data/nina-staging/batchA2_big_data_id_and_cluster_labels.csv
processed 200 records
processed 400 records
DONE! returning 500 records
Saved: Data/db/big_data_batchA2-0.csv
processed 200 records
processed 400 records
DONE! returning 500 records
Saved: Data/db/big_data_batchA2-500.csv
processed 200 records
processed 400 records
DONE! returning 500 records
Saved: Data/db/big_data_batchA2-1000.csv
processed 200 records
processed 400 records


KeyboardInterrupt: 

## load track ids from csvs with audio features

In [206]:
## load track ids from csvs with audio features
indir = 'Data/audio_features/'
# os.listdir returns entries in arbitrary order and also lists non-csv entries
# (hidden files, checkpoint folders, ...). Keep only the csv files and sort
# them so the de-duplicated id order is the same on every run.
infile_list = sorted(entry for entry in os.listdir(indir) if entry.lower().endswith('.csv'))

dfs = []
for fname in infile_list:
    fname_tracks = pd.read_csv(os.path.join(indir, fname), index_col=None)
    # only the 'id' column of each file is needed
    dfs.append(fname_tracks.id)

# stack all ids, drop repeats (first occurrence wins) and renumber from 0
track_ids = pd.concat(dfs, axis=0)
track_ids = track_ids.drop_duplicates().reset_index(drop=True)

In [215]:
## fetch every track in track_ids and collect its title (name) and artist info
titles, artists, pp_artist = [], [], []
for trackid in track_ids:
    track_info = sp.track(trackid, market='GB')
    titles.append(track_info['name'])
    # list of artist names, plus the same names as one display string
    artist_names = [artist['name'] for artist in track_info['artists']]
    artists.append(artist_names)
    pp_artist.append(" / ".join(artist_names))


In [216]:
## --- combine the ids with their title and artist columns, side by side
track_details = pd.DataFrame({'title': titles,
                              'artists': artists,
                              'pp_artist': pp_artist})
our_track_db = pd.concat([track_ids, track_details], axis=1)
our_track_db.head(3)

Unnamed: 0,id,title,artists,pp_artist
0,5GAB1X0AJq2EZjxXP9zMFt,"Archie, Marry Me",[Alvvays],Alvvays
1,5xo8RrjJ9CVNrtRg2S3B1R,Motion Sickness,[Phoebe Bridgers],Phoebe Bridgers
2,7KdF7Zac5eC9jutk9Qret4,The Wire,[HAIM],HAIM


## Print track id, title, and artist to .csv


In [217]:
# Write the id / title / artist table to csv. The output folder is created
# first (no error if it already exists) so the write does not fail on a fresh
# checkout where Data/db is missing.
our_track_db_path = 'Data/db/nina_day4_tracks.csv'
os.makedirs(os.path.dirname(our_track_db_path), exist_ok=True)
our_track_db.to_csv(our_track_db_path, index=False)

### Aside... to debug

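Bah! This doesn't work right: mapping the function over the Series returns a single Series of (title, artist) tuples rather than two separate columns. Not sure how to map this function to the Series in one shot.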

In [190]:
def get_track_name_and_combinedartists(trackid):
    """Fetch one track (market 'GB') and return a (title, combined artists) tuple.

    The second element is the display string built by `display_artist`.
    Only exercised by the debugging aside in this notebook.
    """
    info = sp.track(trackid, market='GB')
    title = info['name']
    combined_artists = display_artist(info)
    return title, combined_artists

In [195]:
# Debug aside: apply the lookup helper to every element of test_tracks_df with
# .map; each element of the result is the (title, combined artists) tuple the
# helper returns.
# NOTE(review): test_tracks_df is not defined in any visible cell; presumably
# a Series of track ids -- confirm before re-running.
#test_tracks_df['title'], test_tracks_df['artist'] = 
test_return = test_tracks_df.map(get_track_name_and_combinedartists)

In [197]:
# show the mapped result
test_return

0                          (Archie, Marry Me, Alvvays)
1                   (Motion Sickness, Phoebe Bridgers)
2                                     (The Wire, HAIM)
3                             (Brazil, Declan McKenna)
4                                 (Hell N Back, Bakar)
5                     (Positive Force, Delicate Steve)
6    (Eat, Sleep, Wake (Nothing But You), Bombay Bi...
7                (Don’t Delete The Kisses, Wolf Alice)
8    (Funeral Singers, Sylvan Esso, Collections Of ...
9                    (Summer Girl - Bonus Track, HAIM)
Name: id, dtype: object

## Lookup hot list song info

In [9]:
# NOTE: get_spotify_info_for_all_tracks_in_hot_artist_title_csv is defined in a
# cell further down this notebook; on a fresh kernel that cell must run first.
get_spotify_info_for_all_tracks_in_hot_artist_title_csv('Data/Hot/billboard_hot100_2021-02-14.csv')

Saved 100 tracks in file: Data/db/billboard_hot100_2021-02-14.csv


In [10]:
# Same lookup for the UK hot-100 list (the function is defined in a later cell).
get_spotify_info_for_all_tracks_in_hot_artist_title_csv('Data/Hot/uk_hot100_2021-02-14.csv')

Saved 100 tracks in file: Data/db/uk_hot100_2021-02-14.csv


In [6]:
def get_spotify_info_for_all_tracks_in_hot_artist_title_csv(infilepath):
    """Search Spotify for every title in a hot-list csv and save the matches.

    The input csv must have a 'title' column. Each title is sent to
    `sp.search` (limit 1, market 'GB') and the top hit is taken as the match.
    The matched id, title and artist info are written to
    Data/db/<input file name>.csv and a summary line is printed.

    Titles for which the search returns no items are reported and skipped, so
    the output can have fewer rows than the input.

    NOTE(review): only the title is used in the query; if the input also has
    an artist column, adding it to the query would presumably make the top
    hit more reliable -- confirm the column name before changing this.

    Parameters
    ----------
    infilepath : str
        Path of the csv listing the songs to find.

    Returns
    -------
    None
    """
    # name of the input file without its (last) extension
    songlistname = os.path.splitext(os.path.basename(infilepath))[0]

    # makedirs creates missing parent folders too and is a no-op if the
    # folder already exists
    outdir = os.path.join('Data', 'db')
    os.makedirs(outdir, exist_ok=True)
    outfilename = os.path.join(outdir, f'{songlistname}.csv')

    my_songs_to_find = pd.read_csv(infilepath, index_col=None)

    ids = []
    titles = []
    artists = []
    pp_artist = []

    for song_title in my_songs_to_find['title']:
        ## these are hot songs, assume top hit is correct
        results = sp.search(q=song_title, limit=1, market="GB")
        items = results['tracks']['items']
        if not items:
            # an empty result list would otherwise raise IndexError and lose
            # everything collected so far
            print(f'No Spotify match for title: {song_title}')
            continue
        track_info = items[0]

        ids.append(track_info['id'])
        titles.append(track_info['name'])
        artists.append([x['name'] for x in track_info['artists']])
        pp_artist.append(display_artist(track_info))

    ## --- put the ids, title, and artist all together
    hot_tracks_db = pd.DataFrame({'id': ids, 'title': titles,
                                  'artists': artists, 'pp_artist': pp_artist})

    hot_tracks_db.to_csv(outfilename, index=False)
    print(f'Saved {hot_tracks_db.shape[0]} tracks in file: {outfilename}')

### Aside: searching spotify by user
According to the docs, a Spotify query type can be one or more of ‘artist’, ‘album’, ‘track’, ‘playlist’, ‘show’, and ‘episode’.

...so it seems you can't search by user

https://spotipy.readthedocs.io/en/2.19.0/#spotipy.client.Spotify.search

## ABANDONED code -- dies during the run due to http error -- needs better batching

In [None]:
## use the ids to get the title (name) and artist info for each track
## Abandoned first attempt at batching: results are buffered in memory and
## flushed to a csv every max_count tracks.
## NOTE(review): id_and_labels_df is not defined in any visible cell.

titles=[]
artists=[]
pp_artist=[]
counter = 0        # tracks buffered since the last flush
file_counter = 1   # incremented before each flush; also selects the row slice
max_count = 5000   # tracks per output file
n=0                # total tracks visited in this run (progress messages only)

# starts at row 5000, i.e. the first max_count rows are not processed here
for i in range(5000, id_and_labels_df.shape[0]):
    trackid = id_and_labels_df.id[i]
    n+=1
    if counter == max_count:
        # time to print results
        # file_counter is bumped first, so the first flush slices rows
        # 5000:10000 and writes big_data2.csv
        file_counter += 1
        # NOTE(review): pd.concat(axis=1) aligns on index labels; the slice
        # keeps its original labels while the new frame is indexed from 0, so
        # the rows presumably do not line up -- confirm before reusing.
        tracks_to_print = pd.concat([id_and_labels_df[(file_counter-1)*max_count:file_counter*max_count], 
                                     pd.DataFrame({'title': titles, 
                                                   'artists': artists, 
                                                   'pp_artist': pp_artist})], axis=1)
        
        outfilename = f'Data/db/big_data{file_counter}.csv'
        tracks_to_print.to_csv(outfilename, index=False)
        print(f"Created file ({counter} tracks): {outfilename}")
        
        
        # reset the buffers for the next file
        titles=[]
        artists=[]
        pp_artist=[]
        counter = 0

    if n % 400 == 0:
        print(f'processed {n} records!')
        
    # one API request per track
    track_info = sp.track(trackid, market='GB')
    titles.append(track_info['name'])
    artists.append([x['name'] for x in track_info['artists']])
    pp_artist.append(display_artist(track_info))
    counter += 1
   
## end of loop -- print the rest
# same flush as inside the loop, for whatever is still buffered
file_counter += 1
tracks_to_print = pd.concat([id_and_labels_df[(file_counter-1)*max_count:file_counter*max_count], 
                             pd.DataFrame({'title': titles, 
                                           'artists': artists, 
                                           'pp_artist': pp_artist})], axis=1)

outfilename = f'Data/db/big_data{file_counter}.csv'
tracks_to_print.to_csv(outfilename, index=False)
print(f"Created file ({counter} tracks): {outfilename}")