# Evaluations

## Get Recommendations

In [1]:
import json

# Load the evaluation records. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default text encoding.
with open('evaluation_data.json', encoding='utf-8') as f:
    val = json.load(f)

In [2]:
# Inspect the first record to see which fields each evaluation sample carries.
val[0]

{'artist': 'Passion Pit',
 'album': 'Manners',
 'song': '09 Sleepyhead',
 'start': 3767627,
 'stop': 5090627,
 'validation': True,
 'embedding': [0.11278816312551498,
  0.11143997311592102,
  -0.11756818741559982,
  -0.23810337483882904,
  0.054074134677648544,
  -0.027480294927954674,
  0.2202390432357788,
  0.07895691692829132,
  0.24123559892177582,
  0.11683941632509232,
  -0.06588985025882721,
  -0.2999226152896881,
  -0.0035447711125016212,
  -0.21891173720359802,
  -0.17247919738292694,
  0.10032028704881668,
  -0.20257359743118286,
  0.2879723012447357,
  0.060127925127744675,
  0.14642766118049622,
  0.041968923062086105,
  0.034772079437971115,
  -0.08529561758041382,
  -0.4657568335533142,
  0.11055739969015121,
  0.05489571765065193,
  -0.052408117800951004,
  -0.15117725729942322,
  0.03145952522754669,
  -0.044740334153175354,
  -0.297035813331604,
  -0.28945040702819824]}

In [3]:
import pandas as pd

# Turn the list of records into a DataFrame (one row per record) and preview it.
df = pd.DataFrame(val)
df.head()

Unnamed: 0,artist,album,song,start,stop,validation,embedding
0,Passion Pit,Manners,09 Sleepyhead,3767627,5090627,True,"[0.11278816312551498, 0.11143997311592102, -0...."
1,Passion Pit,Manners,09 Sleepyhead,3855168,5178168,True,"[0.11765195429325104, 0.09204290062189102, -0...."
2,Passion Pit,Manners,08 Eyes As Candles,5459328,6782328,False,"[0.16628490388393402, 0.060314495116472244, 0...."
3,Passion Pit,Manners,08 Eyes As Candles,2532575,3855575,False,"[0.29686233401298523, 0.016828976571559906, -0..."
4,Passion Pit,Manners,02 Little Secrets,5270976,6593976,True,"[0.2858065068721771, -0.1420089453458786, -0.0..."


In [4]:
# Strip the leading track-number token (e.g. "09 Sleepyhead" -> "Sleepyhead").
# The first token is removed only when it contains a digit, so a title whose
# first word has no digit is left untouched instead of losing that word (or
# being emptied) as it would with an unconditional split on the first space.
df['song'] = df.song.str.replace(r'^\S*\d\S*\s+', '', regex=True)

In [5]:
# Preview again to confirm the track-number prefix is gone from `song`.
df.head()

Unnamed: 0,artist,album,song,start,stop,validation,embedding
0,Passion Pit,Manners,Sleepyhead,3767627,5090627,True,"[0.11278816312551498, 0.11143997311592102, -0...."
1,Passion Pit,Manners,Sleepyhead,3855168,5178168,True,"[0.11765195429325104, 0.09204290062189102, -0...."
2,Passion Pit,Manners,Eyes As Candles,5459328,6782328,False,"[0.16628490388393402, 0.060314495116472244, 0...."
3,Passion Pit,Manners,Eyes As Candles,2532575,3855575,False,"[0.29686233401298523, 0.016828976571559906, -0..."
4,Passion Pit,Manners,Little Secrets,5270976,6593976,True,"[0.2858065068721771, -0.1420089453458786, -0.0..."


## Get Similarity Data

In [134]:
import base64
import datetime as dt
import os
import requests as rq


SPOTIFY_API_URL = 'https://api.spotify.com/v1'
# Index os.environ directly so a missing variable fails right here with a
# KeyError naming it, rather than as an opaque `None + str` TypeError below.
SPOTIFY_ID = os.environ['SPOTIFY_CLIENT_ID']
SPOTIFY_SECRET = os.environ['SPOTIFY_SECRET_ID']
# Value for the HTTP Basic `Authorization` header: base64 of "<id>:<secret>".
SPOTIFY_CREDENTIALS = base64.b64encode((SPOTIFY_ID + ':' + SPOTIFY_SECRET).encode('utf-8')).decode('ascii')


def authenticate():
    """Request an access token using the client-credentials grant.

    Posts the Basic-auth value in ``SPOTIFY_CREDENTIALS`` to the Spotify
    accounts token endpoint.

    Returns:
        dict: the decoded JSON body of the token response.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
        requests.Timeout: if no response arrives within the timeout.
    """
    response = rq.post(
        'https://accounts.spotify.com/api/token',
        data={'grant_type': 'client_credentials'},
        headers={'Authorization': 'Basic ' + SPOTIFY_CREDENTIALS},
        # requests waits indefinitely unless a timeout is given
        timeout=30,
    )
    # Fail here on rejected credentials instead of handing the caller an error
    # payload that only blows up later as a missing 'access_token' key.
    response.raise_for_status()
    return response.json()


def get_session():
    """Authenticate and build a helper for authorized Spotify API GET requests.

    Returns:
        tuple: ``(session, expiration_date)``. ``session(resource, *args,
        **kwargs)`` issues ``requests.get`` against
        ``SPOTIFY_API_URL + resource`` with the bearer token attached and
        forwards any extra arguments; ``expiration_date`` is the ``datetime``
        after which the token should be considered expired.

    Raises:
        KeyError: if the auth response lacks ``access_token`` or ``expires_in``.
    """
    # Take the timestamp before the request so the computed expiry never
    # overshoots the token's real lifetime.
    requested_at = dt.datetime.now()
    auth_response = authenticate()
    access_token = auth_response['access_token']
    # The datetime module is imported as `dt`; the bare name `datetime` is not
    # defined in this notebook.
    expiration_date = requested_at + dt.timedelta(seconds=auth_response['expires_in'])

    def session(resource, *args, **kwargs):
        return rq.get(
            SPOTIFY_API_URL + resource,
            headers={'Authorization': 'Bearer ' + access_token},
            *args,
            **kwargs
        )

    return session, expiration_date


def search_tracks(session, artist, song):
    """Search Spotify for tracks matching an artist name and song title.

    ``session`` is called with the ``/search`` resource and the query
    parameters; the decoded JSON body of its response is returned.
    """
    query_params = {'q': f'{artist} {song}', 'type': 'track'}
    response = session('/search', params=query_params)
    return response.json()

def parse_tracks(tracks):
    """Extract the ID and display metadata of the first hit in a search response.

    Returns:
        tuple: ``(track_id, meta)`` where ``meta`` has the keys ``track_name``,
        ``album_name`` and ``artist_names`` (artist names joined with ", ").

    Raises:
        IndexError: if the response contains no items.
        KeyError: if an expected key is missing from the response.
    """
    first_hit = tracks['tracks']['items'][0]
    track_id = first_hit['id']
    meta = dict(
        track_name=first_hit['name'],
        album_name=first_hit['album']['name'],
        artist_names=', '.join([artist['name'] for artist in first_hit['artists']]),
    )
    return track_id, meta


def get_audio_features(session, track_id):
    """Fetch the ``/audio-features/<track_id>`` resource and return its decoded JSON body."""
    return session(f'/audio-features/{track_id}').json()


# NOTE(review): `auth_response` is not assigned at module level anywhere in the
# visible notebook, so this line presumably relies on leftover kernel state and
# would raise NameError on a fresh run — confirm. Its saved output also exposes
# an access token.
auth_response

{'access_token': 'BQCbDuf0QrWCoLfN2uom_fy4LUmkhf4TSTJmqalehpgt6VhoPmnMmht0yefWbNy8pmttzY7nx2Gea1Y2KUM',
 'token_type': 'Bearer',
 'expires_in': 3600,
 'scope': ''}

In [127]:
# Smoke-test the token request.
# NOTE(review): the returned access token is echoed into the saved output.
authenticate()

{'access_token': 'BQCBY40pHfqJDWWsDz4_VsR6fT9EhmNxcCAzrFAOWvY6yrtulLc400JK5pJlm3fjtwf-opvl7wEOY0K40ts',
 'token_type': 'Bearer',
 'expires_in': 3600,
 'scope': ''}

In [128]:
# NOTE(review): `r` is only assigned in a cell that appears further down
# (execution count 89), so this works only with out-of-order execution.
parse_tracks(r)

('1Q5kgpp4pmyGqPwNBzkSrw',
 {'track_name': 'Roadhouse Blues',
  'album_name': 'Morrison Hotel',
  'artist_names': 'The Doors'})

In [135]:
# Example search; expects `session` to be the request callable returned by
# get_session().
search_tracks(session, 'Death Cab For Cutie', 'Passenger Seat')

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Death+Cab+For+Cutie+Passenger+Seat&type=track&offset=0&limit=20',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/0YrtvWJMgSdVrk3SfNjTbx'},
       'href': 'https://api.spotify.com/v1/artists/0YrtvWJMgSdVrk3SfNjTbx',
       'id': '0YrtvWJMgSdVrk3SfNjTbx',
       'name': 'Death Cab for Cutie',
       'type': 'artist',
       'uri': 'spotify:artist:0YrtvWJMgSdVrk3SfNjTbx'}],
     'available_markets': ['AD',
      'AE',
      'AL',
      'AR',
      'AT',
      'AU',
      'BA',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'BY',
      'CA',
      'CH',
      'CL',
      'CO',
      'CR',
      'CY',
      'CZ',
      'DE',
      'DK',
      'DO',
      'DZ',
      'EC',
      'EE',
      'EG',
      'ES',
      'FI',
      'FR',
      'GB',
      'GR',
      'GT',
      'HK',
      'HN',
      'HR',
      'HU',
      'ID',
      'IE

In [137]:
# Show the current (artist, song) pair — presumably the loop variables left
# over from the download loop; confirm against the execution order.
artist, song

('Death Cab For Cutie', 'Pessenger Seat')

In [None]:
import json
import time

from tqdm.notebook import tqdm

session, expiration_date = get_session()

# The evaluation frame holds several clips per song; query each song only once.
artist_songs = df[['artist', 'song']].drop_duplicates()

for _, (artist, song) in tqdm(artist_songs.iterrows(), total=len(artist_songs)):
    output_dir = os.path.join('audio-features', artist, song)
    output_dest = os.path.join(output_dir, 'audio_features.json')

    # results are cached on disk, so an interrupted run can simply be restarted
    if os.path.exists(output_dest):
        continue

    # reauthenticate if necessary
    if dt.datetime.now() > expiration_date:
        session, expiration_date = get_session()

    try:
        # get track ID and meta data
        tracks = search_tracks(session, artist, song)
        track_id, track_meta = parse_tracks(tracks)
    except (KeyError, IndexError):
        # no usable search hit: empty result list or a payload without 'tracks'
        continue

    # get the audio features
    audio_features = get_audio_features(session, track_id)

    # The Web API reports failures as a JSON object with an 'error' key. Never
    # cache such a payload: the exists-check above would skip the song on every
    # later run and the failure would become permanent.
    if 'error' not in audio_features:
        # update the audio features and dump
        audio_features.update(track_meta)
        os.makedirs(output_dir, exist_ok=True)
        with open(output_dest, 'w') as f:
            json.dump(audio_features, f)

    # stay well below the API rate limit
    time.sleep(1)

HBox(children=(FloatProgress(value=0.0, max=10904.0), HTML(value='')))

In [133]:
# Inspect the most recent search response; the saved output shows a query that
# returned no items.
tracks

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Death+Cab+For+Cutie+%22Pessenger+Seat%22&type=track&offset=0&limit=20',
  'items': [],
  'limit': 20,
  'next': None,
  'offset': 0,
  'previous': None,
  'total': 0}}

In [107]:
# Title of the first hit in the raw search response `r`.
r['tracks']['items'][0]['name']

'Roadhouse Blues'

In [87]:
# get_session() returns (session, expiration_date); unpack it so `session` is
# the request callable rather than the whole tuple.
session, expiration_date = get_session()
# Resolve the track ID with the search helpers defined in this notebook.
track_id, track_meta = parse_tracks(search_tracks(session, 'The Doors', 'Roadhouse Blues'))
get_audio_features(session, track_id)

{'danceability': 0.612,
 'energy': 0.634,
 'key': 9,
 'loudness': -8.212,
 'mode': 1,
 'speechiness': 0.0319,
 'acousticness': 0.337,
 'instrumentalness': 4.6e-05,
 'liveness': 0.147,
 'valence': 0.904,
 'tempo': 121.059,
 'type': 'audio_features',
 'id': '1Q5kgpp4pmyGqPwNBzkSrw',
 'uri': 'spotify:track:1Q5kgpp4pmyGqPwNBzkSrw',
 'track_href': 'https://api.spotify.com/v1/tracks/1Q5kgpp4pmyGqPwNBzkSrw',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1Q5kgpp4pmyGqPwNBzkSrw',
 'duration_ms': 243827,
 'time_signature': 4}

In [72]:
# get_session() returns (session, expiration_date); unpack it so `session` is
# the request callable rather than the whole tuple.
session, expiration_date = get_session()
session('/audio-features/1Q5kgpp4pmyGqPwNBzkSrw')

<Response [200]>

In [37]:
# Pull the bearer token out of the auth payload.
# NOTE(review): echoing it stores the token in the saved notebook output.
access_token = auth_response['access_token']
access_token

'BQCbDuf0QrWCoLfN2uom_fy4LUmkhf4TSTJmqalehpgt6VhoPmnMmht0yefWbNy8pmttzY7nx2Gea1Y2KUM'

In [54]:
# NOTE(review): `query` is not defined in the visible notebook — presumably an
# earlier name for the callable now returned by get_session(); confirm or
# replace before a fresh run.
query('/audio-features/1Q5kgpp4pmyGqPwNBzkSrw').json()

{'danceability': 0.612,
 'energy': 0.634,
 'key': 9,
 'loudness': -8.212,
 'mode': 1,
 'speechiness': 0.0319,
 'acousticness': 0.337,
 'instrumentalness': 4.6e-05,
 'liveness': 0.147,
 'valence': 0.904,
 'tempo': 121.059,
 'type': 'audio_features',
 'id': '1Q5kgpp4pmyGqPwNBzkSrw',
 'uri': 'spotify:track:1Q5kgpp4pmyGqPwNBzkSrw',
 'track_href': 'https://api.spotify.com/v1/tracks/1Q5kgpp4pmyGqPwNBzkSrw',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1Q5kgpp4pmyGqPwNBzkSrw',
 'duration_ms': 243827,
 'time_signature': 4}

In [89]:
# Raw track search with the song title quoted in the query string.
# NOTE(review): `query` is not defined in the visible notebook — presumably an
# earlier name for the callable now returned by get_session(); confirm.
r = query('/search', params={'q': 'the doors "roadhouse blues"', 'type': 'track'}).json()
r

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=the+doors+%22roadhouse+blues%22&type=track&offset=0&limit=20',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/22WZ7M8sxp5THdruNY3gXt'},
       'href': 'https://api.spotify.com/v1/artists/22WZ7M8sxp5THdruNY3gXt',
       'id': '22WZ7M8sxp5THdruNY3gXt',
       'name': 'The Doors',
       'type': 'artist',
       'uri': 'spotify:artist:22WZ7M8sxp5THdruNY3gXt'}],
     'available_markets': ['AD',
      'AE',
      'AL',
      'AR',
      'AT',
      'AU',
      'BA',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'BY',
      'CA',
      'CH',
      'CL',
      'CO',
      'CR',
      'CY',
      'CZ',
      'DE',
      'DK',
      'DO',
      'DZ',
      'EC',
      'EE',
      'EG',
      'ES',
      'FI',
      'FR',
      'GB',
      'GR',
      'GT',
      'HK',
      'HN',
      'HR',
      'HU',
      'ID',
      'IE',
      'IL'

In [96]:
# Album name and artist entries of the first hit in the raw search response `r`.
r['tracks']['items'][0]['album']['name'], r['tracks']['items'][0]['artists']

('Morrison Hotel',
 [{'external_urls': {'spotify': 'https://open.spotify.com/artist/22WZ7M8sxp5THdruNY3gXt'},
   'href': 'https://api.spotify.com/v1/artists/22WZ7M8sxp5THdruNY3gXt',
   'id': '22WZ7M8sxp5THdruNY3gXt',
   'name': 'The Doors',
   'type': 'artist',
   'uri': 'spotify:artist:22WZ7M8sxp5THdruNY3gXt'}])

In [25]:
# NOTE(review): this evaluates to the Basic-auth credential (base64 of
# "<id>:<secret>", which is trivially reversible). Do not keep its output in a
# shared or committed notebook.
base64.b64encode((SPOTIFY_ID + ':' + SPOTIFY_SECRET).encode('utf-8')).decode('ascii')

'<redacted: base64 of SPOTIFY_CLIENT_ID:SPOTIFY_SECRET_ID; this credential was exposed in saved output and should be rotated>'

In [35]:
# NOTE(review): despite the name, this binds the whole decoded auth payload,
# not just the token. Presumably `auth_response` was a requests Response when
# this cell ran — confirm; the saved output also exposes an access token.
access_token = auth_response.json()

{'access_token': 'BQB0l9QSWxUH6O8gDuli2iTlr9dh1LepS9hfAfoJO1_dPlwfsgKaoPpEmaaQHHdxJs2yipuzND84jiv2XZY',
 'token_type': 'Bearer',
 'expires_in': 3600,
 'scope': ''}

In [44]:
# import requests as rq

# similar_tracks = {}

# LASTFM_API_KEY = os.environ['LASTFM_API_KEY']  # read from the environment; never commit API keys
# LASTFM_API_URL = 'http://ws.audioscrobbler.com/2.0/'
# LIMIT = 30


# def fetch_similar_tracks(artist, song):
#     try:
#         response = rq.get(LASTFM_API_URL, params={
#             'method': 'track.getsimilar',
#             'artist': artist,
#             'track': song,
#             'api_key': LASTFM_API_KEY,
#             'limit': 30,
#             'format': 'json'
#         }).json()
#         matches = extract_similarity_data(response)
#     except Exception as err:
#         return str(err)
#     return matches
    

# def extract_similarity_data(response):
#     tracks = response['similartracks']['track']
#     return [
#         {'song': t['name'], 'artist': t['artist']['name'], 'match': t['match']}
#         for t in tracks]
    

# response = fetch_similar_tracks('Grateful Dead', 'Fire On The Mountain')

# response

[{'song': "Franklin's Tower", 'artist': 'Grateful Dead', 'match': 1.0},
 {'song': 'Estimated Prophet', 'artist': 'Grateful Dead', 'match': 0.988805},
 {'song': 'Free', 'artist': 'Phish', 'match': 0.190383},
 {'song': 'My Sisters And Brothers',
  'artist': 'Jerry Garcia Band',
  'match': 0.169381},
 {'song': 'Stash', 'artist': 'Phish', 'match': 0.167339},
 {'song': 'Greatest Story Ever Told', 'artist': 'Bob Weir', 'match': 0.147924},
 {'song': 'Simple Twist of Fate',
  'artist': 'Jerry Garcia Band',
  'match': 0.147743},
 {'song': 'Deal', 'artist': 'Jerry Garcia', 'match': 0.144996},
 {'song': 'The Weight', 'artist': 'The Band', 'match': 0.136307},
 {'song': 'Sugaree', 'artist': 'Jerry Garcia', 'match': 0.120767},
 {'song': 'Blue Sky', 'artist': 'The Allman Brothers Band', 'match': 0.107484},
 {'song': 'Dixie Chicken', 'artist': 'Little Feat', 'match': 0.0934525},
 {'song': 'Jessica', 'artist': 'The Allman Brothers Band', 'match': 0.0923399},
 {'song': 'Up On Cripple Creek', 'artist': '

In [47]:
# import time

# from tqdm.notebook import tqdm

# similar_songs = {}

# items = df[['artist', 'song']].drop_duplicates()

# for _, r in tqdm(items.iterrows(), total=len(items)):
#     similar_songs[(r.artist, r.song)] = fetch_similar_tracks(r.artist, r.song)
#     time.sleep(1)

HBox(children=(FloatProgress(value=0.0, max=10904.0), HTML(value='')))




In [4]:
# ! wget http://millionsongdataset.com/sites/default/files/lastfm/lastfm_train.zip
# ! unzip -q lastfm_train.zip

In [2]:
# ! wget http://millionsongdataset.com/sites/default/files/lastfm/lastfm_test.zip
# ! unzip -q lastfm_test.zip