# VL6 Python Implementation
### Danny Akimchuk

In [None]:
import json
import os
import numpy as np
import multiprocessing
from collections import defaultdict
from copy import copy
import nltk
nltk.download('punkt')
from joblib import Parallel, delayed
import multiprocessing
import string
from threading import Thread
import tqdm
import implicit #use conda install -c conda-forge implicit 
from scipy.sparse import csr_matrix, find, lil_matrix, dok_matrix
import time
from colorama import Fore, Back, Style
from sklearn import metrics as skmet


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/akimchukdaniel/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Object Structures

In [2]:
class Playlist:
    def __init__(self, name, collaborative, pid, modified_at, num_tracks, num_albums, num_followers, tracks):
        self.name = name
        self.collaborative = collaborative
        self.pid = pid
        self.modified_at = modified_at
        self.num_tracks = num_tracks
        self.num_albums = num_albums
        self.num_followers = num_followers
        self.tracks = tracks
        self.features = {}

class Track:
    def __init__(self, artist_name, track_uri, artist_uri, track_name, album_uri, duration_ms, album_name):
        self.artist_name = artist_name
        self.track_uri = track_uri
        self.artist_uri = artist_uri
        self.track_name = track_name
        self.album_uri = album_uri
        self.duration_ms = duration_ms
        self.album_name = album_name
        self.features = {}

class PlaylistTrack:
    def __init__(self, track, pos):
        self.track = track
        self.pos = pos

class SparseMatrix:
    def __init__(self, num_rows, num_cols, entries=None):
        #NOTE: entries are (row, col, value)
        self.num_rows = num_rows
        self.num_cols = num_cols
        
        self.rows = {}
        
        if entries is not None:
            for row, col, value in entries:
                if row not in self.rows:
                    self.rows[row] = defaultdict(lambda: 0)
                self.rows[row][col] = value
    
    def setValue(self, row, col, value):
        if row not in self.rows:
            self.rows[row] = defaultdict(lambda: 0)
        self.rows[row][col] = value
    
    def getValue(self, row, col):
        if row in self.rows:
            return self.rows[row][col]
        return 0
    
    def getRow(self, row):
        if row in self.rows:
            to_return = np.zeros((self.num_cols,))
            for col, value in self.rows[row].items():
                to_return[col] = value
            return to_return
        return None
    
    def getCol(self, col):
        colVec = np.zeros(self.num_rows)
        for ind, ints in self.rows.items():
            colVec[ind] = ints[col]
        return colVec
    
    def printMatrix(self):
        for row in range(self.num_rows):
            print(self.getRow(row))
            
    def getInteractions(self):
        interactions = []
        for row, vals in self.rows.items():
            for col, val in vals.items():
                interactions.append((row, col, val))
        return interactions
    
    def getTransposeInteractions(self):
        interactions = []
        for row, vals in self.rows.items():
            for col, val in vals.items():
                interactions.append((col, row, val))
        return interactions
    
    def getRowInteractions(self, row):
        interactions = []
        for interaction in self.rows[row].items():
            interactions.append(interaction)
        return interactions
    
    def getTranspose(self):
        newMatr = SparseMatrix(self.num_cols, self.num_rows, self.getTransposeInteractions())
        return newMatr

class Artist:
    def __init__(self, name, uri, popularity):
        self.name = name
        self.uri = uri
        self.popularity = popularity
    def __str__(self):
        return "Name: " + self.name + ", URI: " + self.uri + ", Popularity: " + str(self.popularity)
    

## Import Data

In [3]:
local_data_path = "/Users/akimchukdaniel/Google Drive/locals.json"
local_artists = {}
local_data_file = open(local_data_path)
file_contents= local_data_file.read()
local_json = json.loads(file_contents)
for city in local_json:
    local_artists[city] = {}
    for artist in local_json[city]:
        local_artists[city][artist["artist_uri"]] = Artist(artist["artist_name"], artist["artist_uri"], artist["artist_popularity"])
print("Done importing local artists.")

Done importing local artists.


In [4]:
data_path = "/Users/akimchukdaniel/mpd_data/mpd.v1/data_big/"
test_data_path = "/Users/akimchukdaniel/mpd_data/challenge.v1/challenge_set.json"
city_to_test = "Los Angeles"
tracks = {}
track_ids = []
track_id_len = 0
track_id_to_index = {}
playlists = {}
pids = []
test_pids = []
local_pids = []
potential_eval_pids = []
interactions = {}
file_count = 0
filenames = list(os.listdir(data_path))
for i in range(len(filenames)):
    filenames[i] = data_path + filenames[i]
filenames.append(test_data_path)

for filename in tqdm.tqdm(filenames):
    if filename == ".DS_Store":
        continue
    if filename == test_data_path:
        num_playlists_train = num_playlists
        isTest = True
    else:
        isTest = False
    data_file = open(filename)
    file_count += 1
    file_contents = data_file.read()
    jsonArray = json.loads(file_contents)
    for playlist_data in jsonArray["playlists"]:
        is_local = False
        num_local = 0
        try:
            pid = int(playlist_data["pid"])
            
            try:
                name = playlist_data["name"]
            except:
                name = None
                
            try:
                collab = playlist_data["collaborative"] == 'true'
            except:
                collab = None
            
            try:
                modified_at = int(playlist_data["modified_at"])
            except:
                modified_at = None
            
            try:
                num_tracks = int(playlist_data["num_tracks"])
            except:
                num_tracks = None
            
            try:
                num_albums = int(playlist_data["num_albums"])
            except:
                num_albums = None
                
            try:
                num_followers = int(playlist_data["num_followers"])
            except:
                num_followers = None
                
            try:
                tracks_data = playlist_data["tracks"]
                playlist_interactions = defaultdict(lambda: 0)
                playlist_tracks = []
                for track_data in tracks_data:
                    track_uri = track_data["track_uri"]
                    if track_uri in tracks:
                        track = tracks[track_uri]
                    else:
                        artist_name = track_data["artist_name"]
                        artist_uri = track_data["artist_uri"]
                        if artist_uri in local_artists[city_to_test]:
                            num_local += 1
                            is_local = True
                            #print(artist_name, "is local")
                        track_name = track_data["track_name"]
                        album_uri = track_data["album_uri"]
                        duration_ms = track_data["duration_ms"]
                        album_name = track_data["album_name"]
                        track = Track(artist_name, track_uri, artist_uri, track_name, album_uri, duration_ms, album_name)
                        tracks[track_uri] = track
                        track_ids.append(track_uri)
                        track_id_to_index[track_uri] = track_id_len
                        track_id_len += 1
                    try:
                        pos = int(track_data["pos"])
                    except:
                        pos = None
                    playlist_tracks.append(PlaylistTrack(track, pos))
                    playlist_interactions[track_uri] = playlist_interactions[track_uri] + 1
            except:
                playlist_tracks = []
                playlist_interactions = None
            
            playlist = Playlist(name, collab, pid, modified_at, num_tracks, num_albums, num_followers, playlist_tracks)
            playlists[pid] = playlist
            interactions[pid] = playlist_interactions
            pids.append(pid)
            if isTest:
                test_pids.append(pid)
            if is_local:
                #print(pid,"is a local playlist")
                local_pids.append(pid)
                potential_eval_pids.append(pid)
        except Exception as e:
            print(str(e))
            pass
    num_playlists = len(playlists)
    num_tracks = len(tracks)

print("Imported " + str(num_playlists) + " playlists containing " + str(num_tracks) + " unique tracks from " + str(file_count) + " files.")
print("Local Playlists: " + str(len(local_pids)))
del filenames

    

100%|██████████| 1001/1001 [13:26<00:00,  1.12it/s] 

Imported 1010000 playlists containing 2262292 unique tracks from 1001 files.
Local Playlists: 375





## Get Playlist Features

In [None]:
# punctRemover = str.maketrans('', '', string.punctuation)
# punctToSpace = str.maketrans(string.punctuation, ' '*len(string.punctuation)) #map punctuation to space

# playlist_features = ["name_tokenized", "name_regexed", "name_original", "n_tracks"]

# num_unique = {}

# name_tokenized_map = {}
# name_regexed_map = {}
# name_original_map = {}

# for feature in playlist_features:
#     num_unique[feature] = 0

# def getIndexForFeature(feature, feature_map, feature_name):
#     if feature in feature_map:
#         return feature_map[feature]
#     else:
#         index = num_unique[feature_name]
#         num_unique[feature_name] += 1
#         feature_map[feature] = index
#         return index

# playlist_count = 0
# for pid in tqdm.tqdm(pids):
#     playlist_count += 1
#     playlist = playlists[pid]
#     for feature in playlist_features:
#         if feature == "name_tokenized":
#             if playlist.name is not None:
#                 name = playlist.name
#                 # vl6 uses a lucene text tokenizer, mine is a bit different.
#                 # this treats plural as different than the original
#                 # it also allows emoji/unicode characters that I think they remove.
#                 # may want to tweak
#                 tokens_full = nltk.word_tokenize(name)
#                 token_indices = []
#                 for token in tokens_full:
#                     simp = token.lower()
#                     simp = simp.translate(punctRemover)
#                     index = getIndexForFeature(simp, name_tokenized_map, feature)
#                     if index not in token_indices:
#                         token_indices.append(index)
                
#                 if len(token_indices) > 0:
#                     playlist.features[feature] = token_indices
#                 else:
#                     playlist.features[feature] = None
#         elif feature == "name_regexed":
#             if playlist.name is not None:
#                 name = playlist.name
#                 simp = name.lower()
#                 simp = simp.translate(punctToSpace)
#                 simp = ' '.join(simp.split())
#                 index = getIndexForFeature(simp, name_regexed_map, feature)
#                 playlist.features[feature] = index
#         elif feature == "name_original":
#             if playlist.name is not None:
#                 index = getIndexForFeature(playlist.name, name_original_map, feature)
#                 playlist.features[feature] = index
#         elif feature == "n_tracks":
#             playlist.features[feature] = playlist.num_tracks
# print("Built features for", playlist_count, "playlists...")

# del name_tokenized_map
# del name_regexed_map
# del name_original_map

# num_playlists = len(pids)

# #build name_tokenized feature matrix
# #name_tokenized_matrix = SparseMatrix(num_playlists, num_unique["name_tokenized"])
# name_tokenized_matrix_rows = []
# name_tokenized_matrix_cols = []
# name_tokenized_matrix_vals = []
# name_tokenized_matrix = None
# def buildTokenized():
#     global name_tokenized_matrix
#     print("Building tokenized matrix...")
#     for i in range(len(pids)):
#         playlist = playlists[pids[i]]
#         if "name_tokenized" in playlist.features:
#             for token_index in playlist.features["name_tokenized"]:
#                 #name_tokenized_matrix.setValue(i, token_index, 1)
#                 name_tokenized_matrix_rows.append(i)
#                 name_tokenized_matrix_cols.append(token_index)
#                 name_tokenized_matrix_vals.append(1)
#     name_tokenized_matrix = csr_matrix((name_tokenized_matrix_vals, (name_tokenized_matrix_rows, name_tokenized_matrix_cols)), shape=(num_playlists,num_unique["name_tokenized"]))
#     print("Done building tokenized matrix.")
            
# #build name_regexed feature matrix
# #name_regexed_matrix = SparseMatrix(num_playlists, num_unique["name_regexed"])
# name_regexed_matrix_rows = []
# name_regexed_matrix_cols = []
# name_regexed_matrix_vals = []
# name_regexed_matrix = None
# def buildRegexed():
#     global name_regexed_matrix
#     print("Building regexed matrix...")
#     for i in range(len(pids)):
#         playlist = playlists[pids[i]]
#         if "name_regexed" in playlist.features:
#             #name_regexed_matrix.setValue(i, playlist.features["name_regexed"], 1)
#             name_regexed_matrix_rows.append(i)
#             name_regexed_matrix_cols.append(playlist.features["name_regexed"])
#             name_regexed_matrix_vals.append(1)
#     name_regexed_matrix = csr_matrix((name_regexed_matrix_vals, (name_regexed_matrix_rows, name_regexed_matrix_cols)), shape=(num_playlists,num_unique["name_regexed"]))
#     print("Done building regexed matrix.")

# #build name_original feature matrix
# #name_original_matrix = SparseMatrix(num_playlists,num_unique["name_original"])
# name_original_matrix_rows = []
# name_original_matrix_cols = []
# name_original_matrix_vals = []
# name_original_matrix = None
# def buildOriginal():
#     global name_original_matrix
#     print("Building original matrix...")
#     for i in range(len(pids)):
#         playlist = playlists[pids[i]]
#         if "name_original" in playlist.features:
#             #name_original_matrix.setValue(i,playlist.features["name_original"], 1)
#             name_original_matrix_rows.append(i)
#             name_original_matrix_cols.append(playlist.features["name_original"])
#             name_original_matrix_vals.append(1)
#     name_original_matrix = csr_matrix((name_original_matrix_vals, (name_original_matrix_rows, name_original_matrix_cols)), shape=(num_playlists,num_unique["name_original"]))
#     print("Done building original matrix.")

# #n_tracks_matrix = SparseMatrix(1, num_playlists) # just a vector
# n_tracks_matrix_rows = []
# n_tracks_matrix_cols = []
# n_tracks_matrix_vals = []
# n_tracks_matrix = None
# def buildNTracks():
#     global n_tracks_matrix
#     print("Building nTracks matrix...")
#     for i in range(len(pids)):
#         playlist = playlists[pids[i]]
#         if "n_tracks" in playlist.features:
#             #n_tracks_matrix.setValue(0,i, playlist.features["n_tracks"])
#             n_tracks_matrix_rows.append(i)
#             n_tracks_matrix_cols.append(0)
#             n_tracks_matrix_vals.append(playlist.features["n_tracks"])
#     n_tracks_matrix = csr_matrix((n_tracks_matrix_vals, (n_tracks_matrix_rows, n_tracks_matrix_cols)), shape=(num_playlists,1))
#     print("Done building nTracks matrix.")

# tokenizedThread = Thread(target=buildTokenized)
# regexedThread = Thread(target=buildRegexed)
# originalThread = Thread(target=buildOriginal)
# nTracksThread = Thread(target=buildNTracks)

# tokenizedThread.start()
# regexedThread.start()
# originalThread.start()
# nTracksThread.start()

# tokenizedThread.join()
# regexedThread.join()
# originalThread.join()
# nTracksThread.join()

# del playlists


# print("Constructed sparse feature matrix")
# print("Features for Name Tokens:", num_unique["name_tokenized"])
# print("Features for Regexed Name:", num_unique["name_regexed"])
# print("Features for Original Name:", num_unique["name_original"])
# print("Features for Number of Tracks: 1")

## Get Song Features

In [None]:
# song_features = ['track_name', 'artist_id', 'album_id', 'duration']

# for feature in song_features:
#     num_unique[feature] = 0

# track_name_map = {}
# artist_id_map = {}
# album_id_map = {}

# track_count = 0
    
# for track_id in tqdm.tqdm(track_ids):
#     track_count += 1
#     track = tracks[track_id]
#     for feature in song_features:
#         if feature == "track_name":
#             name = track.track_name
#             # vl6 uses a lucene text tokenizer, mine is a bit different.
#             # this treats plural as different than the original
#             # it also allows emoji/unicode characters that I think they remove.
#             # may want to tweak
#             tokens_full = nltk.word_tokenize(name)
#             token_indices = []
#             for token in tokens_full:
#                 simp = token.lower()
#                 simp = simp.translate(punctRemover)
#                 index = getIndexForFeature(simp, track_name_map, feature)
#                 if index not in token_indices:
#                     token_indices.append(index)
#             if len(token_indices) > 0:
#                 track.features[feature] = token_indices
#             else:
#                 track.features[feature] = None
#         elif feature == "artist_id":
#             artist_id = track.artist_uri
#             index = getIndexForFeature(artist_id, artist_id_map, feature)
#             track.features[feature] = index
#         elif feature == "album_id":
#             album_id = track.album_uri
#             index = getIndexForFeature(album_id, album_id_map, feature)
#             track.features[feature] = index
#         elif feature == "duration":
#             duration = track.duration_ms # vl6 converts this to seconds?
#             track.features[feature] = duration 

# del track_name_map
# del artist_id_map
# del album_id_map

# num_tracks = len(track_ids)

# #build track_name feature matrix
# #track_name_matrix = SparseMatrix(num_tracks, num_unique["track_name"])
# track_name_matrix_rows = []
# track_name_matrix_cols = []
# track_name_matrix_vals = []
# track_name_matrix = None
# def buildTrackName():
#     global track_name_matrix
#     print("Building track name matrix...")
#     for i in range(len(track_ids)):
#         track = tracks[track_ids[i]]
#         if "track_name" in track.features:
#             for token_index in track.features["track_name"]:
#                 #track_name_matrix.setValue(i, token_index, 1)
#                 track_name_matrix_rows.append(i)
#                 track_name_matrix_cols.append(token_index)
#                 track_name_matrix_vals.append(1)
#     track_name_matrix = csr_matrix((track_name_matrix_vals, (track_name_matrix_rows, track_name_matrix_cols)), shape=(num_tracks,num_unique["track_name"]))
#     print("Done building track name matrix.")

# #build artist_id feature matrix
# #artist_id_matrix = SparseMatrix(num_tracks, num_unique["artist_id"])
# artist_id_matrix_rows = []
# artist_id_matrix_cols = []
# artist_id_matrix_vals = []
# artist_id_matrix = None
# def buildArtistID():
#     global artist_id_matrix
#     print("Building artist ID matrix...")
#     for i in range(len(track_ids)):
#         track = tracks[track_ids[i]]
#         if "artist_id" in track.features:
#             #artist_id_matrix.setValue(i,track.features['artist_id'], 1)
#             artist_id_matrix_rows.append(i)
#             artist_id_matrix_cols.append(track.features['artist_id'])
#             artist_id_matrix_vals.append(1)
#     artist_id_matrix = csr_matrix((artist_id_matrix_vals, (artist_id_matrix_rows, artist_id_matrix_cols)), shape=(num_tracks,num_unique["artist_id"]))
#     print("Done building artist ID matrix.")

# #build album_id feature matrix
# #album_id_matrix = SparseMatrix(num_tracks, num_unique["album_id"])
# album_id_matrix_rows = []
# album_id_matrix_cols = []
# album_id_matrix_vals = []
# album_id_matrix = None
# def buildAlbumID():
#     global album_id_matrix
#     print("Building album ID matrix...")
#     for i in range(len(track_ids)):
#         track = tracks[track_ids[i]]
#         if "album_id" in track.features:
#             #album_id_matrix.setValue(i,track.features["album_id"], 1)
#             album_id_matrix_rows.append(i)
#             album_id_matrix_cols.append(track.features["album_id"])
#             album_id_matrix_vals.append(1)
#     album_id_matrix = csr_matrix((album_id_matrix_vals, (album_id_matrix_rows, album_id_matrix_cols)), shape=(num_tracks,num_unique["album_id"]))
#     print("Done building album ID matrix.")

# #build duration feature matrix
# #duration_matrix = SparseMatrix(1, num_tracks)
# duration_matrix_rows = []
# duration_matrix_cols = []
# duration_matrix_vals = []
# duration_matrix = None
# def buildDuration():
#     global duration_matrix
#     print("Building duration matrix...")
#     for i in range(len(track_ids)):
#         track = tracks[track_ids[i]]
#         if "duration" in track.features:
#             #duration_matrix.setValue(0,i,track.features["duration"])
#             duration_matrix_rows.append(i)
#             duration_matrix_cols.append(0)
#             duration_matrix_vals.append(track.features["duration"])
#     duration_matrix = csr_matrix((duration_matrix_vals, (duration_matrix_rows, duration_matrix_cols)), shape=(num_tracks,1))
#     print("Done building duration matrix.")
            
# trackThread = Thread(target=buildTrackName)
# artistThread = Thread(target=buildArtistID)
# albumThread = Thread(target=buildAlbumID)
# durationThread = Thread(target=buildDuration)

# trackThread.start()
# artistThread.start()
# albumThread.start()
# durationThread.start()

# trackThread.join()
# artistThread.join()
# albumThread.join()
# durationThread.join()  



# print("Constructed sparse feature matrix")
# print("Features for Name Tokens:", num_unique["track_name"])
# print("Features for Artist ID:", num_unique["artist_id"])
# print("Features for Album ID:", num_unique["album_id"])
# print("Features for Duration: 1")

## Build Interaction Matrix

In [5]:
#interaction_matrix = SparseMatrix(num_playlists, num_tracks)
interaction_matrix_rows = []
interaction_matrix_cols = []
interaction_matrix_vals = []
#interaction_matrix = dok_matrix((num_playlists,num_tracks))

row_count = 0
test_indexes = []
r_train = dok_matrix((num_playlists,num_tracks))
num_to_pick = int(len(potential_eval_pids) / 10)
eval_pids = np.random.choice(potential_eval_pids, num_to_pick)
correct_ids = {}
for row in tqdm.tqdm(range(len(pids))):
    row_count += 1
    is_eval = pids[row] in eval_pids
    if is_eval:
        correct_ids[pids[row]] = []
    ints = interactions[pids[row]]
    if pids[row] in test_pids:
        test_indexes.append(row)
    for (track_id, count) in ints.items():
        index = track_id_to_index[track_id]
        #interaction_matrix.setValue(row, index, count)
        if not is_eval or tracks[track_id].artist_uri not in local_artists[city_to_test]:
            interaction_matrix_rows.append(row)
            interaction_matrix_cols.append(index)
            interaction_matrix_vals.append(count)
        if not is_eval:
            r_train[row,index] = count
        elif tracks[track_id].artist_uri in local_artists[city_to_test]:
            correct_ids[pids[row]].append(tracks[track_id].artist_uri)
        #interaction_matrix[row,index] = count
    #for playlist_track in playlist.tracks:
    #    track_uri = playlist_track.track.track_uri
    #    col = track_id_to_index[track_uri]
    #    interaction_matrix[row,col] = 1

del interactions
interaction_matrix = csr_matrix((interaction_matrix_vals, (interaction_matrix_rows, interaction_matrix_cols)), shape=(num_playlists,num_tracks))
print("Built interaction matrix for", row_count, "playlists.")

100%|██████████| 1010000/1010000 [17:17<00:00, 973.53it/s] 


Built interaction matrix for 1010000 playlists.


In [6]:
eval_pids = list(dict.fromkeys(eval_pids).keys())

## Generate Split
##### (Want to go over)

In [None]:
#r_train = SparseMatrix(num_playlists, num_tracks, interaction_matrix.getInteractions())
#r_valid = SparseMatrix(num_playlists, num_tracks)
# print("copying interaction matrix to dok")
# r_train = dok_matrix(interaction_matrix)
# print("done copy")
    


# valid_indexes = []
# added_indexes = set()
# n_exact = 0
# n_at_least = 0

# test_length = len(test_indexes)
# test_indexes = np.array(test_indexes)
# test_indexes.sort()
# rows=find(interaction_matrix)[0]

# the_range = range(num_playlists)
# is_test = np.isin(np.array(the_range), test_indexes)
# num_tracks_arr = n_tracks_matrix.toarray().T[0]

# for count in tqdm.tqdm(range(len(test_indexes))):
#     index = test_indexes[count]
#     #print("checking has items", index)
#     if not np.any(rows==index):
#         #print("NO ITEMS")
#         continue
#     #print("done checking items")
#     n_tracks_total = n_tracks_matrix[index,0]
#     n_tracks_train = interaction_matrix[index].sum()
#     #print(time.time(),"good playlist")

#     #find training playlists with n_tracks
#     exact = []
#     at_least = []
    
#     #print("starting calc exact, at_least")
    
    
    
#     for i in np.extract(np.all([is_test==False,num_tracks_arr>=n_tracks_total], axis=0), the_range):
#         #skip test playlists
        
#         # NOTE: they did unique songs here, I'm just doing straight number of songs
#         n_tracks = num_tracks_arr[i]
#         if n_tracks == n_tracks_total:
#             exact.append(i)
#         elif n_tracks > n_tracks_total:
#             at_least.append(i)
#     #print("ending calc exact, at_least")
#     #print(time.time(),"got exact and at least")

#     np.random.shuffle(exact)
#     np.random.shuffle(at_least)
    
#     #print("staring valid")
#     repeat = 0
#     while repeat < 10:
#         #print(repeat)
#         valid_index = None
#         if valid_index is None:
#             while len(exact) > 0:
#                 candidate = exact[0]
#                 del exact[0]

#                 if candidate not in added_indexes:
#                     valid_index = candidate
#                     n_exact += 1
#                     break
#         if valid_index is None:
#             while len(at_least) > 0:
#                 candidate = at_least[0]
#                 del at_least[0]
                
#                 if candidate not in added_indexes:
#                     valid_index = candidate
#                     n_at_least += 1
#                     break
#         if valid_index is None:
#             break
#         added_indexes.add(valid_index)
#         if repeat == 0:
#             valid_indexes.append(valid_index)
#         repeat += 1
        
#         #split row at valid_index
#         row_interactions = find(interaction_matrix[valid_index])
            
#         row_indexes = np.array(range(len(row_interactions[0])))
#         np.random.shuffle(row_indexes)
        
#         train_indexes = set()
#         for i in range(n_tracks_train):
#             train_indexes.add(row_interactions[1][row_indexes[i]])
        
#         for index in row_indexes:
#             col = row_interactions[1][index]
#             val = row_interactions[2][index]
#             if col in train_indexes:
#                 #the randomly sampled tracks from this playlist
#                 #r_train.setValue(valid_index, col, val) #note, this UPDATES the value, as train was a copy of interactions
#                 r_train[valid_index,col] = val
#             else:
#                 #r_valid.setValue(valid_index, col, val)  
#                 r_valid[valid_index,col] = val
#     #print("ending valid")
#     #print(time.time(),"done")
        
# valid_indexes = np.array(valid_indexes)
# valid_indexes.sort()

# valid_cols = np.array(range(num_tracks))
        
# print("Generated Split for",count + 1,"playlists.")
# print("n_exact:", n_exact)
# print("n_at_least:", n_at_least)
# print("valid_indexes:", len(valid_indexes))

## Weighted Regularized Matrix Factorization (WRMF)

In [7]:
model = implicit.als.AlternatingLeastSquares(factors=224, use_gpu=False) ## power of 8 for gpu usage
model.fit(r_train.T)
metric_list = []
ndcg_list = []
rec_list = []
correct_list = []

playlist_id=eval_pids[0]
for playlist_id in eval_pids:
    playlist_metrics_x = []
    playlist_metrics_y = []
    remaining_artists = list(local_artists[city_to_test].keys()).copy()
    ndcg = []
    print("pid:",playlist_id)
    row = pids.index(playlist_id)
    #print(row)
    #print(local_artists["Nashville"])
    recs = model.recommend(row, interaction_matrix, N=num_tracks, recalculate_user=True)
    rec_list.append(recs)
    correct_list.append(correct_ids[playlist_id])
    interactions = interaction_matrix[row]
    print("IN PLAYLIST")
    for interaction in interactions.nonzero()[1]:
        track = tracks[track_ids[interaction]]
        print(track.track_name, "by",track.artist_name)
        if track.artist_uri in local_artists[city_to_test]:
            print("^^LOCAL")
    print("\nRECOMMENDS")
    count=1
    curY = 0
    for rec,score in recs:
        track = tracks[track_ids[rec]]
        if track.artist_uri in remaining_artists:
            playlist_metrics_x.append(count)
            if track.artist_uri in correct_ids[playlist_id]:
                escape=Back.GREEN
                curY += 1
                ndcg.append(1)
            else:
                escape=Back.RED
                ndcg.append(0)
            print(escape,track.track_name, "by",track.artist_name, "score:",score,"AT POSITION:",count)
            remaining_artists.remove(track.artist_uri)
            playlist_metrics_y.append(curY)
            count+=1
    print(Style.RESET_ALL + "\n\n\n")
    metrics = (playlist_metrics_x, playlist_metrics_y)
    metric_list.append(metrics)
    ndcg_list.append(ndcg)

100%|██████████| 15.0/15 [24:00<00:00, 101.13s/it]


pid: 116797
IN PLAYLIST
Retrograde by James Blake
Me and Your Mama by Childish Gambino
Them Changes by Thundercat
Broccoli (feat. Lil Yachty) by DRAM
Let Me Love You by DJ Snake
Caroline by Aminé
We Don't Talk Anymore (feat. Selena Gomez) by Charlie Puth
Sexual by NEIKED
He Won't Go by Adele
Pink + White by Frank Ocean
Open by Rhye
Disparate Youth by Santigold
Fade by Kanye West
Ocean Drive by Duke Dumont
Gangsta by Kehlani
Side To Side by Ariana Grande
Nikes by Frank Ocean
Feel No Ways by Drake
Lost by Frank Ocean
Bulletproof by La Roux
Take Me Away by Christina Vidal
Hold On by SBTRKT
In the Meantime by KING
Self Control by Frank Ocean
Breeze by xxyyxx
Hush by Magic City Hippies
Froot by Marina and the Diamonds
Evil by Interpol
I-69 by Roman GianArthur
Animal Spirits by Vulfpeck
Set Me Free by Herizen Guardiola as Mylene Cruz
Up The Ladder by Herizen Guardiola as Mylene Cruz
Fuck Me Pumps by Amy Winehouse
YOU'RE THE ONE by KAYTRANADA
Season 2 Episode 3 by Glass Animals
Cruel by St. V

[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.00031977640142 AT POSITION: 6
[41m Helpless by Emarosa score: 0.000319006738195 AT POSITION: 7
[41m Stars by Sego score: 0.000193340646548 AT POSITION: 8
[41m B4 The Night Is Thru by Jesse Boykins III score: 0.00012568446425 AT POSITION: 9
[41m Why Don't You Save Me? by Kan Wakan score: 0.000120657997822 AT POSITION: 10
[41m Obsession by Harriet Brown score: 5.93729340541e-05 AT POSITION: 11
[41m Playgirl by Ladytron score: 5.74641105752e-05 AT POSITION: 12
[41m Out of Drugs by Kelsy Karter score: 4.55644079948e-05 AT POSITION: 13
[42m Apathy by Mikal Cronin score: 4.08303898461e-05 AT POSITION: 14
[41m Wasting Time by Orchin score: 2.68587612071e-05 AT POSITION: 15
[41m You Got Snaked by No Parents score: 2.05803148572e-05 AT POSITION: 16
[41m Fashion Show by Facial score: 2.04418129705e-05 AT POSITION: 17
[41m I Dreamt Blues by Vikesh Kapoor score: 1.84620261

[42m A Toast to the future kids! by Emarosa score: 0.00123926616831 AT POSITION: 3
[41m I Feel It by Avid Dancer score: 0.00114055781137 AT POSITION: 4
[41m Weight by Mikal Cronin score: 0.00103227771444 AT POSITION: 5
[41m Destroy Everything You Touch by Ladytron score: 0.00078628681521 AT POSITION: 6
[41m I'm So Confused by Goldensuns score: 0.00044878819225 AT POSITION: 7
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.000348540330535 AT POSITION: 8
[41m Why Don't You Save Me? by Kan Wakan score: 0.000273000765599 AT POSITION: 9
[41m B4 The Night Is Thru by Jesse Boykins III score: 0.000173648697503 AT POSITION: 10
[41m Sleeping In by Phil Good score: 0.00012330232251 AT POSITION: 11
[41m Stars by Sego score: 6.97680282863e-05 AT POSITION: 12
[41m Hey Grandma by No Parents score: 5.89723310292e-05 AT POSITION: 13
[41m Out to Sea by Kacey Johansing score: 5.2829494699e-05 AT POSITION: 14
[41m Wasting Time 

[41m Emergency House Party by American Steel score: 4.41031375441e-06 AT POSITION: 22
[41m Five And Dime To Hollywood by Divided Heaven score: 2.62879270742e-06 AT POSITION: 23
[41m Fashion Show by Facial score: 2.23022204064e-06 AT POSITION: 24
[41m Night Drool by Vs Colour score: 1.66570648715e-06 AT POSITION: 25
[41m Southwest Airlines by Anjelah Johnson score: 1.65122195898e-06 AT POSITION: 26
[41m Miranda by Fawns of Love score: 6.03558900069e-07 AT POSITION: 27
[41m Red Hot Sand by Flat Worms score: 5.03231130795e-07 AT POSITION: 28
[41m Bad Day by Spanish Love Songs score: 4.09673137408e-07 AT POSITION: 29
[41m Slam Poetry by Eric Andre score: 7.96917439809e-08 AT POSITION: 30
[41m Star Wars Fans Are Uber Nerds by Bobcat Goldthwait score: 3.10152439723e-08 AT POSITION: 31
[41m The Christmas Party Scene by Jamie Drake score: 1.53984884006e-08 AT POSITION: 32
[41m Europe by Scott Gilmore score: 4.75457277087e-09 AT POSITION: 33
[41m Is this Love by Jorge Serrano score

[41m Emergency House Party by American Steel score: 5.70124399933e-06 AT POSITION: 25
[41m Searching for the Sun by Vikesh Kapoor score: 5.60017318374e-06 AT POSITION: 26
[41m Fashion Show by Facial score: 3.85720950512e-06 AT POSITION: 27
[41m Dark by Orchin score: 1.6521529844e-06 AT POSITION: 28
[41m Is this Love by Jorge Serrano score: 5.08786875459e-07 AT POSITION: 29
[41m The Christmas Party Scene by Jamie Drake score: 2.60305812071e-07 AT POSITION: 30
[41m Slam Poetry by Eric Andre score: 1.47760596807e-07 AT POSITION: 31
[41m Star Wars Fans Are Uber Nerds by Bobcat Goldthwait score: 1.22908240878e-08 AT POSITION: 32
[41m Europe by Scott Gilmore score: 1.47298285425e-09 AT POSITION: 33
[41m Miranda by Fawns of Love score: -2.80235882514e-07 AT POSITION: 34
[41m Five And Dime To Hollywood by Divided Heaven score: -1.7243740081e-06 AT POSITION: 35
[41m Vermont by Spanish Love Songs score: -3.03320539235e-06 AT POSITION: 36
[0m



pid: 998061
IN PLAYLIST
Retrograde by 

[41m Love Is to Die by Warpaint score: 0.0139943943657 AT POSITION: 1
[41m Weight by Mikal Cronin score: 0.00499861074608 AT POSITION: 2
[42m Destroy Everything You Touch by Ladytron score: 0.00440051743789 AT POSITION: 3
[41m I Wanna Prove to You by The Lemon Twigs score: 0.00283389875676 AT POSITION: 4
[41m I Feel It by Avid Dancer score: 0.00274202027366 AT POSITION: 5
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.00190469616795 AT POSITION: 6
[41m I Wish by Jesse Boykins III score: 0.00130716081842 AT POSITION: 7
[41m I'm So Confused by Goldensuns score: 0.000858590952137 AT POSITION: 8
[41m Why Don't You Save Me? by Kan Wakan score: 0.000830719618705 AT POSITION: 9
[41m A Toast to the future kids! by Emarosa score: 0.000522271426187 AT POSITION: 10
[41m Wicket Youth by Sego score: 0.000256611732297 AT POSITION: 11
[41m Sleeping In by Phil Good score: 0.000169221744627 AT POSITION: 12
[41m Piedmont Gir

[41m I Feel It by Avid Dancer score: 0.00326726886685 AT POSITION: 1
[41m Love Is to Die by Warpaint score: 0.00326705991514 AT POSITION: 2
[41m I Wanna Prove to You by The Lemon Twigs score: 0.00279153894004 AT POSITION: 3
[41m Weight by Mikal Cronin score: 0.00119016213582 AT POSITION: 4
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.00116203470528 AT POSITION: 5
[41m Growing Up by Phil Good score: 0.00098106533967 AT POSITION: 6
[41m I'm So Confused by Goldensuns score: 0.000917858533922 AT POSITION: 7
[42m A Toast to the future kids! by Emarosa score: 0.000602951686706 AT POSITION: 8
[41m Earth Girls by Jesse Boykins III score: 0.000570941497606 AT POSITION: 9
[41m Destroy Everything You Touch by Ladytron score: 0.000536013822015 AT POSITION: 10
[41m Molasses by Kan Wakan score: 0.00028191260892 AT POSITION: 11
[41m Stars by Sego score: 0.000216355878044 AT POSITION: 12
[41m I Know - Alternate Reality I

[41m I Wanna Prove to You by The Lemon Twigs score: 0.00300658248967 AT POSITION: 3
[41m I'm So Confused by Goldensuns score: 0.000849334326596 AT POSITION: 4
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.000849265674233 AT POSITION: 5
[41m Destroy Everything You Touch by Ladytron score: 0.000612582348966 AT POSITION: 6
[41m Helpless by Emarosa score: 0.000455394491244 AT POSITION: 7
[41m Sleeping In by Phil Good score: 0.000454015249131 AT POSITION: 8
[41m B4 The Night Is Thru by Jesse Boykins III score: 0.000338479254919 AT POSITION: 9
[41m Weight by Mikal Cronin score: 0.000335542864338 AT POSITION: 10
[41m Stars by Sego score: 0.000254062997743 AT POSITION: 11
[41m Molasses by Kan Wakan score: 0.000201415023542 AT POSITION: 12
[41m I Know - Alternate Reality Island Vacation Version by Harriet Brown score: 8.83019766273e-05 AT POSITION: 13
[41m Wasting Time by Orchin score: 4.60816355728e-05 AT POSITION:

[41m Destroy Everything You Touch by Ladytron score: 3.15410315448e-06 AT POSITION: 4
[41m You Only Like Me with the Lights Out by Avid Dancer score: 2.82427791029e-06 AT POSITION: 5
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 1.42285399905e-06 AT POSITION: 6
[41m I'm So Confused by Goldensuns score: 1.31419191432e-06 AT POSITION: 7
[41m B4 The Night Is Thru by Jesse Boykins III score: 7.28047778747e-07 AT POSITION: 8
[41m Wicket Youth by Sego score: 3.31589420093e-07 AT POSITION: 9
[41m Out to Sea by Kacey Johansing score: 2.61611559698e-07 AT POSITION: 10
[41m From Timid to Timbuktu by Local Hero score: 2.57655813135e-07 AT POSITION: 11
[41m Molasses by Kan Wakan score: 2.25371976368e-07 AT POSITION: 12
[41m You Got Snaked by No Parents score: 2.06815179486e-07 AT POSITION: 13
[41m I Miss You by Phil Good score: 1.94502091569e-07 AT POSITION: 14
[41m Wasting Time by Orchin score: 1.27307756952e-07 AT POSI

The Debt by Half Moon Run
Burnstick by Close Talker
Great Unknown by Close Talker
The Silence I by Close Talker
The Silence II by Close Talker
Patmos by Close Talker
Take It Back by Close Talker
Slow Weather by Close Talker
Bonfire by Close Talker
To the Coast by Close Talker
Younger Days by Close Talker
Consider Yourself by Half Moon Run
Concentrate On Me by Rococode
Empire by Rococode
Blood by Rococode
Weapon by Rococode
Dreams by Rococode
Death of a Payphone by Rococode
Tina by Rococode
Guns, Sex & Glory by Rococode
Hunter Gather by Rococode
Panic Attack by Rococode
Banks by Rococode
The Escape by Rococode
Never Tear Us Apart by Rococode
Panic Attack (Humans Remix) by Rococode

RECOMMENDS
[42m I Feel It by Avid Dancer score: 0.00153533048113 AT POSITION: 1
[41m New Song by Warpaint score: 0.000819160861157 AT POSITION: 2
[41m I Wanna Prove to You by The Lemon Twigs score: 0.000348020097076 AT POSITION: 3
[41m Weight by Mikal Cronin score: 0.000250766118747 AT POSITION: 4
[41m I

Georgia by Emily King
Nightgowns by Tom Misch
Better Off Without You by Aquilo
Falling Short by Låpsley
Souvenirs by Dream Koala
No Police by Doja Cat
Girl - Radio Edit by The Internet
Prototype by OutKast
Move On by Garden City Movement
Ttktv by Injury Reserve
Lay-by by Tennyson
Sinkies by Hot Sugar
Sober Thoughts by GoldLink
When I Die by GoldLink
Once I Was Loved by Melody Gardot
Future Being by RKCB
Minnesota, WI by Bon Iver
Solitude - Remastered Version by Black Sabbath
Fall In Love (Your Funeral) by Erykah Badu
Sunday by Earl Sweatshirt
Panic Cord - Hucci Remix by Gabrielle Aplin
Release You by Tom Misch
No Sleeep by Janet Jackson
Say Yes by Floetry
Strings by Young the Giant
Use Your Heart by SWV
9 by Willow
Pray - Album Mix by The Erised
Streetcar by Daniel Caesar
You Haunt Me - Amtrac Remix by Sir Sly
Mama Says by Ibeyi
Shadow Dance by The Internet
I Heard by Young Fathers
Said So What by French Kicks
All The Joy by Moonchild
Sugar Snap by Tora
Lilly by Toro y Moi
Camelblues b

[41m Wasting Time by Orchin score: 1.00235409921e-05 AT POSITION: 16
[41m Ad_renaline by Kara-Lis Coverdale score: 8.95820964487e-06 AT POSITION: 17
[41m You Got Snaked by No Parents score: 7.28792181471e-06 AT POSITION: 18
[41m Hold On by Raquel Rodriguez score: 6.49132836643e-06 AT POSITION: 19
[41m Emergency House Party by American Steel score: 5.77383991254e-06 AT POSITION: 20
[41m Fashion Show by Facial score: 3.68168741788e-06 AT POSITION: 21
[41m Always by Sister Mantos score: 3.38858475372e-06 AT POSITION: 22
[41m I Dreamt Blues by Vikesh Kapoor score: 3.23653481375e-06 AT POSITION: 23
[41m We Three Pistoliers by Local Hero score: 2.85626412706e-06 AT POSITION: 24
[41m It's So True by Spain score: 1.61793372068e-06 AT POSITION: 25
[41m Five And Dime To Hollywood by Divided Heaven score: 1.37304829345e-06 AT POSITION: 26
[41m Night Drool by Vs Colour score: 9.26034058146e-07 AT POSITION: 27
[41m Red Hot Sand by Flat Worms score: 6.45672861128e-07 AT POSITION: 28
[4

[42m I Wish by Jesse Boykins III score: 0.000920506326055 AT POSITION: 1
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.000888682614224 AT POSITION: 2
[41m Love Is to Die by Warpaint score: 0.000737391841154 AT POSITION: 3
[41m You Only Like Me with the Lights Out by Avid Dancer score: 0.000390833290318 AT POSITION: 4
[41m Growing Up by Phil Good score: 0.000259715461257 AT POSITION: 5
[41m Why Don't You Save Me? by Kan Wakan score: 0.000160167100905 AT POSITION: 6
[41m Destroy Everything You Touch by Ladytron score: 0.000159517609447 AT POSITION: 7
[41m Weight by Mikal Cronin score: 0.000149902259536 AT POSITION: 8
[41m I'm So Confused by Goldensuns score: 0.000123720703395 AT POSITION: 9
[41m Mad by Emarosa score: 0.000118709768434 AT POSITION: 10
[41m These Words by The Lemon Twigs score: 7.92341206549e-05 AT POSITION: 11
[41m Wicket Youth by Sego score: 5.4676698898e-05 AT POSITION: 12
[41m Ad_renaline 

[41m Destroy Everything You Touch by Ladytron score: 0.00479636815579 AT POSITION: 2
[41m Weight by Mikal Cronin score: 0.00448902044304 AT POSITION: 3
[41m I Wanna Prove to You by The Lemon Twigs score: 0.00366212247602 AT POSITION: 4
[41m You Only Like Me with the Lights Out by Avid Dancer score: 0.00260952642659 AT POSITION: 5
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.00246039590098 AT POSITION: 6
[41m I'm So Confused by Goldensuns score: 0.00107515566102 AT POSITION: 7
[41m Amorous by Jesse Boykins III score: 0.0010566970343 AT POSITION: 8
[41m Why Don't You Save Me? by Kan Wakan score: 0.000617534407806 AT POSITION: 9
[41m A Toast to the future kids! by Emarosa score: 0.000520274483047 AT POSITION: 10
[41m Wicket Youth by Sego score: 0.00019433728328 AT POSITION: 11
[41m Sleeping In by Phil Good score: 0.000163415894584 AT POSITION: 12
[41m You Got Snaked by No Parents score: 9.90613659976e-05 AT P

[41m Ad_renaline by Kara-Lis Coverdale score: 1.82820821685e-05 AT POSITION: 20
[41m Already Beat by Raquel Rodriguez score: 1.47627999745e-05 AT POSITION: 21
[41m Night Drool by Vs Colour score: 1.11297993522e-05 AT POSITION: 22
[41m Always by Sister Mantos score: 9.61564799255e-06 AT POSITION: 23
[41m Fashion Show by Facial score: 8.6832044896e-06 AT POSITION: 24
[41m Sleeping in (The Ready Set Remix) by Phil Good score: 8.54434307316e-06 AT POSITION: 25
[41m Out of Drugs by Kelsy Karter score: 6.0016150376e-06 AT POSITION: 26
[41m Southwest Airlines by Anjelah Johnson score: 5.63961881549e-06 AT POSITION: 27
[41m Five And Dime To Hollywood by Divided Heaven score: 3.81286574921e-06 AT POSITION: 28
[41m The Christmas Party Scene by Jamie Drake score: 2.83797442333e-06 AT POSITION: 29
[41m Is this Love by Jorge Serrano score: 1.02667700018e-06 AT POSITION: 30
[41m Miranda by Fawns of Love score: 8.572367319e-07 AT POSITION: 31
[41m Slam Poetry by Eric Andre score: 2.32603

67 by MellowHype
New Map by M83
Taking You There by Broods
I Won't Let You Down by Alex Clare
Young Boys by Sin Fang
Brighter Days by Saint Raymond
Carry Me by Bombay Bicycle Club
Top Floor by Lunice
Electricity from Dance Moms by Ashley Jana
G.R.I.N.D. (Get Ready It's A New Day) by Asher Roth
Objects Objects by Deptford Goth
Find You by Suvi
Violent Silence - EP Version by Beatrice Eli
Machines by Mason Jennings
Restless by Kakkmaddafakka
Stare Into The Sun - Edit by Graffiti6
Maui Wowie by Kid Cudi
Hail Mary by K.Flay
ShutEmDown by Celldweller
Red in the Grey by MØ
Take Me Out by Tom Milsom
Young Boys - Jónsi Remix by Sin Fang
TGIF by Kid Cudi
Tell Me When It's Over by The Dream Syndicate
Mad World by It's A Cover Up
Gurl (feat. Abra) by Father
Lead Balloon by Carroll
Annabelle by Dessa
...And the Walls Came Tumbling Down by Hot Sugar
Hej, Me I'm Light by Phosphorescent

RECOMMENDS
[41m Love Is to Die by Warpaint score: 0.0147418255031 AT POSITION: 1
[41m Weight by Mikal Cronin sco

[41m I Wanna Prove to You by The Lemon Twigs score: 0.000106636841248 AT POSITION: 2
[41m Weight by Mikal Cronin score: 0.000105285048292 AT POSITION: 3
[42m Destroy Everything You Touch by Ladytron score: 9.48521388587e-05 AT POSITION: 4
[41m I Feel It by Avid Dancer score: 9.28392453348e-05 AT POSITION: 5
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 7.21023975796e-05 AT POSITION: 6
[41m I Wish by Jesse Boykins III score: 4.12163773919e-05 AT POSITION: 7
[41m I'm So Confused by Goldensuns score: 3.06501156309e-05 AT POSITION: 8
[41m A Toast to the future kids! by Emarosa score: 1.86777703097e-05 AT POSITION: 9
[41m Growing Up by Phil Good score: 9.75653137855e-06 AT POSITION: 10
[41m Molasses by Kan Wakan score: 8.5937570362e-06 AT POSITION: 11
[41m Stars by Sego score: 7.55268030453e-06 AT POSITION: 12
[41m Out of Drugs by Kelsy Karter score: 3.36790010456e-06 AT POSITION: 13
[41m You Got Snaked by No Par

Lady And Her Son, Ex-Boyfriends Are Hungry Wolves by Joe Rogan
Bisexual Dudes Giving Advice by Joe Rogan
What's This Dude's Recipe? by Big Jay Oakerson
Performing For The President by David Spade
Dog Barking by Brian Regan
TV Shows by Brian Regan
Monster Truck Drivers by Brian Regan
Mushrooms, Vegan by Shane Mauss
This Track is Not Called Dopeman by Kyle Kinane
Hope by Jenny Zigrino
For by Jenny Zigrino
Hipsters and Burning Man by Natasha Leggero
Becoming A Man by Big Jay Oakerson
Asian Girls (Revisited) by Big Jay Oakerson
Blacks - GH Version by Chris Rock
This Woman I Met by Steven Wright
Office People (And A Perfectly Good Font Joke) by Christian Finnegan
Bad, Bad Things by Anthony Jeselnik
Ball Wipes by Nikki Glaser
Dick Tricks by Nikki Glaser
Detroit Rock City by Kyle Kinane
Fine by Marc Maron
Fun Dad / Blonde Moustaches by Pete Holmes
Killer In The Backseat by Pete Holmes
The Brothers Warren by Greg Warren
Plantar Fasciitis by Greg Warren
Bigfoot and Flute Man by Greg Warren
One 

[41m Eric Andre by Eric Andre score: -2.65565791064e-11 AT POSITION: 34
[41m Out of Drugs by Kelsy Karter score: -1.16402050737e-06 AT POSITION: 35
[41m Five And Dime To Hollywood by Divided Heaven score: -5.23651455988e-06 AT POSITION: 36
[0m



pid: 99882
IN PLAYLIST
Saddr Weirdr by Bomb the Music Industry!
Knowledge by Operation Ivy
Hey by The Suicide Machines
Lola - Coca Cola Version by The Kinks
Let's Dance by Ramones
Train in Vain - Remastered by The Clash
Bankrobber by The Clash
Add It Up - 2002 Remastered Version by Violent Femmes
I Wanna Be Your Dog by The Stooges
Maxwell Murder by Rancid
Linoleum by NOFX
Pretty Girls (The Mover) by Against Me!
Lean On Sheena by The Bouncing Souls
Orgasm Addict by Buzzcocks
Spiderman by Ramones
Wrong 'Em Boyo by The Clash
Silly Girl by Descendents
Kate Is Great by The Bouncing Souls
Gangsters - 2015 Remaster by The Specials
Ghetto Defendant by The Clash
Problems by Sex Pistols
Oh, There's Legwork by None More Black
I Wanna Be Your Boyfrien

[42m Destroy Everything You Touch by Ladytron score: 0.000109332160794 AT POSITION: 5
[41m Helpless by Emarosa score: 4.94790480428e-05 AT POSITION: 6
[41m I'm So Confused by Goldensuns score: 1.7760315719e-05 AT POSITION: 7
[41m Piedmont Girls by Local Hero score: 1.60747338972e-05 AT POSITION: 8
[41m Like I Need You by Kan Wakan score: 1.31377363243e-05 AT POSITION: 9
[41m Stars by Sego score: 1.24950405784e-05 AT POSITION: 10
[41m Sleeping in (Sean Turk Remix) [feat. Sean Turk] by Phil Good score: 1.08767298409e-05 AT POSITION: 11
[41m Your Ass Ain't Laughing Now by American Steel score: 8.08698362962e-06 AT POSITION: 12
[41m Wasting Time by Orchin score: 6.16751773836e-06 AT POSITION: 13
[41m I Know - Alternate Reality Island Vacation Version by Harriet Brown score: 4.49193546954e-06 AT POSITION: 14
[41m Tell Me (feat. Theophilus London ) by Jesse Boykins III score: 3.90809123767e-06 AT POSITION: 15
[41m You Got Snaked by No Parents score: 3.78802145675e-06 AT POSITION:

[41m Out of Drugs by Kelsy Karter score: 9.51830812415e-05 AT POSITION: 12
[41m Piedmont Girls by Local Hero score: 8.31025950208e-05 AT POSITION: 13
[41m I Know - Alternate Reality Island Vacation Version by Harriet Brown score: 7.90382001228e-05 AT POSITION: 14
[41m Out to Sea by Kacey Johansing score: 6.49314603397e-05 AT POSITION: 15
[41m Ad_renaline by Kara-Lis Coverdale score: 4.18071517516e-05 AT POSITION: 16
[41m Wasting Time by Orchin score: 3.81138481119e-05 AT POSITION: 17
[41m You Got Snaked by No Parents score: 3.61887742702e-05 AT POSITION: 18
[41m Already Beat by Raquel Rodriguez score: 2.22917552923e-05 AT POSITION: 19
[41m Our Love Is Gonna Live Forever by Spain score: 1.97843159459e-05 AT POSITION: 20
[41m Always by Sister Mantos score: 1.72096635151e-05 AT POSITION: 21
[41m Dead and Broken by American Steel score: 1.25296478077e-05 AT POSITION: 22
[41m I Dreamt Blues by Vikesh Kapoor score: 8.71059243308e-06 AT POSITION: 23
[41m Intro by The Lemon Twigs 

[41m You Got Snaked by No Parents score: 3.12069632785e-05 AT POSITION: 15
[41m From Timid to Timbuktu by Local Hero score: 2.76705630356e-05 AT POSITION: 16
[41m Wasting Time by Orchin score: 2.70314282347e-05 AT POSITION: 17
[41m Our Love Is Gonna Live Forever by Spain score: 2.55136485761e-05 AT POSITION: 18
[41m I Dreamt Blues by Vikesh Kapoor score: 1.95095642826e-05 AT POSITION: 19
[41m Ad_renaline by Kara-Lis Coverdale score: 1.19527859763e-05 AT POSITION: 20
[41m Shrapnel by American Steel score: 1.05453342737e-05 AT POSITION: 21
[41m Out of Drugs by Kelsy Karter score: 1.04378214039e-05 AT POSITION: 22
[41m Always by Sister Mantos score: 1.04331141788e-05 AT POSITION: 23
[41m Night Drool by Vs Colour score: 7.4372859227e-06 AT POSITION: 24
[41m Already Beat by Raquel Rodriguez score: 4.9374273194e-06 AT POSITION: 25
[41m Fashion Show by Facial score: 1.32733141819e-06 AT POSITION: 26
[41m Red Hot Sand by Flat Worms score: 1.28520945479e-06 AT POSITION: 27
[41m My

[41m Love Is to Die by Warpaint score: 0.00135642066432 AT POSITION: 1
[41m I Feel It by Avid Dancer score: 0.000651083828776 AT POSITION: 2
[41m I Wanna Prove to You by The Lemon Twigs score: 0.00042486202547 AT POSITION: 3
[41m Weight by Mikal Cronin score: 0.000399801418725 AT POSITION: 4
[41m Afro Blue (9th Wonder's Blue Light Basement Remix) [feat. Erykah Badu and Phonte] by Robert Glasper score: 0.000361128014187 AT POSITION: 5
[41m Destroy Everything You Touch by Ladytron score: 0.000353795446031 AT POSITION: 6
[41m I Wish by Jesse Boykins III score: 0.000293869294525 AT POSITION: 7
[41m I'm So Confused by Goldensuns score: 0.000172388622574 AT POSITION: 8
[41m Why Don't You Save Me? by Kan Wakan score: 0.000119751921224 AT POSITION: 9
[41m Sleeping In by Phil Good score: 0.000103034904272 AT POSITION: 10
[41m Wicket Youth by Sego score: 4.48715310043e-05 AT POSITION: 11
[41m Helpless by Emarosa score: 2.98984011648e-05 AT POSITION: 12
[41m I Know - Alternate Realit

In [39]:
def r_precision(recs, correct):
    global tracks, track_ids, local_artists, city_to_test
    remaining_artists = list(local_artists[city_to_test].keys()).copy()
    num_to_check = len(correct)
    num_correct = 0
    i=0
    i2=0
    while i < num_to_check:
        rec, score = recs[i2]
        track = tracks[track_ids[rec]]
        if track.artist_uri in remaining_artists:
            if track.artist_uri in correct:
                num_correct += 1
            i+=1
            remaining_artists.remove(track.artist_uri)
        i2+=1
    return float(num_correct) / float(num_to_check)

def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    3.0
    >>> dcg_at_k(r, 2)
    5.0
    >>> dcg_at_k(r, 2, method=1)
    4.2618595071429155
    >>> dcg_at_k(r, 10)
    9.6051177391888114
    >>> dcg_at_k(r, 11)
    9.6051177391888114
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain
    """
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_full(r, method=0):
    """Score is normalized discounted cumulative gain (ndcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1)
    0.96519546960144276
    >>> ndcg_at_k([0], 1)
    0.0
    >>> ndcg_at_k([1], 2)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    k = len(r)
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max

def clicks(recs, correct):
    global tracks, track_ids, local_artists, city_to_test
    remaining_artists = list(local_artists[city_to_test].keys()).copy()
    i=0
    i2=0
    while i2 < len(recs):
        rec, score = recs[i2]
        track = tracks[track_ids[rec]]
        if track.artist_uri in remaining_artists:
            if track.artist_uri in correct:
                return int(i/10)
            i+=1
            remaining_artists.remove(track.artist_uri)

        i2+=1
            

In [40]:
auc_list = []
r_prec_list = []
ndcg_metric_list = []
click_list = []

for i in tqdm.tqdm(range(len(rec_list))):
    auc = skmet.auc(*(metric_list[i])) / (len(correct_list[i]) * len(metric_list[i][0])) #normalize
    r_prec = r_precision(rec_list[i], correct_list[i])
    ndcg = ndcg_full(ndcg_list[i])
    click_count = clicks(rec_list[i], correct_list[i])
    
    auc_list.append(auc)
    r_prec_list.append(r_prec)
    ndcg_metric_list.append(ndcg)
    click_list.append(click_count)
    
print("AVERAGE AUC:", np.mean(auc_list))
print("AVERAGE R_PRECISION:", np.mean(r_prec_list))
print("AVERAGE NDCG:", np.mean(ndcg_metric_list))
print("AVERAGE CLICKS:", np.mean(click_list))

    

100%|██████████| 35/35 [00:11<00:00,  3.63it/s]

AVERAGE AUC: 0.56936017686
AVERAGE R_PRECISION: 0.17973026973
AVERAGE NDCG: 0.609615077633
AVERAGE CLICKS: 0.371428571429





In [12]:
r_prec_list

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]

In [None]:
# model = implicit.als.AlternatingLeastSquares(factors=200)
# model.fit(interaction_matrix.T)

# U = model.user_factors
# V = model.item_factors
# U.dump("U")
# V.dump("V")

In [None]:
# print(correct_ids)
# print(local_artists[city_to_test])

In [None]:
# playlist_id=eval_pids[0]
# for playlist_id in eval_pids:
#     remaining_artists = list(local_artists[city_to_test].keys()).copy()
#     print("pid:",playlist_id)
#     row = pids.index(playlist_id)
#     #print(row)
#     #print(local_artists["Nashville"])
#     recs = model.recommend(row, interaction_matrix, N=num_tracks)
#     interactions = interaction_matrix[row]
#     print("IN PLAYLIST")
#     for interaction in interactions.nonzero()[1]:
#         track = tracks[track_ids[interaction]]
#         print(track.track_name, "by",track.artist_name)
#         if track.artist_uri in local_artists[city_to_test]:
#             print("^^LOCAL")
#     print("\nRECOMMENDS")
#     count=1
#     for rec,score in recs:
#         track = tracks[track_ids[rec]]
#         if track.artist_uri in remaining_artists:
#             if track.artist_uri in correct_ids[playlist_id]:
#                 escape=Back.GREEN
#             else:
#                 escape=Back.RED
#             print(escape,track.track_name, "by",track.artist_name, "score:",score,"AT POSITION:",count)
#             remaining_artists.remove(track.artist_uri)
#         count+=1
#     print(Style.RESET_ALL + "\n\n\n")


## Get Playlist-to-Playlist and Song-to-Song Rankings

In [None]:
# playlist_to_playlist = np.zeros((num_playlists,num_playlists))
# for i in tqdm.tqdm(range(num_playlists)):
#     #train_cols = r_train[i].nonzero()[1]
#     sim = model.similar_users(i,num_playlists)
#     for col, val in sim:
#         #if not np.any(train_cols==col):
#         #if col not in train_cols:
#         #if np.any(train_cols==col):
#         #if col in train_cols:
#         #    continue
#         if r_train[i,col] == 0:
#             playlist_to_playlist[i,col] = val
# playlist_to_playlist

In [None]:
#song_to_song = np.zeros((num_tracks,num_tracks))
#for i in tqdm.tqdm(range(num_tracks)):
#    train_cols = r_train[i].nonzero()[1]
#    sim = model.similar_items(i,num_tracks)
#    for col, val in sim:
#        if not np.any(train_cols==col):
#            song_to_song[i,col] = val
#song_to_song

In [None]:
#r_train.nonzero()

## Evalutation

## 