# Testing the Baseline Models
This notebook exercises the baseline recommendation models saved in the `baselinemodels` module.

### Imports

In [5]:
import random
import os
import csv

# from tqdm import tqdm # for scripts
from tqdm.auto import tqdm # for notebooks
tqdm.pandas()

import numpy as np
import pandas as pd

import tensorflow as tf

# Update as needed, for example: from jlj.baselinemodels import BaselineModels
from baselinemodels import BaselineModels

# Importing 2017 data
The cells below are copied from the `load_2017_data.ipynb` notebook. They produce the following:

Loads the following CSV files into **DataFrames**:
- `playlists_2017.csv` $\rightarrow$ `playlist_df`
- `tracks_2017.csv` $\rightarrow$ `track_df`
- `albums_2017.csv` $\rightarrow$ `album_df`
- `artists_2017.csv` $\rightarrow$ `artist_df`

Then, the following **dictionaries** are created:
- `playlist_dict`
- `track_dict`
- `album_dict`
- `artist_dict`

As well as the following **functions**:
- `get_playlist_feature(PID, feature)`
- `get_track_feature(URI, feature)`
- `get_album_feature(URI, feature)`
- `get_artist_feature(URI, feature)`

### Update data path as needed

In [6]:
# path to the DIRECTORY where CSV files are saved relative to this notebook
path = '../data'
# NOTE: despite the "output" in its name, this root is only used to READ the CSVs below.
# os.path.relpath resolves against the current working directory, so the kernel must be
# started from this notebook's folder for '../data' to point at the right place.
output_filepath_root = os.path.relpath(path)
print(output_filepath_root)

../data


## Load 2017 CSVs to DFs

### Playlists

In [7]:
# Load the playlist table, indexed by playlist ID ('pid') so rows can be looked up by PID
playlist_df = pd.read_csv(os.path.join(output_filepath_root, 'playlists_2017.csv'), index_col='pid')
playlist_df.head(5)

Unnamed: 0_level_0,name,description,modified_at,num_artists,num_albums,num_tracks,num_followers,num_edits,duration_ms,collaborative,...,track_14_album_uri,track_14_artist_uri,track_15_uri,track_15_album_uri,track_15_artist_uri,modified_at_date,modified_at_year,modified_at_month,modified_at_day,modified_at_dow
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
434000,Sad,,1488240000,24,26,27,1,6,6081757,False,...,spotify:album:4M9Ti6t5h54aDMX4SizDfT,spotify:artist:4vVfuZfXWu18vk5Z4C7wbm,spotify:track:3yrVRdwCbEeKODZgG2mVZX,spotify:album:3SCJmoy3Z45p84IfuaM9YQ,spotify:artist:2EO56JK4txid1Pss9GVbOL,2017-02-28,2017,2,28,1
434001,pb&j,faves tbh,1487808000,35,38,39,1,9,8959761,False,...,spotify:album:6deiaArbeoqp1xPEGdEKp1,spotify:artist:0L8ExT028jH3ddEcZwqJJ5,spotify:track:5E30LdtzQTGqRvNd7l6kG5,spotify:album:18iFxjZugvKhuNNMbLjZJF,spotify:artist:77SW9BnxLY8rJ0RciFqkHh,2017-02-23,2017,2,23,3
434004,Whatever,,1506816000,36,57,79,1,46,18874072,False,...,spotify:album:2Tyx5dLhHYkx6zeAdVaTzN,spotify:artist:4LLpKhyESsyAXpc4laK94U,spotify:track:0htTZnlk6okQ1HIq4EvFQ6,spotify:album:6liIoWzpvrff945pUI7fHt,spotify:artist:02kJSzxNuaWGqwubyUba0Z,2017-10-01,2017,10,1,6
434005,roadtrip,,1492905600,30,35,70,1,18,15696608,False,...,spotify:album:6DwdzG4UGYLxJ2p7bd483v,spotify:artist:2Q0MyH5YMI5HPQjFjlq5g3,spotify:track:4XvKjZWIqsHvvza89lMTAH,spotify:album:6izXZb0VGaUHqm5GaXq4YC,spotify:artist:5IXalAOiV9I8LgLMGZydmt,2017-04-23,2017,4,23,6
434007,Cumbias,,1507507200,23,42,50,1,21,10255699,False,...,spotify:album:392uNMyh5D6fqkBS385XJd,spotify:artist:3zzeZVLuOeetfimOd4k8rE,spotify:track:1yOLrH7nF0R7MWDuuva6va,spotify:album:2rXbAorimO8C06RkqS2oq5,spotify:artist:0OhiQFSqbnnmB52NWEpsO5,2017-10-09,2017,10,9,0


### Tracks

In [8]:
# Load the track table, indexed by track URI so rows can be looked up by URI
track_df = pd.read_csv(os.path.join(output_filepath_root, 'tracks_2017.csv'), index_col='track_uri')
track_df.head(5)

Unnamed: 0_level_0,index,track_name,album_name,album_uri,artist_name,artist_uri,duration_ms,appears_in_count,popularity
track_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
spotify:track:6SbAbLqAWf2tnTdUy6Gmm5,0,FUCKING BEST SONG EVERRR,FUCKING BEST SONG EVERRR,spotify:album:1hmvZb81DAeTx67G1FaTjZ,Wallpaper.,spotify:artist:6NMcnx3vKGSAeqSMbySlpw,217800,53,53
spotify:track:1MvpPH6BTP3IrLnTjEA2gw,1,#STUPiDFACEDD,#STUPiDFACEDD,spotify:album:1c7wJm9mghFyIKnQJOobW8,Wallpaper.,spotify:artist:6NMcnx3vKGSAeqSMbySlpw,184026,39,39
spotify:track:5rgy6ghBq1eRApCkeUdJXf,2,We Are Young (feat. Janelle Monáe) - feat. Jan...,Some Nights,spotify:album:7m7F7SQ3BXvIpvOgjW51Gp,fun.,spotify:artist:5nCi3BB41mBaMH9gfr6Su0,250626,1276,1276
spotify:track:07dYGGSrzPeg6a3KZjWX65,3,Boyfriend,Believe,spotify:album:7BWK3eXcbAdwYeulyQj5Kw,Justin Bieber,spotify:artist:1uNFoZAHBGtllmzznpCI3s,171333,704,704
spotify:track:1NpW5kyvO4XrNJ3rnfcNy3,4,Wild Ones (feat. Sia),Wild Ones,spotify:album:7eLwoxxWs6lfkVYJGkGNbk,Flo Rida,spotify:artist:0jnsk9HBra6NMjO2oANoPY,232946,952,952


### Albums

In [9]:
# Load the album table, indexed by album URI so rows can be looked up by URI
album_df = pd.read_csv(os.path.join(output_filepath_root, 'albums_2017.csv'), index_col='album_uri')
album_df.head(5)

Unnamed: 0_level_0,album_name,artist_name
album_uri,Unnamed: 1_level_1,Unnamed: 2_level_1
spotify:album:1hmvZb81DAeTx67G1FaTjZ,FUCKING BEST SONG EVERRR,Wallpaper.
spotify:album:1c7wJm9mghFyIKnQJOobW8,#STUPiDFACEDD,Wallpaper.
spotify:album:7m7F7SQ3BXvIpvOgjW51Gp,Some Nights,fun.
spotify:album:7BWK3eXcbAdwYeulyQj5Kw,Believe,Justin Bieber
spotify:album:7eLwoxxWs6lfkVYJGkGNbk,Wild Ones,Flo Rida


### Artists

In [10]:
# Load the artist table, indexed by artist URI so rows can be looked up by URI
artist_df = pd.read_csv(os.path.join(output_filepath_root, 'artists_2017.csv'), index_col='artist_uri')
artist_df.head(5)

Unnamed: 0_level_0,artist_name
artist_uri,Unnamed: 1_level_1
spotify:artist:6NMcnx3vKGSAeqSMbySlpw,Wallpaper.
spotify:artist:5nCi3BB41mBaMH9gfr6Su0,fun.
spotify:artist:1uNFoZAHBGtllmzznpCI3s,Justin Bieber
spotify:artist:0jnsk9HBra6NMjO2oANoPY,Flo Rida
spotify:artist:4AK6F7OLvEQ5QYCBNiQWHq,One Direction


## Create Dictionaries

### Playlists

In [11]:
# DataFrame.to_dict() with its default orientation yields {column: {index: value}},
# i.e. playlist_dict[feature][pid] -- the lookup order get_playlist_feature relies on
playlist_dict = playlist_df.to_dict()

In [12]:
def get_playlist_feature(pid, feature='name', dictionary=playlist_dict):
    """
    Look up one feature of a playlist in the playlist dictionary.

    The PID may be an integer or anything `int()` accepts (for example a
    numeric string); it is converted to an integer before the lookup.

    Available features:
        - 'name'
        - 'description'
        - 'modified_at'
        - 'modified_at_date'
        - 'modified_at_year'
        - 'modified_at_month'
        - 'modified_at_day'
        - 'modified_at_dow'
        - 'num_artists'
        - 'num_albums'
        - 'num_tracks'
        - 'num_followers'
        - 'num_edits'
        - 'duration_ms'
        - 'collaborative'
        - 'track_X_uri'
        - 'track_X_album_uri'
        - 'track_X_artist_uri'

    Returns the stored value. If the PID cannot be converted, or the feature
    or PID is not in the dictionary, the exception object is RETURNED (not
    raised), so callers must check the result themselves.
    """
    try:
        # Coerce first so that a bad PID is reported before any dictionary access
        playlist_id = int(pid)
        feature_values = dictionary[feature]
        return feature_values[playlist_id]
    except Exception as error:
        return error

In [13]:
# The PID is passed as a string here; get_playlist_feature converts it to an int before the lookup
get_playlist_feature('434004', 'name')

'Whatever'

### Tracks

In [14]:
# {column: {track_uri: value}} -- the nested layout get_track_feature indexes as dictionary[feature][uri]
track_dict = track_df.to_dict()

In [15]:
def get_track_feature(uri, feature='track_name', dictionary=track_dict):
    """
    Look up one feature of a track, identified by its uri string, in the track dictionary.

    Available features:
        - 'track_name'
        - 'album_name'
        - 'album_uri'
        - 'artist_name'
        - 'artist_uri'
        - 'duration_ms'

    Returns the stored value. If the feature or the uri is not in the
    dictionary, the exception object is RETURNED (not raised).
    """
    try:
        feature_values = dictionary[feature]
        value = feature_values[uri]
    except Exception as error:
        return error
    return value

In [16]:
get_track_feature('spotify:track:5rgy6ghBq1eRApCkeUdJXf', 'album_name')

'Some Nights'

### Albums

In [17]:
# {column: {album_uri: value}} -- the nested layout get_album_feature indexes as dictionary[feature][uri]
album_dict = album_df.to_dict()

In [18]:
def get_album_feature(uri, feature='album_name', dictionary=album_dict):
    """
    Look up one feature of an album, identified by its uri string, in the album dictionary.

    Available features:
        - 'album_name'
        - 'artist_name'

    Returns the stored value. If the feature or the uri is not in the
    dictionary, the exception object is RETURNED (not raised).
    """
    try:
        feature_values = dictionary[feature]
        value = feature_values[uri]
    except Exception as error:
        return error
    return value

In [19]:
get_album_feature('spotify:album:7BWK3eXcbAdwYeulyQj5Kw', 'artist_name')

'Justin Bieber'

### Artists

In [20]:
# {column: {artist_uri: value}} -- the nested layout get_artist_feature indexes as dictionary[feature][uri]
artist_dict = artist_df.to_dict()

In [21]:
def get_artist_feature(uri, feature='artist_name', dictionary=artist_dict):
    """
    Look up one feature of an artist, identified by its uri string, in the artist dictionary.

    Available features:
        - 'artist_name'

    Returns the stored value. If the feature or the uri is not in the
    dictionary, the exception object is RETURNED (not raised).
    """
    try:
        feature_values = dictionary[feature]
        value = feature_values[uri]
    except Exception as error:
        return error
    return value

In [22]:
get_artist_feature('spotify:artist:1uNFoZAHBGtllmzznpCI3s')

'Justin Bieber'

In [23]:
# Keep only the per-position track URI columns ('track_N_uri'); the matching
# album and artist URI columns also contain 'uri' and are filtered out.
track_uri_cols = [
    column
    for column in playlist_df.columns
    if 'uri' in column and not any(tag in column for tag in ('album', 'artist'))
]
track_uri_cols

['track_1_uri',
 'track_2_uri',
 'track_3_uri',
 'track_4_uri',
 'track_5_uri',
 'track_6_uri',
 'track_7_uri',
 'track_8_uri',
 'track_9_uri',
 'track_10_uri',
 'track_11_uri',
 'track_12_uri',
 'track_13_uri',
 'track_14_uri',
 'track_15_uri']

___
# Baseline Models
Initialize with `BaselineModels(playlist_df, track_df)`

Functions:
- `calculate_track_popularities(pop_col_name=<string>)`
- `recommend_tracks_by_artists(num_tracks=5, input_track_uris=[], popularity=True)`
- `recommend_tracks_by_albums(num_tracks=5, input_track_uris=[], popularity=True)`
- `recommend_popular_tracks(num_tracks=5, input_track_uris=[], popularity=True)`



In [24]:
def random_tracks(num_tracks=5, random_state=None):
    """
    Draw a random sample of rows from the global `track_df`.

    Parameters:
        - num_tracks: number of rows to draw
        - random_state: optional seed (or generator) forwarded to `DataFrame.sample`.
          Pass a fixed value to get the same sample on every run; the default
          `None` keeps the original unseeded behaviour.

    Returns a DataFrame with a fresh 0..n-1 index. The previous index is moved
    into a regular column by `reset_index()` ('track_uri' when `track_df` is
    indexed by track uri).
    """
    sampled = track_df.sample(n=num_tracks, random_state=random_state)
    return sampled.reset_index()

## Baseline 1 — Artist

In [33]:
get_track_feature('spotify:track:6SbAbLqAWf2tnTdUy6Gmm5')

'FUCKING BEST SONG EVERRR'

In [25]:
# Build the baseline recommender from the playlist and track DataFrames
baseline_models = BaselineModels(playlist_df=playlist_df, track_df=track_df)

In [31]:
# Draw a random sample of tracks; their URIs serve as the input "playlist" for the baseline cells below
tracks = random_tracks(20)
display(tracks)
# Show the sampled URIs, then the matching track names and artist names
track_uris = tracks.track_uri.to_list()
display(track_uris)
display([get_track_feature(uri) for uri in track_uris])
display([get_track_feature(uri, 'artist_name') for uri in track_uris])

Unnamed: 0,track_uri,index,track_name,album_name,album_uri,artist_name,artist_uri,duration_ms,appears_in_count,popularity
0,spotify:track:2z2HMfMK1vR4BZLL6JIWgV,230277,"Te quiero, te amo",Tu Sombra,spotify:album:2d7tfROtI0Sdsy2TYJMDpe,Pesado,spotify:artist:4BwiodzEp9Hwes5HeFjMVK,216893,11,11
1,spotify:track:2oA6tAT5u3cc8ND5crtH0k,90261,Black Eyes,The Family Tree: The Roots,spotify:album:7sU8zin3JDka5r87BGKpo4,Radical Face,spotify:artist:5EM6xJN2QNk0cL7EEm9HR9,284200,6,6
2,spotify:track:5Am1Vy5vnBnu7pbTJUBzlc,446895,Fastnet,Cascades,spotify:album:63DzFVIYUZgy24E8GRIuQS,High Highs,spotify:artist:1cXVTtkpqSXVhyD32f9MS4,242440,3,3
3,spotify:track:6xlw9Qx9VDEvTdxOT98wFd,98622,BillyThe Kid,Echo,spotify:album:1lsBEh2R9B39hWfEo4iZSW,Tom Petty and the Heartbreakers,spotify:artist:4tX2TplrkIP4v05BNC903e,248933,1,1
4,spotify:track:6I2fP8K1GbElogsAekOQfd,195992,"Dragon Soul (From ""Dragon Ball Z Kai"")","Dragon Soul (from ""Dragon Ball Z Kai"")",spotify:album:5pSvoIvSEFl8DB4sWS3pFe,NateWantsToBattle,spotify:artist:0Vb15td3iKkAzdGD5Sj9Ky,99259,4,4
5,spotify:track:0Hap9hOcxX0ywu8q9FkcQO,250837,Unravel (Tokyo Ghoul OP (Acoustic),Unravel (Tokyo Ghoul OP (Acoustic),spotify:album:7zp3GuHlCnMhwNZj9SDLAP,Theishter,spotify:artist:2Hqn367geFndjn7SaFNn49,262622,3,3
6,spotify:track:4qpBYiY4YeTRN2D4Iv3tcF,491749,Intro (Primus/Antipop),Antipop,spotify:album:23gj6qqto5Y2yYcocJPkmB,Primus,spotify:artist:64mPnRMMeudAet0E62ypkx,17053,1,1
7,spotify:track:4K84p5DmcoUm3pp64qzkHY,355713,Mecnun,Sahiden,spotify:album:5X8RxGuAlwaNQg0g4FZw9K,Buray,spotify:artist:1qZ684TB9E1BjH58btdtYd,200272,1,1
8,spotify:track:78U6lqeWK6Uie4jneibj9o,124300,Once in a While (feat. Sonya Kitchell),Seven Bridges,spotify:album:76HffpRQ7URIZGkUo4eDWt,Break Science,spotify:artist:1O9rhv6tMHchfb6Qx2yd6O,193411,21,21
9,spotify:track:79MZL2xQVtxULJIOqrJ4lA,253830,The Waiting,Fading Love,spotify:album:1v6BipCE7YBMOBcAN0j0EH,George FitzGerald,spotify:artist:3KOHpygRuo1ruQAbEneR3t,268268,2,2


['spotify:track:2z2HMfMK1vR4BZLL6JIWgV',
 'spotify:track:2oA6tAT5u3cc8ND5crtH0k',
 'spotify:track:5Am1Vy5vnBnu7pbTJUBzlc',
 'spotify:track:6xlw9Qx9VDEvTdxOT98wFd',
 'spotify:track:6I2fP8K1GbElogsAekOQfd',
 'spotify:track:0Hap9hOcxX0ywu8q9FkcQO',
 'spotify:track:4qpBYiY4YeTRN2D4Iv3tcF',
 'spotify:track:4K84p5DmcoUm3pp64qzkHY',
 'spotify:track:78U6lqeWK6Uie4jneibj9o',
 'spotify:track:79MZL2xQVtxULJIOqrJ4lA',
 'spotify:track:7cQnIhLin7koR2sO2bt2KS',
 'spotify:track:4nBW45Y4EVoaeggnS2r2Xp',
 'spotify:track:688DZF6e1MH5Uf409dwaHm',
 'spotify:track:1EmE0i81dj6zuqqDdY4B0o',
 'spotify:track:6tkFPLPrY97itA4Hbpa3Dp',
 'spotify:track:6HsKzNUXenFnXhpEteenK8',
 'spotify:track:3t708maRP7qBBuvRmNbWxS',
 'spotify:track:0FXfAepEmqVCMnvOpb8Hoj',
 'spotify:track:5aIVRSOeadwiwvkssC2aut',
 'spotify:track:1LdBSURpSKoZAzdLj9FrlH']

['Te quiero, te amo',
 'Black Eyes',
 'Fastnet',
 'BillyThe Kid',
 'Dragon Soul (From "Dragon Ball Z Kai")',
 'Unravel (Tokyo Ghoul OP (Acoustic)',
 'Intro (Primus/Antipop)',
 'Mecnun',
 'Once in a While (feat. Sonya Kitchell)',
 'The Waiting',
 'So Am I (feat. Damian Marley & Skrillex)',
 'O Astronauta',
 'The Wilhelm Scream',
 'Wonderful',
 'Beautiful Alarms',
 'Cold Water',
 'Lora',
 'Hell Breaks Loose',
 'Rainbow Ends',
 'See You In Hell']

['Pesado',
 'Radical Face',
 'High Highs',
 'Tom Petty and the Heartbreakers',
 'NateWantsToBattle',
 'Theishter',
 'Primus',
 'Buray',
 'Break Science',
 'George FitzGerald',
 'Ty Dolla $ign',
 'Baden Powell',
 'James Blake',
 'Casey Veggies',
 'Wilderness',
 'Grace Grundy',
 'Coma',
 'Eminem',
 'Emitt Rhodes',
 'The Creepshow']

In [32]:
# Baseline 1a: same-artist recommendations with popularity=False
# (presumably random tracks by the input artists -- confirm against the baselinemodels module)
baseline_1a_songs = baseline_models.recommend_tracks_by_artists(10, input_track_uris=track_uris, popularity=False)
display([get_track_feature(uri) for uri in baseline_1a_songs])
display([get_track_feature(uri, 'artist_name') for uri in baseline_1a_songs])

['Shepherd in Sheeps Clothing',
 'Tú como si nada',
 'Les Dilettantes - Roosevelt Mix',
 'Desperation',
 'U.S. 41',
 'With My Face On The Floor',
 'Love = Hate, Ulterior Motives',
 'Sora Ni Utaeba',
 'Ken Kaniff - Skit',
 'Yuri!!! on ICE Original Soundtrack - Yuri on ICE']

['Wilderness',
 'Pesado',
 'Coma',
 'Eminem',
 'Tom Petty and the Heartbreakers',
 'Emitt Rhodes',
 'Casey Veggies',
 'NateWantsToBattle',
 'Eminem',
 'Theishter']

In [None]:
# Baseline 1b: same-artist recommendations with popularity=True
# (presumably the input artists' most popular tracks -- confirm against the baselinemodels module)
baseline_1b_songs = baseline_models.recommend_tracks_by_artists(10, input_track_uris=track_uris, popularity=True)
display([get_track_feature(uri) for uri in baseline_1b_songs])
display([get_track_feature(uri, 'artist_name') for uri in baseline_1b_songs])

## Baseline 2 — Album

In [None]:
# Baseline 2a: same-album recommendations with popularity=False
# (presumably random tracks from the input albums -- confirm against the baselinemodels module)
baseline_2a_songs = baseline_models.recommend_tracks_by_albums(10, input_track_uris=track_uris, popularity=False)
display([get_track_feature(uri) for uri in baseline_2a_songs])
display([get_track_feature(uri, 'album_name') for uri in baseline_2a_songs])

In [None]:
# Baseline 2b: same-album recommendations with popularity=True
# (presumably the most popular tracks from the input albums -- confirm against the baselinemodels module)
baseline_2b_songs = baseline_models.recommend_tracks_by_albums(10, input_track_uris=track_uris, popularity=True)
display([get_track_feature(uri) for uri in baseline_2b_songs])
display([get_track_feature(uri, 'album_name') for uri in baseline_2b_songs])

## Baseline 3 — Popularity

In [None]:
# Baseline 3: popularity-based recommendations for the same input tracks;
# the results are shown as track names followed by artist names
baseline_3_songs = baseline_models.recommend_popular_tracks(10, input_track_uris=track_uris)
display([get_track_feature(uri) for uri in baseline_3_songs])
display([get_track_feature(uri, 'artist_name') for uri in baseline_3_songs])

In [None]:
def recommend_tracks_by_artist(num_tracks=5, input_track_uris=[], popularity=True):
    """
    Recommend tracks by the same artists as those in input_track_uris.
    If `popularity` is set to True, recommend popular songs by the same artist;
    if False, recommend random songs by the same artist.

    Parameters:
        - num_tracks: maximum number of track uris to return
        - input_track_uris: track uris already in the playlist (only read, never mutated)
        - popularity: True to take each drawn artist's most popular unused track,
          False to take one of their unused tracks at random

    Returns a list of track uris. None of them appear in input_track_uris and
    none is repeated. The list is shorter than num_tracks (possibly empty) when
    the input artists do not have enough other tracks in `track_df`; the
    function always terminates instead of retrying forever.

    Relies on the notebook-level `track_df` and `get_track_feature`.
    """
    already_in_playlist = set(input_track_uris)

    # Artists behind the input tracks. Duplicates are kept on purpose so that an
    # artist appearing several times in the playlist is proportionally more
    # likely to be drawn.
    input_artists = []
    for track_uri in input_track_uris:
        artist_uri = get_track_feature(track_uri, 'artist_uri')
        # get_track_feature returns the exception (instead of raising it) for an
        # unknown uri; such a value is not an artist and must be skipped
        if not isinstance(artist_uri, Exception):
            input_artists.append(artist_uri)

    # Build every artist's candidate list once, instead of re-filtering and
    # re-sorting the whole track table on each draw
    candidates = {}
    for artist_uri in set(input_artists):
        artist_tracks = track_df[track_df.artist_uri == artist_uri]
        if popularity:
            artist_tracks = artist_tracks.sort_values(by='popularity', ascending=False)
        artist_track_uris = artist_tracks.reset_index()['track_uri'].to_list()
        # dict.fromkeys drops duplicate uris while preserving the (popularity) order
        remaining = [uri for uri in dict.fromkeys(artist_track_uris)
                     if uri not in already_in_playlist]
        if remaining:
            candidates[artist_uri] = remaining

    # Only artists that still have something to recommend can be drawn
    artist_pool = [artist_uri for artist_uri in input_artists if artist_uri in candidates]

    recommended_track_uris = []
    while len(recommended_track_uris) < num_tracks and artist_pool:
        # Pick a random artist to get a song by
        this_artist = random.choice(artist_pool)
        remaining = candidates[this_artist]

        # Most popular unused track, or a uniformly random unused track
        position = 0 if popularity else random.randrange(len(remaining))
        recommended_track_uris.append(remaining.pop(position))

        # An exhausted artist is removed so the loop cannot spin on them
        if not remaining:
            artist_pool = [artist_uri for artist_uri in artist_pool if artist_uri != this_artist]

    # Return recommendations
    return recommended_track_uris

In [None]:
# test = recommend_tracks_by_artist(num_tracks=10, input_track_uris=track_uris, popularity=False)

In [None]:
# [get_track_feature(uri) for uri in test]

In [None]:
# track_df.reset_index().sample()['track_uri'].iloc[0]