# Wilson's Morning Wake Up Playlist Generator, Modeling and Learning

The following steps will be executed:

* Upload your data to S3.
* Define a benchmark and candidate models and training scripts
* Train models and deploy.
* Evaluate deployed estimator.

In [6]:
# Defaults
import os
import sys

import pandas as pd
import numpy as np

from scipy.spatial.distance import cdist
from tqdm.notebook import tqdm
from sklearn.externals import joblib



### Create database

In [64]:
import sqlite3
from sqlite3 import Error

def create_connection(db_file):
    """ create a database connection to a SQLite database """
    
    conn = None
    
    try:
        conn = sqlite3.connect(db_file)
        print(sqlite3.version)
        return conn
    except Error as e:
        print(e)

In [65]:
def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return:
    """
#     try:
    c = conn.cursor()
    c.execute(create_table_sql)
#     except Error as e:
#         print(e)

In [66]:
exclude_cols = ['track_name', 'artist_name', 'duration_ms', 'track_href', 'uri', 'id', 'type', 'mode','key']

feature_list = ['danceability','energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']


In [69]:
def main():
    database = r"C:\sqlite\wmw\wmw.db"
 
    sql_create_playlists_table = """CREATE TABLE IF NOT EXISTS playlists (
                                        id text PRIMARY KEY,
                                        name text NOT NULL
                                    ); """
 
    sql_create_tracks_table = """CREATE TABLE IF NOT EXISTS tracks (
                                    id text PRIMARY KEY,
                                    track_name text NOT NULL,
                                    track_href text NOT NULL,
                                    duration_ms integer NOT NULL,
                                    danceability integer NOT NULL,
                                    energy integer NOT NULL,
                                    loudness integer NOT NULL,
                                    speechiness integer NOT NULL,
                                    acousticness integer NOT NULL, 
                                    instrumentalness integer NOT NULL,
                                    liveness integer NOT NULL,
                                    valence integer NOT NULL,
                                    tempo integer NOT NULL,
                                    mode integer NOT NULL,
                                    key integer NOT NULL,
                                    playlist_id text NOT NULL,
                                    FOREIGN KEY (playlist_id) REFERENCES playlists (id)
                                );"""
    
    sql_create_artists_table = """CREATE TABLE IF NOT EXISTS artists (
                                    id text PRIMARY KEY,
                                    artist_name text NOT NULL,
                                    track_id text NOT NULL,
                                    FOREIGN KEY (track_id) REFERENCES tracks (id)
                                );"""
 
    # create a database connection
    conn = create_connection(database)
 
    # create tables
    try:
        if conn is not None:
            # create projects table
            create_table(conn, sql_create_playlists_table)

            # create tracks table
            create_table(conn, sql_create_tracks_table)

            # create artists table
            create_table(conn, sql_create_artists_table)
    except Error as e:
        print(e)
    finally:
        if conn:
            conn.close()

In [None]:
def create_playlist(conn, playlist):
    """
    Create a new playlist into the playlists table
    :param conn:
    :param playlist:
    :return: playlist id
    """
    sql = ''' INSERT INTO playlists(id, name)
              VALUES(?,?) '''
    cur = conn.cursor()
    cur.execute(sql, playlist)
    return cur.lastrowid

In [70]:
if __name__ == '__main__':
    main()

2.6.0


In [2]:
# import boto3
# import sagemaker

In [3]:
# # session and role
# sagemaker_session = sagemaker.Session()
# role = sagemaker.get_execution_role()

# # create an S3 bucket
# bucket = sagemaker_session.default_bucket()

In [4]:
!ls -la data

'ls' is not recognized as an internal or external command,
operable program or batch file.


## Upload your training data to S3

In [5]:
# should be the name of directory you created to save your features data
data_dir = 'data'

In [6]:
# # set prefix, a descriptive name for a directory  
# prefix = 'sagemaker/wmw_estimator'

# # upload all data to S3
# input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)

---

# Modeling

It's time to define and train the models!

---

## Complete a training script 

To implement a custom estimator, I need to complete a `train.py` script. 

A typical training script:
* Loads training data from a specified directory
* Parses any training & model hyperparameters (ex. nodes in a neural network, training epochs, etc.)
* Instantiates a model of your design, with any specified hyperparams
* Trains that model 
* Finally, saves the model so that it can be hosted/deployed, later

### Defining and training a model

To complete a `train.py` file, you will:
1. Import any extra libraries you need
2. Define any additional model training hyperparameters using `parser.add_argument`
2. Define a model in the `if __name__ == '__main__':` section
3. Train the model in that same section


In [7]:
# Directory of train.py
!pygmentize model/train.py

Error: cannot read infile: [Errno 2] No such file or directory: 'model/train.py'


---
# Create an Estimator

When a custom model is constructed in SageMaker, an entry point must be specified. This is the Python file which will be executed when the model is trained; the `train.py` function you specified above. To run a custom training script in SageMaker, construct an estimator, and fill in the appropriate constructor arguments:

* **entry_point**: The path to the Python script SageMaker runs for training and prediction.
* **source_dir**: The path to the training script directory `source_sklearn` OR `source_pytorch`.
* **entry_point**: The path to the Python script SageMaker runs for training and prediction.
* **source_dir**: The path to the training script directory `train_sklearn` OR `train_pytorch`.
* **entry_point**: The path to the Python script SageMaker runs for training.
* **source_dir**: The path to the training script directory `train_sklearn` OR `train_pytorch`.
* **role**: Role ARN, which was specified, above.
* **train_instance_count**: The number of training instances (should be left at 1).
* **train_instance_type**: The type of SageMaker instance for training. Note: Because Scikit-learn does not natively support GPU training, Sagemaker Scikit-learn does not currently support training on GPU instance types.
* **sagemaker_session**: The session used to train on Sagemaker.
* **hyperparameters** (optional): A dictionary `{'name':value, ..}` passed to the train function as hyperparameters.

Note: For a PyTorch model, there is another optional argument **framework_version**, which you can set to the latest version of PyTorch, `1.0`.

## Define PyTorch estimators

In [39]:
# Build sequences and targets
def create_playlist_sequences(input_data):
    input_playlists = []
    
    for i in input_data['volume'].unique():
        temp_vol = input_data[input_data['volume'] == i]
        X = temp_vol.iloc[:, 2:11].values
        y = temp_vol.iloc[:, 11:].values
        input_playlists.append((X, y))
        
    return input_playlists

In [40]:
from unittest.mock import MagicMock, patch

def _print_success_message():
    print('Tests Passed!')

def test_playlist_sequences(input_playlists):
    
    track_features = [-2.39099487, -2.63509459, -0.27732204,  0.92969533, -0.48983686,-1.15691947,  1.08569029, -1.20454903,  2.09618458, -5.37044178, 0.23380331]
    
    track_features_len = 11
    target_features_len = 8
    
    # check shape and equality of first track
    assert len(input_playlists[0][0][0]) == len(track_features), \
        'Number of features in input_playlist features does not match expected number of ' + str(len(track_features))    
    
    # check shape of input and output arrays
    assert input_playlists[0][0].shape[1]==track_features_len, \
        'input_features should have as many columns as selected features, got: {}'.format(train_x.shape[1])
    assert input_playlists[0][1].shape[1]==target_features_len, \
        'target_features should have as many columns as selected features, got: {}'.format(train_x.shape[1])
    
    #TODO: Add more tests
    
    _print_success_message()

### Test run of benchmark and candidate models and train components
Here I will see if the configurations I have set work accordingly with no errors. Once it runs smoothly, I will instantiate an estimator using the Sagemaker API.

In [7]:
import os
import torch
import torch.utils.data

train_data = pd.read_csv(os.path.join(data_dir, "train.csv"))

# Gather sequences and targets
processed_data = create_playlist_sequences(train_data)

NameError: name 'data_dir' is not defined

In [42]:
# Training function for LSTM
def train_lstm(model, train_loader, epochs, criterion, optimizer, device):
    """
    This is the training method that is called by the PyTorch training script of the LSTM model. The parameters
    passed are as follows:
    model        - The PyTorch model that we wish to train.
    train_loader - The PyTorch DataLoader that should be used during training.
    epochs       - The total number of epochs to train for.
    criterion    - The loss function used for training. 
    optimizer    - The optimizer to use during training.
    device       - Where the model and data should be loaded (gpu or cpu).
    """
    
    # training loop is provided
    for epoch in range(1, epochs + 1):
        
        model.train() # Make sure that the model is in training mode.

        total_loss = 0

        for batch in train_loader:
            
            # get data
            batch_x, batch_y = batch
            
            # 
            batch_x = torch.from_numpy(batch_x).float().squeeze()
            batch_y = torch.from_numpy(batch_y).float()

            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_dim),
                torch.zeros(1, 1, model.hidden_layer_dim))

            # get predictions from model
            y_pred = model(batch_x)
            
            # perform backprop
            loss = criterion(y_pred, batch_y)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.data.item()
            
        if epoch%25 == 1:
            print("Epoch: {}, Loss: {}".format(epoch, total_loss / len(train_loader)))

In [12]:
import torch.optim as optim
from model.LSTM_Estimator import LSTMEstimator

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTMEstimator(9, 30, 1, 9)
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.L1Loss()

train_lstm(model, processed_data, 1000, loss_fn, optimizer, device)

Epoch: 1, Loss: 0.6990070101377126
Epoch: 26, Loss: 0.5682820492499584
Epoch: 51, Loss: 0.5285619263713425
Epoch: 76, Loss: 0.4913871449393195
Epoch: 101, Loss: 0.4590462373720633
Epoch: 126, Loss: 0.4337306973096487
Epoch: 151, Loss: 0.4177602948369207
Epoch: 176, Loss: 0.3918305683780361
Epoch: 201, Loss: 0.3773677679332527
Epoch: 226, Loss: 0.36471302283776774
Epoch: 251, Loss: 0.35144416303248016
Epoch: 276, Loss: 0.339977101699726
Epoch: 301, Loss: 0.32800401505586263
Epoch: 326, Loss: 0.32001565155145284
Epoch: 351, Loss: 0.3156712453913044
Epoch: 376, Loss: 0.30856828633192424
Epoch: 401, Loss: 0.29817863251711874
Epoch: 426, Loss: 0.2994425506205172
Epoch: 451, Loss: 0.29111529886722565
Epoch: 476, Loss: 0.2839263238616892
Epoch: 501, Loss: 0.28064698625255274
Epoch: 526, Loss: 0.2764303003614013
Epoch: 551, Loss: 0.2732407281527648
Epoch: 576, Loss: 0.2707044699707547
Epoch: 601, Loss: 0.266480896521259
Epoch: 626, Loss: 0.26323049978629964
Epoch: 651, Loss: 0.2582227958215249

In [8]:
# Spotify API
import spotipy
import spotipy.util as util

# Spotify for developers client auth variables
username = os.environ['SPOTIFY_EMAIL']
spotify_id = os.environ['SPOTIFY_ID']
spotify_secret = os.environ['SPOTIFY_SECRET']

# Set API scope
scope='playlist-read-private, playlist-modify-private, playlist-modify-public'

# Get auth token
token = util.prompt_for_user_token(username, 
                                   scope,
                                   client_id=spotify_id,
                                   client_secret=spotify_secret,
                                   redirect_uri='http://localhost/')

In [44]:
from spotipy.oauth2 import SpotifyClientCredentials

#Authenticate
sp = spotipy.Spotify(
    client_credentials_manager = SpotifyClientCredentials(
        client_id=spotify_id,
        client_secret=spotify_secret
    )
)

In [45]:
# Read in WMW tracks to date for recommendations
track_data = pd.read_csv(os.path.join(data_dir, "wmw_tracks.csv"))

track_data.head()

Unnamed: 0,volume,position,track_name,artist_name,danceability,energy,key,loudness,mode,speechiness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,38,1,Finding It There,Goldmund,0.187,0.00257,1,-37.134,1,0.0427,...,0.0915,0.0374,123.707,audio_features,6CnPCuUcM3A5PMP4gUy0vw,spotify:track:6CnPCuUcM3A5PMP4gUy0vw,https://api.spotify.com/v1/tracks/6CnPCuUcM3A5...,https://api.spotify.com/v1/audio-analysis/6CnP...,220120,5
1,38,2,Light Forms,Rohne,0.671,0.545,10,-12.848,0,0.0393,...,0.118,0.284,133.036,audio_features,6MkUPsz5hYeneo0a9H0VT8,spotify:track:6MkUPsz5hYeneo0a9H0VT8,https://api.spotify.com/v1/tracks/6MkUPsz5hYen...,https://api.spotify.com/v1/audio-analysis/6MkU...,265870,4
2,38,3,C-Side,Khruangbin,0.688,0.779,11,-10.129,0,0.0579,...,0.349,0.938,94.073,audio_features,6GvAM8oyVApQHGMgpBt8yl,spotify:track:6GvAM8oyVApQHGMgpBt8yl,https://api.spotify.com/v1/tracks/6GvAM8oyVApQ...,https://api.spotify.com/v1/audio-analysis/6GvA...,283407,4
3,38,4,Didn't I (Dave Allison Rework),Darondo,0.539,0.705,0,-6.729,1,0.0527,...,0.133,0.685,186.033,audio_features,1owjOeZt1BdYWW6T8fIAEe,spotify:track:1owjOeZt1BdYWW6T8fIAEe,https://api.spotify.com/v1/tracks/1owjOeZt1BdY...,https://api.spotify.com/v1/audio-analysis/1owj...,328000,4
4,38,5,Woman Of The Ghetto - Akshin Alizadeh Remix,Marlena Shaw,0.707,0.573,7,-8.403,0,0.0276,...,0.0858,0.189,100.006,audio_features,2h8cQH7zhUWrynZi2MKhhC,spotify:track:2h8cQH7zhUWrynZi2MKhhC,https://api.spotify.com/v1/tracks/2h8cQH7zhUWr...,https://api.spotify.com/v1/audio-analysis/2h8c...,302467,4


In [46]:
from tqdm.notebook import tqdm
from sklearn.externals import joblib

feature_list =  ['danceability','energy', 'loudness', 'speechiness', 'acousticness',
                 'instrumentalness', 'liveness', 'valence', 'tempo']

#'mode','key','tempo'

std_scaler = joblib.load('standard_features.pkl')

class Playlist():
    def __init__(self):
        self.name = "Wilson's Morning Wake Up Vol. Test"
        self.intro_songs = []
        self.search_results = []
        self.recommended_track_ids = pd.DataFrame() #list of track ids straight from spotify
        self.trax = [] #all tracks as dict
        self.df = None #this is where the data goes
        self.playlist = None
        
       
        # DO EVERYTHING
        self.get_recommendations() # Grab recommendations based on full WMW catalog
        self.prep_features() # Prepare features using StandardScaler
#         self.get_predictions() # Generate features for each track position for new WMW
        
        
    def get_recommendations(self):
        print('Getting Recommendations...')
        
        # Iterate full catalog of WMW songs
        for _, row in tqdm(track_data[track_data['volume'] == 38].iterrows(), total=track_data[track_data['volume'] == 38].shape[0]):
            song_search = row['track_name'].partition('-')[0] + ' ' + row['artist_name']
            try:
        
                # Query Spotify to get track metadata
                song_res = sp.search(song_search, limit=1)['tracks']['items'][0]

                self.search_results.append({
                    'id': song_res['id'],
                    'artists': [i['name'] for i in song_res['artists']],
                    'name': song_res['name']
                })
                
                # Gather recommendations for each of the past WMW tracks
                results = sp.recommendations(seed_tracks = [song_res['id']], limit=10)

                for r in results['tracks']:
                    track={}
                    track['id'] = r['id']
                    track['artists'] = [i['name'] for i in r['artists']],
                    track['name'] = r['name']
                    track_features = sp.audio_features(r['id'])[0]
                    track.update(track_features)
                    final_track = pd.DataFrame(track, index=[0])
                    self.recommended_track_ids = self.recommended_track_ids.append(final_track, ignore_index=True)
                    
            except:
                print("Song not searchable")
        
        return self.recommended_track_ids
    
    
    def prep_features(self):
        self.recommended_track_ids[feature_list] = std_scaler.transform(self.recommended_track_ids[feature_list])
            
    
    def generate_playlist_features(model, intro_tracks, predict_len=15):
        hidden = model.init_hidden()
        
        # extracts features from intro tracks
        intro_input = text_to_tensor()
        
        # predicted playlist
        predicted = intro_tracks
        
        # build up hidden state
        for p in range(len(intro_tracks) - 1):
            _, hidden = model(intro_input[p], hidden)
        inp = intro_input[-1]
        
        for p in range(predict_len):
            output, hidden = model(inp, hidden)
            

In [47]:
# pl = Playlist()

In [48]:
# pl.recommended_track_ids.head()

In [49]:
# import random

# recommended = pd.DataFrame()

# wmw_sample = random.sample(track_data['volume'].unique().tolist(), 5)

# wmw_sample_df = track_data[track_data['volume'].isin(wmw_sample)]

# # Iterate full catalog of WMW songs
# for _, row in tqdm(wmw_sample_df.iterrows(), total=wmw_sample_df.shape[0]):
#     song_search = row['track_name'].partition('-')[0] + ' ' + row['artist_name']
#     try:

#         # Query Spotify to get track metadata
#         song_res = sp.search(song_search, limit=1)['tracks']['items'][0]

#         # Gather recommendations for each of the past WMW tracks
#         results = sp.recommendations(seed_tracks = [song_res['id']], limit=10)

#         for r in results['tracks']:
#             track={}
#             track['id'] = r['id']
#             track['artists'] = [i['name'] for i in r['artists']],
#             track['name'] = r['name']
#             track_features = sp.audio_features(r['id'])[0]
#             track.update(track_features)
#             final_track = pd.DataFrame(track, index=[0])
#             recommended = recommended.append(final_track, ignore_index=True)

#     except:
#         print("Song not searchable")

In [50]:
# recommended[feature_list] = std_scaler.transform(recommended[feature_list])

#TODO: Make sure new playlist has unique songs compared to the all previous WMWs

In [51]:
# recommended[feature_list].head()

In [52]:
def harmonic_match(key, mode):
    
    # Harmonic Mixing Wheel: Pitch Class 
    # 1A 0 - A flat minor: 8 | 1B 0 - B major: 11
    # 2A 1 - E flat minor: 3 | 2B 1 - F-sharp major: 6
    # 3A 2 - B-flat minor: 10 | 3B 2 - D-flat major: 1
    # 4A 3 - F minor: 5 | 4B 3 - A-flat major: 8
    # 5A 4 - C minor: 0 | 5B 4 - E-flat major: 3
    # 6A 5 - G minor: 7 | 6B 5 - B-flat major: 10
    # 7A 6 - D minor: 2 | 7B 6 - F major: 5
    # 8A 7 - A minor: 9 | 8B 7 - C major: 0
    # 9A 8 - E minor: 4 | 9B 8 - G major: 7
    # 10A 9 - B minor: 11 | 10B 9 - D major: 2
    # 11A 10 - F sharp minor: 6 | 11B 10 - A major: 9
    # 12A 11 - D flat minor: 1 | 12B 11 - E major: 4
    
    # Harmonic keys mapped to corresponding pitch classes
    pitch_to_harmonic_keys = {0: [4, 7], 1: [11, 2], 2: [6, 9],
                              3: [1, 4], 4: [8, 11], 5: [3, 6],
                              6: [10, 1], 7: [5, 8], 8: [0, 3],
                              9: [7, 10], 10: [2, 5], 11: [9, 0]}
    
    # Extract values and keys
    dv = np.array(list(pitch_to_harmonic_keys.values()))
    dk = np.array(list(pitch_to_harmonic_keys.keys()))

    # Harmonic key code corresponding song pitch class
    harm_key = dv[np.where(dk == key)][0][mode]
    
    # Harmonic key codes
    harmonic_keys = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    
    # Get compatible key codes
    comp_keycodes = np.take(harmonic_keys, 
                            [harm_key - 1, harm_key, harm_key + 1],
                            mode='wrap')
    
    # Compatible keys
    comp_keys = [np.where(dv[:, mode] == i)[0][0].tolist() for i in comp_keycodes]
      
    # Compatible up/down key
    inner_outer_key = np.array([np.where(dv[:, int(not bool(mode))] == harm_key)[0][0]])
    
    comp_keys = np.concatenate([comp_keys, inner_outer_key])
    
    print("Compatible keys:", comp_keys)
    
    return comp_keys, inner_outer_key

In [53]:
from unittest.mock import MagicMock, patch

def _print_success_message():
    print('Tests Passed!')

def test_harmonic_mixing(song):
    
    truth_octaves = [11, 0, 1]
    
    next_octaves = harmonic_match(0, 1)
    
    # check shape and equality of first track
    assert len(truth_octaves) == len(next_octaves), \
        'Number of octaves incorrect, should get: ' + str(len(truth_octaves))    
    
    # check shape of input and output arrays
    assert input_playlists[0][0].shape[1]==track_features_len, \
        'input_features should have as many columns as selected features, got: {}'.format(train_x.shape[1])
    assert input_playlists[0][1].shape[1]==target_features_len, \
        'target_features should have as many columns as selected features, got: {}'.format(train_x.shape[1])
    
    #TODO: Add more tests
    
    _print_success_message()

In [54]:
# Look at a track
# recommended.iloc[142]

In [55]:
import random


feature_list =  ['danceability','energy', 'loudness', 'speechiness', 'acousticness',
                 'instrumentalness', 'liveness', 'valence', 'tempo']

std_scaler = joblib.load('standard_features.pkl')

def predict_playlist(model, predict_len=15):
    global recommended
    
    intro_tracks = pd.DataFrame() #list of track ids straight from spotify
    
    model.eval()
    
    song = track_data[track_data['position'] == 1].sample(1).copy()
        
    # Gather recommendations for each of the past WMW tracks
    song_res = sp.recommendations(seed_tracks = song['id'].values, limit=1)
    
    for r in song_res['tracks']:
        track={}
        track['id'] = r['id']
        track['artists'] = [i['name'] for i in r['artists']],
        track['name'] = r['name']
        track_features = sp.audio_features(r['id'])[0]
        track.update(track_features)
        final_track = pd.DataFrame(track, index=[0])
        intro_tracks = intro_tracks.append(final_track, ignore_index=True)
    
#     track = {
#         'id': song_res['id'],
#         'artists': [i['name'] for i in song_res['artists']],
#         'name': song_res['name']
#     }

#     track_features = sp.audio_features(track['id'])[0]

#     track.update(track_features)

#     final_track = pd.DataFrame(track, index=[0])

#     intro_tracks = intro_tracks.append(final_track, ignore_index=True)
    
#     print(intro_tracks)
    


#     try:

#         # Query Spotify to get track metadata
#         song_res = sp.search(song_search, limit=1)['tracks']['items'][0]


    
#     # Iterate full catalog of WMW songs
#     for song in initial_songs, total=len(initial_songs):
                
#         song_search = song
                
#         try:
            
#             # Query Spotify to get track metadata
#             song_res = sp.search(song_search, limit=1)['tracks']['items'][0]
            
#             track = {
#                 'id': song_res['id'],
#                 'artists': [i['name'] for i in song_res['artists']],
#                 'name': song_res['name']
#             }
            
#             track_features = sp.audio_features(track['id'])[0]
            
#             track.update(track_features)
            
#             final_track = pd.DataFrame(track, index=[0])
            
#             intro_tracks = intro_tracks.append(final_track, ignore_index=True)
                
#         except:
#             print("Song not searchable")

    intro_tracks[feature_list] = std_scaler.transform(intro_tracks[feature_list])
            
    predicted = intro_tracks

    inp = torch.FloatTensor(intro_tracks[feature_list].values)
    
    print("Intro:", predicted['name'].values[0], '-',predicted['artists'].values[0])

    for p in tqdm(range(predict_len)):
        print("Song position:", p + 2)
        
        # Generate output feature set of next song
        output = model(inp).detach().numpy()
        
        # Get mode and key from last song and generate compatible keys and modes
        keys, outer_inner_key = harmonic_match(predicted.iloc[-1]['key'], predicted.iloc[-1]['mode'])
        
        # Get recommended tracks for song position
        sample = position_recommendation(track_data, p + 1)
        
        sample = sample[~sample['id'].isin(track_data['id'].values.tolist())].copy()
        
        # Gather compatible next songs
        harmonic_next_songs = sample[(sample['key'].isin(keys) & sample['mode'] == predicted.iloc[-1]['mode'])].reset_index(drop=True)
        
        # Pick song at closest distance to current song
        next_song_id = np.argmin(cdist(output, harmonic_next_songs[feature_list]))
        next_song = harmonic_next_songs.iloc[next_song_id].copy()
        
        # Set new input vector for next song
        inp = torch.FloatTensor([next_song[feature_list]])
        
        # Append next song to playlist
        predicted = predicted.append(next_song, ignore_index=True)
        
        # Drop next song from recommended song pool
#         sample = sample.drop([next_song_id], axis=0).reset_index(drop=True)
        
    return predicted

In [56]:
def position_recommendation(track_data, song_position):
    
    recommended = pd.DataFrame()
    
    wmw_sample = random.sample(track_data['volume'].unique().tolist(), 10)

    wmw_sample_df = track_data[track_data['volume'].isin(wmw_sample)].copy()

    wmw_sample_df = wmw_sample_df[wmw_sample_df['position'] == song_position + 1].copy()

    # Iterate full catalog of WMW songs
    for _, row in wmw_sample_df.iterrows():
        song_search = row['track_name'].partition('-')[0] + ' ' + row['artist_name']
        try:

            # Query Spotify to get track metadata
            song_res = sp.search(song_search, limit=1)['tracks']['items'][0]

            # Gather recommendations for each of the past WMW tracks
            results = sp.recommendations(seed_tracks = [song_res['id']], limit=20)

            for r in results['tracks']:
                track={}
                track['id'] = r['id']
                track['artists'] = [i['name'] for i in r['artists']],
                track['name'] = r['name']
                track_features = sp.audio_features(r['id'])[0]
                track.update(track_features)
                final_track = pd.DataFrame(track, index=[0])
                recommended = recommended.append(final_track, ignore_index=True)

        except:
            print("Song not searchable")
            
    recommended[feature_list] = std_scaler.transform(recommended[feature_list])
            
    return recommended

In [57]:
# initial_songs = ['luke howard portrait gallery']

new_playlist = predict_playlist(model, predict_len=14)

Intro: Eyes Closed And Traveling - ['Peter Broderick']


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))

Song position: 2
Compatible keys: [11  6  1  3]
Song position: 3
Compatible keys: [ 6  1  8 10]
Song position: 4
Compatible keys: [ 6  1  8 10]
Song position: 5
Compatible keys: [ 6  1  8 10]
Song position: 6
Compatible keys: [1 8 3 5]
Song position: 7
Compatible keys: [ 6  1  8 10]
Song position: 8
Compatible keys: [ 6  1  8 10]
Song position: 9
Compatible keys: [ 3 10  5  7]
Song position: 10
Compatible keys: [0 7 2 4]
Song position: 11
Compatible keys: [ 7  2  9 11]
Song position: 12
Compatible keys: [0 7 2 4]
Song position: 13
Compatible keys: [0 7 2 4]
Song position: 14
Compatible keys: [5 0 7 9]
Song position: 15
Compatible keys: [10  5  0  2]



In [58]:
if token:
    sp = spotipy.Spotify(auth=token)
    sp.trace = False
    tracks = sp.user_playlist_replace_tracks('1247785541', '7x1MY3AW3YCaHoicpiacGv', new_playlist['id'].values)
else:
    print("Can't get token for", username)

In [10]:
# if len(sys.argv) > 3:
#     username = sys.argv[1]
#     playlist_id = sys.argv[2]
#     track_ids = sys.argv[3:]
# else:
#     print("Usage: %s username playlist_id track_id ..." % (sys.argv[0],))
#     sys.exit()

# scope = 'playlist-modify-public'
# token = util.prompt_for_user_token(username, scope)

if token:
    sp = spotipy.Spotify(auth=token)
    sp.trace = False
    results = sp.playlist('7x1MY3AW3YCaHoicpiacGv')
else:
    print("Can't get token for", username)

In [11]:
results

{'collaborative': False,
 'description': 'Wilson&#x27;s AI Morning Wake Up',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/7x1MY3AW3YCaHoicpiacGv'},
 'followers': {'href': None, 'total': 0},
 'href': 'https://api.spotify.com/v1/playlists/7x1MY3AW3YCaHoicpiacGv',
 'id': '7x1MY3AW3YCaHoicpiacGv',
 'images': [{'height': 640,
   'url': 'https://mosaic.scdn.co/640/ab67616d0000b27357414527896ba0427a4411b0ab67616d0000b2737154ecb419796618341679c5ab67616d0000b2739d9126ec04bb005c687291a5ab67616d0000b273c8ca7badcec8eff55cdca3f4',
   'width': 640},
  {'height': 300,
   'url': 'https://mosaic.scdn.co/300/ab67616d0000b27357414527896ba0427a4411b0ab67616d0000b2737154ecb419796618341679c5ab67616d0000b2739d9126ec04bb005c687291a5ab67616d0000b273c8ca7badcec8eff55cdca3f4',
   'width': 300},
  {'height': 60,
   'url': 'https://mosaic.scdn.co/60/ab67616d0000b27357414527896ba0427a4411b0ab67616d0000b2737154ecb419796618341679c5ab67616d0000b2739d9126ec04bb005c687291a5ab67616d0000b273c8ca7badcec

In [None]:
# # Training function
# def train_rnn(model, train_loader, epochs, criterion, optimizer, device):
#     """
#     This is the training method that is called by the PyTorch training script. The parameters
#     passed are as follows:
#     model        - The PyTorch model that we wish to train.
#     train_loader - The PyTorch DataLoader that should be used during training.
#     epochs       - The total number of epochs to train for.
#     criterion    - The loss function used for training. 
#     optimizer    - The optimizer to use during training.
#     device       - Where the model and data should be loaded (gpu or cpu).
#     """
    
#     # training loop is provided
#     for epoch in range(1, epochs + 1):
#         model.train() # Make sure that the model is in training mode.

#         total_loss = 0
        
#         hidden = model.initHidden()

#         for batch in train_loader:
            
#             # get data
#             batch_x, batch_y = batch
            
#             # 
#             batch_x = torch.from_numpy(batch_x).float().squeeze()
#             batch_y = torch.from_numpy(batch_y).float()

#             batch_x = batch_x.to(device)
#             batch_y = batch_y.to(device)

#             optimizer.zero_grad()

#             y_pred = []
            
#             # get predictions
#             for x in batch_x:
#                 y, hidden = model(x, hidden)
#                 y_pred.append(y)
            
#             # perform backprop
#             loss = criterion(y_pred, batch_y)
#             loss.backward()
#             optimizer.step()
            
#             total_loss += loss.data.item()
            
#         if epoch%25 == 1:
#             print("Epoch: {}, Loss: {}".format(epoch, total_loss / len(train_loader)))

#TODO: Create working RNN Benchmark model

In [None]:
# import torch.optim as optim
# from model.RnnEstimator import RNNEstimator

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = RNNEstimator(11, 30, 8)
# optimizer = optim.Adam(model.parameters(), lr=0.001)
# loss_fn = torch.nn.L1Loss()

# train_rnn(model, processed_data, 100, loss_fn, optimizer, device)

### Build and Train the PyTorch Model with Hyperparameter Tuning

In [None]:
# Estimator code
from sagemaker.pytorch import PyTorch
output_path = 's3://{}/{}'.format(bucket, prefix)

estimator = PyTorch(entry_point="LSTM_Train.py",
                    source_dir="model",
                    role=role,
                    framework_version='0.4.0',
                    train_instance_count=1,
                    output_path = output_path,
                    train_instance_type='ml.m4.xlarge',
                    hyperparameters={
                        'input_features': 11,
                        'hidden_dim': 12,
                        'output_dim': 8,
                        'epochs': 100
                    })

In [None]:
# Fit estimator
estimator.fit({'train': input_data})

In [None]:
%%time

# deploy your model to create a predictor
predictor = estimator.deploy(initial_instance_count=1, instance_type='ml.m4.xlarge')