In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
# Load the preprocessed track table and the previously fitted clustering model.
tracks_data = pd.read_csv('data/preprocessed_dataset.csv')

# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('kmeans_model.sav', 'rb') as model_file:
    model = pickle.load(model_file)

In [3]:
# Put the model's per-row labels in a leading 'cluster' column next to the
# track data (rows are paired up by index).
tracks_clusters = pd.concat(
    [pd.DataFrame({'cluster': model.labels_}), tracks_data],
    axis=1,
)

## Recommender System

- If the track is not in the dataset, recommend the most popular tracks (cold start)
- If the track is in the dataset, recommend tracks from the same cluster

#### Note: `n` is the number of tracks to recommend

In [4]:
def cold_start_recommend(tracks_data, n):
    """Return the ``n`` tracks with the highest ``num_followers``.

    Parameters
    ----------
    tracks_data : pandas.DataFrame
        Must contain 'num_followers', 'artist_name', 'track_name'
        and 'album_name' columns.
    n : int
        Number of rows to keep from the top of the ranking.

    Returns
    -------
    pandas.DataFrame
        The top rows, restricted to the artist, track and album name
        columns, in descending order of followers.
    """
    display_columns = ['artist_name', 'track_name', 'album_name']
    ranked = tracks_data.sort_values(by='num_followers', ascending=False)
    return ranked.iloc[:n][display_columns]


def content_based_recommend(selected_cluster, n, all_tracks=None, random_state=None):
    """Pick ``n`` tracks from a cluster, padding with popular tracks if needed.

    Parameters
    ----------
    selected_cluster : pandas.DataFrame
        Rows of the cluster to draw from. Must contain 'track_uri',
        'artist_name', 'track_name' and 'album_name' columns. The frame is
        left unmodified.
    n : int
        Number of tracks to return.
    all_tracks : pandas.DataFrame, optional
        Catalogue used for padding when the cluster holds fewer than ``n``
        tracks. Must contain 'track_uri', 'num_followers' and the three
        name columns. Defaults to the notebook-level ``tracks_data`` frame.
    random_state : optional
        Forwarded to ``DataFrame.sample`` so that the random draw from a
        large cluster can be reproduced. ``None`` keeps the draw unseeded.

    Returns
    -------
    pandas.DataFrame
        Columns 'artist_name', 'track_name', 'album_name', indexed by
        track_uri. Cluster tracks come first, followed by any padding
        tracks ordered by descending ``num_followers``.
    """
    display_columns = ['artist_name', 'track_name', 'album_name']

    # Non-inplace set_index: the caller's frame keeps its 'track_uri' column,
    # so the same frame can be passed in again and no slice is mutated.
    cluster_view = selected_cluster.set_index('track_uri')[display_columns]
    cluster_size = cluster_view.shape[0]

    if cluster_size == n:
        return cluster_view
    if cluster_size > n:
        return cluster_view.sample(n, random_state=random_state)

    # Cluster too small: top up with the most followed tracks.
    if all_tracks is None:
        all_tracks = tracks_data
    # Leave out tracks already taken from the cluster so none appears twice,
    # and index the padding by track_uri so the result has a uniform index.
    padding_pool = all_tracks.loc[~all_tracks['track_uri'].isin(cluster_view.index)]
    popular_tracks = cold_start_recommend(
        padding_pool.set_index('track_uri'), n - cluster_size
    )
    return pd.concat([cluster_view, popular_tracks])


def recommend(artist_name, track_name, tracks_clusters, n):
    """Return the names of ``n`` recommended tracks for a given seed track.

    If no row matches both ``artist_name`` and ``track_name``, the most
    followed tracks are returned (cold start). Otherwise tracks are taken
    from the cluster of the first matching row.

    Parameters
    ----------
    artist_name : str
        Artist of the seed track, compared for exact equality.
    track_name : str
        Title of the seed track, compared for exact equality.
    tracks_clusters : pandas.DataFrame
        Track table with 'cluster', 'artist_name', 'track_name',
        'album_name', 'track_uri' and 'num_followers' columns.
    n : int
        Number of tracks to recommend.

    Returns
    -------
    list
        Recommended track names.
    """
    is_seed = (
        (tracks_clusters['artist_name'] == artist_name)
        & (tracks_clusters['track_name'] == track_name)
    )

    if not is_seed.any():
        recommend_tracks = cold_start_recommend(tracks_clusters, n)
    else:
        cluster_label = tracks_clusters.loc[is_seed, 'cluster'].iloc[0]
        in_cluster = tracks_clusters['cluster'] == cluster_label
        # Drop the seed rows so the track being asked about is not drawn
        # from its own cluster, and pass a copy so the helper never works on
        # a slice of ``tracks_clusters``.
        # NOTE(review): when the cluster is too small the helper pads with
        # popular tracks, which may still include the seed -- confirm whether
        # that needs filtering as well.
        selected_cluster = tracks_clusters.loc[in_cluster & ~is_seed].copy()
        recommend_tracks = content_based_recommend(selected_cluster, n)

    return recommend_tracks['track_name'].tolist()
        

In [5]:
# Example query: ask for five recommendations seeded with a single track.
recommend(
    artist_name='Bob Dylan',
    track_name='Boots of Spanish Leather',
    tracks_clusters=tracks_clusters,
    n=5,
)

['Really Really',
 'Inside Out',
 'El Cargo Del 5',
 'We Want Some P--sy',
 'Route 66']