In [37]:
%matplotlib inline

import pandas as pd
import sklearn
from sklearn import model_selection
import Recommenders as Recommenders
# Listening triplets, one record per line with no header row. The reading cell
# names the three fields user_id, song_id and listen_count.
triplets_file='https://static.turi.com/datasets/millionsong/10000.txt'
# Song metadata CSV, joined to the triplets on song_id. Columns per the original
# note: song_id, title, release_by, artist_name (TODO confirm against the file).
songs_metadata_file='https://static.turi.com/datasets/millionsong/song_data.csv'


In [38]:
# Read the listening triplets. The file has no header row, so the column names
# are supplied at read time instead of being patched onto the frame afterwards.
song_df_1 = pd.read_table(
    triplets_file,
    header=None,
    names=['user_id', 'song_id', 'listen_count'],
)

In [31]:
# Attach song metadata to every listening record. Both frames share the
# 'song_id' column, which is the join key. The metadata is de-duplicated on
# 'song_id' first so the left join cannot multiply triplet rows; how="left"
# keeps every triplet even when no metadata row matches it.
song_df_2 = pd.read_csv(songs_metadata_file)
song_df = pd.merge(
    song_df_1,
    song_df_2.drop_duplicates(['song_id']),
    on="song_id",
    how="left",
)

# Work on the first 10,000 rows only. .copy() detaches the subset from the
# merged frame, so adding columns to song_df afterwards cannot trigger pandas'
# SettingWithCopyWarning.
song_df = song_df.head(10000).copy()

# First 5 rows for visual reference. Kept as the final expression of the cell
# so that the notebook actually renders it.
song_df.head()

In [32]:
# Build one display label per track and rank the tracks by popularity:
#   1. 'song' is "<title> - <artist_name>".
#   2. Grouping by 'song' and applying 'count' to listen_count gives the number
#      of listening records (non-null listen_count rows) per song. This is a
#      row count, not the sum of the play counts.
#   3. grouped_sum is the total of those per-song counts.
#   4. 'percentage' is each song's share of that total, multiplied by 100.
#   5. The final expression lists the songs from most to least frequent,
#      breaking ties alphabetically by song label.
# assign() returns a new frame, so this works without a chained-assignment
# warning even when song_df is a slice of a larger frame.
song_df = song_df.assign(
    song=song_df['title'].map(str) + " - " + song_df['artist_name']
)
song_grouped = (
    song_df.groupby(['song'])
    .agg({'listen_count': 'count'})
    .reset_index()
)
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = song_grouped['listen_count'].div(grouped_sum) * 100
song_grouped.sort_values(['listen_count', 'song'], ascending=[False, True])

Unnamed: 0,song,listen_count,percentage
3660,Sehr kosmisch - Harmonia,45,0.45
4678,Undo - Björk,32,0.32
5105,You're The One - Dwight Yoakam,32,0.32
1071,Dog Days Are Over (Radio Edit) - Florence + Th...,28,0.28
3655,Secrets - OneRepublic,28,0.28
...,...,...,...
5139,high fives - Four Tet,1,0.01
5140,in white rooms - Booka Shade,1,0.01
5143,paranoid android - Christopher O'Riley,1,0.01
5149,¿Lo Ves? [Piano Y Voz] - Alejandro Sanz,1,0.01


In [33]:

# Distinct user ids and song labels in the working subset.
users = song_df['user_id'].unique()
songs = song_df['song'].unique()

# Report the sizes explicitly: a bare len(...) in the middle of a cell is
# evaluated and then discarded. (The original notes recorded 365 unique users
# and 5151 unique songs for this subset.)
print("unique users: %d, unique songs: %d" % (len(users), len(songs)))

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible across runs.
train_data, test_data = model_selection.train_test_split(
    song_df, test_size=0.20, random_state=0
)


In [35]:
# Fit the item-similarity recommender on the training split, using 'user_id'
# as the user column and 'song' as the item column.
is_model = Recommenders.item_similarity_recommender_py()
is_model.create(train_data, 'user_id', 'song')

# Example listener: the entry at index 8 of the distinct user ids. Fetch the
# items the fitted model returns for that user.
user_id = users[8]
user_items = is_model.get_user_items(user_id)

# Banner followed by one line per item.
print(
    "------------------------------------------------------------------------------------",
    "Training data songs for the user userid: %s:" % user_id,
    "------------------------------------------------------------------------------------",
    sep="\n",
)

for user_item in user_items:
    print(user_item)

print(
    "----------------------------------------------------------------------",
    "Recommendation process going on:",
    "----------------------------------------------------------------------",
    sep="\n",
)

# Recommendations from the personalized model for the chosen user; left as the
# final expression of the cell so the result is rendered.
is_model.recommend(user_id)


------------------------------------------------------------------------------------
Training data songs for the user userid: 9bb911319fbc04f01755814cb5edb21df3d1a336:
------------------------------------------------------------------------------------
Tell Me Why - Supermode
Raining Again (Steve Angello's Vocal Mix) - Moby
Angel On My Shoulder (EDX Radio Edit) - Kaskade
If I Can't Have You - Mount Sims
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique songs for the user: 4
no. of unique songs in the training set: 4483
Non zero values in cooccurence_matrix :16


Unnamed: 0,user_id,song,score,rank
0,9bb911319fbc04f01755814cb5edb21df3d1a336,Cover My Eyes - La Roux,0.0,1
1,9bb911319fbc04f01755814cb5edb21df3d1a336,The Carpal Tunnel Of Love - Fall Out Boy,0.0,2
2,9bb911319fbc04f01755814cb5edb21df3d1a336,The Whole World - Outkast Featuring Killer Mike,0.0,3
3,9bb911319fbc04f01755814cb5edb21df3d1a336,Go With The Flow - Queens Of The Stone Age,0.0,4
4,9bb911319fbc04f01755814cb5edb21df3d1a336,She Just Likes To Fight - Four Tet,0.0,5
5,9bb911319fbc04f01755814cb5edb21df3d1a336,Mourning Air - Portishead,0.0,6
6,9bb911319fbc04f01755814cb5edb21df3d1a336,Break Through - Colbie Caillat,0.0,7
7,9bb911319fbc04f01755814cb5edb21df3d1a336,Creepin Up The Backstairs - The Fratellis,0.0,8
8,9bb911319fbc04f01755814cb5edb21df3d1a336,Warning Sign - Coldplay,0.0,9
9,9bb911319fbc04f01755814cb5edb21df3d1a336,Your Arms Feel Like home - 3 Doors Down,0.0,10


In [36]:
# Ask the fitted model for the songs most similar to the given seed song(s).
seed_songs = ['U Smile - Justin Bieber']
is_model.get_similar_items(seed_songs)


no. of unique songs in the training set: 4483
Non zero values in cooccurence_matrix :271


Unnamed: 0,user_id,song,score,rank
0,,Somebody To Love - Justin Bieber,0.428571,1
1,,Bad Company - Five Finger Death Punch,0.375,2
2,,Love Me - Justin Bieber,0.333333,3
3,,One Time - Justin Bieber,0.333333,4
4,,Here Without You - 3 Doors Down,0.333333,5
5,,Stuck In The Moment - Justin Bieber,0.333333,6
6,,Teach Me How To Dougie - California Swag District,0.333333,7
7,,Paper Planes - M.I.A.,0.333333,8
8,,Already Gone - Kelly Clarkson,0.333333,9
9,,The Funeral (Album Version) - Band Of Horses,0.3,10
