## SONG RECOMMENDER SYSTEM

In [1]:
%matplotlib inline
import pandas 
from sklearn.model_selection import train_test_split
import Recommenders as Recommenders

## Loading the music data

In [2]:
# Input files: (user_id, song_id, listen_count) triplets and per-song metadata
triplets_file = 'kaggle_visible_evaluation_triplets.txt'
song_metadata_file = "song_data.csv"

# The triplet file is read with header=None, so column names are assigned after loading
song_df1 = pandas.read_table(triplets_file, header=None)
song_df1.columns = ['user_id', 'song_id', 'listen_count']

# Song metadata table
song_df2 = pandas.read_csv(song_metadata_file)

# Keep a single metadata row per song_id so the left join below cannot multiply
# triplet rows, then attach the metadata to every triplet to form the input
# frame for the recommender system
song_metadata_unique = song_df2.drop_duplicates(['song_id'])
song_df = pandas.merge(
    left=song_df1,
    right=song_metadata_unique,
    how='left',
    on="song_id",
)

In [3]:
# Report how many rows the full merged frame has before it is cut down
print("total numberof training data:",len(song_df))

# Work on the first SUBSET_SIZE rows only.
# .copy() turns the subset into an independent frame instead of a slice of the
# full merged frame: the column added to song_df in a later cell is then written
# to the subset's own data, and the subset no longer needs the full frame's
# underlying data to stay alive.
SUBSET_SIZE = 10000
song_df = song_df.head(SUBSET_SIZE).copy()
song_df.head()


total numberof training data: 1450933


Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOBONKR12A58A7A7E0,1,You're The One,If There Was A Way,Dwight Yoakam,1990
1,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOEGIYH12A6D4FC0E3,1,Horn Concerto No. 4 in E flat K495: II. Romanc...,Mozart - Eine kleine Nachtmusik,Barry Tuckwell/Academy of St Martin-in-the-Fie...,0
2,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOFLJQZ12A6D4FADA6,1,Tive Sim,Nova Bis-Cartola,Cartola,1974
3,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SOHTKMO12AB01843B0,1,Catch You Baby (Steve Pitron & Max Sanna Radio...,Catch You Baby,Lonnie Gordon,0
4,fd50c4007b68a3737fe052d5a4f78ce8aa117f3d,SODQZCY12A6D4F9D11,1,El Cuatrero,From Argentina To The World,Miguel Calo,2001


## Merging song title and song artist into one column for analysis

In [4]:
# Build one "title-artist" label per row. The title is passed through str()
# first; artist_name is concatenated as-is.
title_text = song_df['title'].map(str)
song_df['song'] = title_text + "-" + song_df['artist_name']

## Counting the listen records per song and ranking the songs by that count

In [5]:
# Per-song tally of listen_count entries. 'count' counts the entries in each
# group; it does not add up the listen_count values themselves.
entries_per_song = song_df.groupby(['song']).agg({'listen_count': 'count'})
song_grouped = entries_per_song.reset_index()

# Each song's share of all counted entries, in percent
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = (song_grouped['listen_count'] / grouped_sum) * 100

# Highest counts first; ties are ordered by song label ascending
song_grouped.sort_values(by=['listen_count', 'song'], ascending=[False, True])

Unnamed: 0,song,listen_count,percentage
7309,You're The One-Dwight Yoakam,40,0.40
6711,Undo-Björk,38,0.38
5241,Sehr kosmisch-Harmonia,36,0.36
4959,Revelry-Kings Of Leon,32,0.32
1512,Dog Days Are Over (Radio Edit)-Florence + The ...,29,0.29
...,...,...,...
7377,Árboles de la barranca-Carlos Y Jose,1,0.01
7378,Ännu En Dag-Drifters,1,0.01
7379,Ça Marche-Christophe Maé,1,0.01
7380,Örökké Tart-Tankcsapda,1,0.01


In [6]:
# Distinct user ids in the subset. The array itself is kept in `users` because
# the recommendation cells further down pick individual users out of it by position.
user_id_column = song_df['user_id']
users = user_id_column.unique()
len(users)

753

In [7]:
# To count the number of unique songs.
# Rows whose 'song' label is missing (NaN) are dropped first: Series.unique()
# reports NaN as a value of its own, so without dropna() a missing label would
# be counted as one extra "song". (groupby on 'song' in the earlier cell already
# leaves NaN labels out, so this keeps the two counts consistent.)
songs = song_df['song'].dropna().unique()
len(songs)


7382

## Split the whole data into training and testing data 

In [8]:
# Hold out 20% of the rows as test data; random_state is fixed so the same
# split is produced on every run
split_frames = train_test_split(song_df, test_size=0.20, random_state=0)
train_data, test_data = split_frames
print(train_data.head(5))

                                       user_id             song_id  \
7389  d3c75a5a579f29a3a853b3a4ba76d3a2e5243655  SOQTHZW12A8C1400F8   
9275  3071be7411e636fcda6c99277a5b51c45a7c4866  SODQPTQ12AF72A64BC   
2995  1fa6a4add6eacc3ce1aec44fc37037db2b09bf82  SOUCBEB12A6310E1F9   
5316  c8db3788ca8f60d92abf827ad59f424f05897cfc  SOBWFXM12AB0182808   
356   6530c4fc41b9110de5d39fe0355fa103c66385f0  SOSIZFO12A58A79934   

      listen_count                                 title  \
7389             2  Aghia Sophia (2008 Digital Remaster)   
9275             6                              Scorpion   
2995             3                           Jesus Walks   
5316            14                            Orange Sky   
356              2                          In Da Gutter   

                                                release  \
7389  Epica Etica Etnica Pathos (2008 Remastered Edi...   
9275                                  The Budos Band II   
2995                                     

## Popularity filter based recommender

In [9]:
# Simple popularity based recommender...Not exclusive for a particular user
# NOTE(review): Recommenders is a project-local module whose code is not part of
# this notebook; what create() computes is not visible from here.
# Only the training split is handed to the model; `pm` is reused by the next cell.
pm = Recommenders.popularity_recommender()
pm.create(train_data)

In [15]:
# Using recommender to make some recommendation
# users[7] selects one entry of the unique user-id array built earlier; the
# notebook gives no reason for this particular position.
user_id = users[7]

print("Recommendations for User id ",user_id," : ")
# Last expression of the cell, so whatever recommend() returns is shown as the cell output
pm.recommend(user_id)

Recommendations for User id  e9dc6b4c2b22aa6dc8260e1963021567728055b2  : 


Unnamed: 0,user_id,song,score,Rank
6049,e9dc6b4c2b22aa6dc8260e1963021567728055b2,You're The One-Dwight Yoakam,34,1.0
5552,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Undo-Björk,31,2.0
4343,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Sehr kosmisch-Harmonia,30,3.0
1259,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Dog Days Are Over (Radio Edit)-Florence + The ...,24,4.0
4118,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Revelry-Kings Of Leon,23,5.0
1662,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Fireflies-Charttraxx Karaoke,19,6.0
1359,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Drop The World-Lil Wayne / Eminem,18,7.0
2206,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Horn Concerto No. 4 in E flat K495: II. Romanc...,17,8.0
4108,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Représente-Alliance Ethnik,17,9.0
4335,e9dc6b4c2b22aa6dc8260e1963021567728055b2,Secrets-OneRepublic,16,10.0


## Similarity filter based recommender

In [17]:
# Item-similarity recommender from the project-local Recommenders module
# NOTE(review): the module's code is not part of this notebook; what create()
# stores or computes is not visible from here.
# Only the training split is handed to the model; `is_model` is reused by the next cell.
is_model = Recommenders.item_similarity_recommender()
is_model.create(train_data)


In [18]:
# users[5] selects one entry of the unique user-id array built earlier; the
# notebook gives no reason for this particular position.
user_id = users[5]

print("Recommendations for User id ",user_id," : ")
# Last expression of the cell, so whatever recommend() returns is shown as the cell output
is_model.recommend(user_id)

Recommendations for User id  91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62  : 
No. of unique songs for the user: 10
no. of unique songs in the training set: 6107
Non zero values in cooccurence_matrix :206


Unnamed: 0,user_id,song,score,rank
0,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,Shakedown-Au Pairs,0.033333,1
1,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,S-Bahn-Surfen (2007 Digital Remaster)-Spider M...,0.033333,2
2,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,Attack Of The Killerbirds-Emilie Simon,0.033333,3
3,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,Hate (I Really Don't Like You)-Plain White T's,0.033333,4
4,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,Ike's Mood-Isaac Hayes,0.033333,5
5,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,Dancing In The Moonlight (It's Caught Me In It...,0.033333,6
6,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,Rock Climbing [Byte 1] (2005)-Damu The Fudgemunk,0.033333,7
7,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,Frantic-The Lovely Feathers,0.033333,8
8,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,La Femme Parallel-Thievery Corporation,0.033333,9
9,91b8fac7dc5e03f6cfaf6e2aa7171f14a8354d62,It Had To Be You (Cd)-Motion City Soundtrack,0.033333,10
