### Data Mining and Machine Learning
### Recommendation Systems based on popularity
### Edgar Acuna
### adapted from https://github.com/vyashemang/popularity_based_recommendation
### May 2020

In [10]:
import os
os.getcwd()  # NOTE(review): the return value is discarded (this is not the cell's last expression), so the call has no visible effect
import pandas
import numpy as np
import Recommender  # module that supplies Popularity_Recommender, used further down
import warnings
# Suppress all warnings for the rest of the session; note this also hides pandas warnings raised by later cells.
warnings.filterwarnings('ignore')

In [11]:
# Load the (user_id, song_id, listen_count) triplets; the source file has no header row,
# so the column names are supplied at read time.
# NOTE: the 3863-user / 9891-song figures associated with this data are the counts computed
# further down, after the merged frame has been cut to its first 100,000 rows.
url = 'https://static.turi.com/datasets/millionsong/10000.txt'
song_df_1 = pandas.read_table(
    url,
    header=None,
    names=['user_id', 'song_id', 'listen_count'],
)

In [12]:
# Preview the first five listen records
song_df_1.head(n=5)

Unnamed: 0,user_id,song_id,listen_count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1


In [13]:
# Load the song metadata table (song_id, title, release, artist_name, year) and preview it
songs_metadata_file = 'https://academic.uprm.edu/eacuna/song_data.csv'
song_df_2 = pandas.read_csv(filepath_or_buffer=songs_metadata_file)
song_df_2.head(n=5)

Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


In [14]:
# Attach the metadata columns to every listen record.
# The metadata is de-duplicated on song_id before the left join, so each listen record
# matches at most one metadata row and the row count of song_df_1 is preserved.
song_df = song_df_1.merge(
    song_df_2.drop_duplicates(subset=['song_id']),
    how="left",
    on="song_id",
)

In [15]:
# Print the merged frame's row count, column dtypes and memory footprint
song_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2000000 entries, 0 to 1999999
Data columns (total 7 columns):
user_id         object
song_id         object
listen_count    int64
title           object
release         object
artist_name     object
year            int64
dtypes: int64(2), object(5)
memory usage: 122.1+ MB


In [16]:
# Keep only the first 100,000 listen records (rows, not distinct songs).
# .copy() makes the subset an independent frame: head() alone returns a slice of the
# merged frame, and adding a column to that slice later raises SettingWithCopyWarning
# (which would go unnoticed here because warnings are filtered).
song_df = song_df.head(100000).copy()

In [17]:
# Create the 'song' label column as "<title> - <artist_name>".
# assign() builds a new frame rather than writing a column into song_df in place:
# song_df comes from head(), so an in-place write would target a slice of the merged
# frame and trigger SettingWithCopyWarning. Re-running this cell gives the same result.
song_df = song_df.assign(
    song=song_df['title'].map(str) + " - " + song_df['artist_name']
)
song_df.head()

Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year,song
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0,The Cove - Jack Johnson
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976,Entre Dos Aguas - Paco De Lucia
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007,Stronger - Kanye West
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005,Constellations - Jack Johnson
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999,Learn To Fly - Foo Fighters


In [18]:
# Popularity table: one row per song label.
# The 'count' aggregate counts rows per song (i.e. how many listen records mention it);
# it does not sum the listen_count values, even though the output column keeps that name.
song_df_grouped = song_df.groupby(['song']).agg({'listen_count': 'count'}).reset_index()
# Show only the ten most frequent songs instead of displaying the whole sorted frame
song_df_grouped.sort_values('listen_count', ascending=False).head(10)

Unnamed: 0,song,listen_count
7079,Sehr kosmisch - Harmonia,427
9030,Undo - Björk,367
2052,Dog Days Are Over (Radio Edit) - Florence + Th...,363
9818,You're The One - Dwight Yoakam,317
7067,Secrets - OneRepublic,308
6727,Revelry - Kings Of Leon,306
3587,Horn Concerto No. 4 in E flat K495: II. Romanc...,266
2695,Fireflies - Charttraxx Karaoke,258
8793,Tive Sim - Cartola,246
3460,Hey_ Soul Sister - Train,235


In [19]:
# Distinct user ids present in the working frame, and how many there are
users = pandas.unique(song_df['user_id'])
len(users)

3863

In [20]:
# Distinct song labels present in the working frame, and how many there are
items = pandas.unique(song_df['song'])
len(items)

9891

In [21]:
# Randomly hold out 20% of the rows as a test set; random_state is fixed so the
# split is the same on every run.
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(
    song_df,
    test_size=0.20,
    random_state=0,
)

In [22]:
# Create the recommendation system using the training data only
pr = Recommender.Popularity_Recommender()
# Arguments: the training frame, then the column names 'user_id' and 'song' —
# presumably the user and item columns; confirm against Recommender's create()
pr.create(train_data, 'user_id', 'song')

In [23]:
# NOTE(review): this references the bound method without calling it, so the cell only
# displays the method's repr; create() itself was already called in the previous cell.
pr.create

<bound method Popularity_Recommender.create of <Recommender.Popularity_Recommender object at 0x0000017C221DBE10>>

In [24]:
# Recommended songs for the third user (index 2 of `users`)
pr.recommend(users[2])

Unnamed: 0,user_id,song,score,Rank
7005,bd4c6e843f00bd476847fb75c47b4fb430a06856,Sehr kosmisch - Harmonia,345,1.0
2034,bd4c6e843f00bd476847fb75c47b4fb430a06856,Dog Days Are Over (Radio Edit) - Florence + Th...,290,2.0
8939,bd4c6e843f00bd476847fb75c47b4fb430a06856,Undo - Björk,289,3.0
9719,bd4c6e843f00bd476847fb75c47b4fb430a06856,You're The One - Dwight Yoakam,255,4.0
6994,bd4c6e843f00bd476847fb75c47b4fb430a06856,Secrets - OneRepublic,249,5.0
6659,bd4c6e843f00bd476847fb75c47b4fb430a06856,Revelry - Kings Of Leon,245,6.0
3559,bd4c6e843f00bd476847fb75c47b4fb430a06856,Horn Concerto No. 4 in E flat K495: II. Romanc...,213,7.0
2672,bd4c6e843f00bd476847fb75c47b4fb430a06856,Fireflies - Charttraxx Karaoke,204,8.0
8704,bd4c6e843f00bd476847fb75c47b4fb430a06856,Tive Sim - Cartola,204,9.0
3432,bd4c6e843f00bd476847fb75c47b4fb430a06856,Hey_ Soul Sister - Train,183,10.0


In [25]:
# Recommended songs for the 21st user (index 20 of `users`).
# The recorded output lists the same songs, scores and ranks as for users[2];
# only the user_id column differs.
pr.recommend(users[20])

Unnamed: 0,user_id,song,score,Rank
7005,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Sehr kosmisch - Harmonia,345,1.0
2034,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Dog Days Are Over (Radio Edit) - Florence + Th...,290,2.0
8939,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Undo - Björk,289,3.0
9719,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,You're The One - Dwight Yoakam,255,4.0
6994,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Secrets - OneRepublic,249,5.0
6659,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Revelry - Kings Of Leon,245,6.0
3559,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Horn Concerto No. 4 in E flat K495: II. Romanc...,213,7.0
2672,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Fireflies - Charttraxx Karaoke,204,8.0
8704,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Tive Sim - Cartola,204,9.0
3432,0afaa5d9d04bf85af720fe8cc566a41ca3e41c97,Hey_ Soul Sister - Train,183,10.0
