# Using data of user,song pairs with count of how many times the user played the song

# I have built two models : one which simply recommends songs based on popularity while other which uses the count co-occurence matrix to determine the recommended songs

In [1]:
import turicreate as tc

In [2]:
songs_data = tc.SFrame('song_data.sframe')

In [4]:
songs_data.head()

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODDNQT12A6D4F5F7E,5,Apuesta Por El Rock 'N' Roll ...,Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODXRTY12AB0180F3B,1,Paper Gangsta,Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFGUAY12AB017B0A8,1,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFRQTD12A81C233C0,1,Sehr kosmisch,Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOHQWYZ12A6D4FA701,1,Heaven's gonna burn your eyes ...,Thievery Corporation feat. Emiliana Torrini ...

song
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia ...
Stronger - Kanye West
Constellations - Jack Johnson ...
Learn To Fly - Foo Fighters ...
Apuesta Por El Rock 'N' Roll - Héroes del ...
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters ...
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery ...


# Let us look at some artists and see how many users listen to them

In [5]:
kanye_west = songs_data[songs_data['artist'] == 'Kanye West']
taylor_swift = songs_data[songs_data['artist'] == 'Taylor Swift']
lady_gaga = songs_data[songs_data['artist'] == 'Lady GaGa']
foo_fighters = songs_data[songs_data['artist'] == 'Foo Fighters']

In [6]:
users_kanye = kanye_west['user_id'].unique()

In [7]:
len(users_kanye)

2522

In [9]:
users_taylor = taylor_swift['user_id'].unique()
users_gaga = lady_gaga['user_id'].unique()
users_foo = foo_fighters['user_id'].unique()

len(users_taylor)

3246

In [10]:
len(users_gaga)

2928

In [11]:
len(users_foo)

2055

## Let us figure out the most popular artists

In [29]:
songs_data.groupby(key_column_names='artist', operations={'total_count': tc.aggregate.SUM('listen_count')}).sort('total_count',ascending=False)

artist,total_count
Kings Of Leon,43218
Dwight Yoakam,40619
Björk,38889
Coldplay,35362
Florence + The Machine,33387
Justin Bieber,29715
Alliance Ethnik,26689
OneRepublic,25754
Train,25402
The Black Keys,22184


# Popularity Model

In [17]:
train_data,test_data = songs_data.random_split(.8,seed=0)

In [18]:
popularity_model = tc.popularity_recommender.create(train_data,user_id='user_id',item_id='song')

In [20]:
popularity_model.recommend(users=[users_kanye[0]]) # no personalization found

user_id,song,score,rank
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Sehr kosmisch - Harmonia,4754.0,1
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Undo - Björk,4227.0,2
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,You're The One - Dwight Yoakam ...,3781.0,3
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Dog Days Are Over (Radio Edit) - Florence + The ...,3633.0,4
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Revelry - Kings Of Leon,3527.0,5
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Horn Concerto No. 4 in E flat K495: II. Romance ...,3161.0,6
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Secrets - OneRepublic,3148.0,7
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Hey_ Soul Sister - Train,2538.0,8
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Fireflies - Charttraxx Karaoke ...,2532.0,9
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Tive Sim - Cartola,2521.0,10


# Similarilty based recommender

In [22]:
personalized_model = tc.item_similarity_recommender.create(train_data,user_id='user_id',item_id='song')

In [23]:
personalized_model.recommend(users=[users_kanye[0]]) # find some degree of personalization

user_id,song,score,rank
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,El Chupe Nibre (Album Version) - Danger Doom ...,0.0100851539108488,1
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Crosshairs - Dangerdoom,0.0100706732935375,2
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Your Star - The All- American Rejects ...,0.0098412682612737,3
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Don't Leave Me - The All- American Rejects ...,0.0094238519668579,4
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Magic - The Pussycat Dolls ...,0.0092443144983715,5
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,The Last Song - The All- American Rejects ...,0.008984570701917,6
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,When You Look Me In The Eyes - Jonas Brothers ...,0.0084607402483622,7
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,One More Sad Song - The All-American Rejects ...,0.0081760717762841,8
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,Mince Meat - Dangerdoom,0.0079040875037511,9
900ed526660df9c3442ebe736 db94eb0bc251b73 ...,A.T.H.F. (Aqua Teen Hunger Force) (Album ...,0.0078561984830432,10


## Let us investigate the most recommended songs using the personalized model

In [24]:
# We take a subset of users , precisely 10,000
subset_test_users = songs_data['user_id'].unique()[0:10000]

In [25]:
recommended_songs = personalized_model.recommend(subset_test_users,k=1)

In [26]:
recommended_songs.show()

In [27]:
recommended_songs.groupby(key_column_names='song', operations={'total_count': tc.aggregate.COUNT()}).sort('total_count',ascending=False)

song,total_count
Undo - Björk,439
Secrets - OneRepublic,356
Revelry - Kings Of Leon,218
You're The One - Dwight Yoakam ...,158
Fireflies - Charttraxx Karaoke ...,115
Horn Concerto No. 4 in E flat K495: II. Romance ...,103
Hey_ Soul Sister - Train,100
Sehr kosmisch - Harmonia,77
OMG - Usher featuring will.i.am ...,64
U Smile - Justin Bieber,47
