# Building a song recommender


# Fire up GraphLab Create

In [2]:
import graphlab

# Load music data

In [3]:
# Load the listening records from the relative path 'song_data.gl/' into an SFrame.
song_data = graphlab.SFrame(data='song_data.gl/')

[INFO] This non-commercial license of GraphLab Create is assigned to mrmthornton@gmail.com and will expire on October 27, 2016. For commercial licensing options, visit https://dato.com/buy/.

[INFO] Start server at: ipc:///tmp/graphlab_server-1649 - Server binary: /usr/local/lib/python2.7/dist-packages/graphlab/unity_server - Server log: /tmp/graphlab_server_1447952823.log
[INFO] GraphLab Server Version: 1.6.1


# Explore data

Music data shows how many times a user listened to a song, as well as the details of the song.

In [7]:
# Peek at a single row to see which columns the data provides.
song_data.head(n=1)

user_id,song_id,listen_count,title,artist,song
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson,The Cove - Jack Johnson


## Showing the most popular songs in the dataset

In [8]:
# Point GraphLab Canvas at the 'ipynb' target
# (presumably so visualizations render inside the notebook -- confirm against the Canvas docs).
graphlab.canvas.set_target(target='ipynb')

In [17]:
# Display the per-row listen counts ordered from largest to smallest.
# NOTE(review): this shows only the counts, not the songs they belong to, so it
# does not by itself identify the most popular songs -- consider aggregating by song.
listen_counts = song_data['listen_count']
listen_counts.sort(ascending=False)

dtype: int
Rows: 1116609
[920, 796, 683, 676, 649, 605, 585, 553, 500, 500, 488, 453, 446, 431, 422, 419, 416, 415, 413, 401, 392, 392, 383, 375, 372, 372, 364, 359, 356, 353, 353, 333, 333, 333, 330, 328, 322, 311, 311, 310, 310, 307, 305, 302, 297, 295, 293, 292, 286, 284, 283, 274, 272, 270, 267, 267, 265, 261, 261, 260, 259, 255, 254, 252, 252, 252, 251, 250, 250, 247, 243, 236, 236, 235, 234, 232, 228, 226, 226, 226, 226, 226, 225, 224, 224, 224, 224, 221, 221, 220, 219, 218, 215, 208, 208, 208, 207, 207, 207, 206, ... ]

## Count the number of unique users for different artists

In [29]:
# Count the distinct users that have at least one row for 'Kanye West'.
# The boolean mask is deliberately not called `filter`, so the Python builtin
# `filter` is not shadowed for the rest of the kernel session.
is_kanye_west = song_data['artist'] == 'Kanye West'
users = song_data[is_kanye_west]
len(users['user_id'].unique())

2522

In [30]:
# Count the distinct users that have at least one row for 'Foo Fighters'.
# The boolean mask is deliberately not called `filter`, so the Python builtin
# `filter` is not shadowed for the rest of the kernel session.
is_foo_fighters = song_data['artist'] == 'Foo Fighters'
users = song_data[is_foo_fighters]
len(users['user_id'].unique())

2055

In [31]:
# Count the distinct users that have at least one row for 'Taylor Swift'.
# The boolean mask is deliberately not called `filter`, so the Python builtin
# `filter` is not shadowed for the rest of the kernel session.
is_taylor_swift = song_data['artist'] == 'Taylor Swift'
users = song_data[is_taylor_swift]
len(users['user_id'].unique())

3246

In [32]:
# Count the distinct users that have at least one row for 'Lady GaGa'.
# The boolean mask is deliberately not called `filter`, so the Python builtin
# `filter` is not shadowed for the rest of the kernel session.
is_lady_gaga = song_data['artist'] == 'Lady GaGa'
users = song_data[is_lady_gaga]
len(users['user_id'].unique())

2928

# Group by artist to find the most and least listened-to artists

In [37]:
# Sum listen_count per artist, then order the artists from the highest total
# to the lowest.
grouped = (
    song_data
    .groupby(key_columns='artist',
             operations={'total_count': graphlab.aggregate.SUM('listen_count')})
    .sort('total_count', ascending=False)
)
# First row of the descending sort: the artist with the largest total.
grouped[0]

{'artist': 'Kings Of Leon', 'total_count': 43218}

In [38]:
  # Last row of `grouped`; because `grouped` is sorted by total_count in
  # descending order, this is the artist with the smallest total.
  grouped[-1]

{'artist': 'William Tabbert', 'total_count': 14}

# Create a song recommender
## Split the data into training and test sets

In [39]:
# Randomly assign rows to train/test with a 0.8 fraction for the first split;
# the fixed seed keeps the split the same across runs.
train_data, test_data = song_data.random_split(fraction=0.8, seed=0)

## Simple popularity-based and item-similarity recommenders

In [48]:
# Train a popularity recommender on the training split, using `user_id` as the
# user column and `song` as the item column. No target column is supplied.
popularity_model = graphlab.popularity_recommender.create(
    observation_data=train_data,
    user_id='user_id',
    item_id='song',
)


PROGRESS: Recsys training: model = popularity
PROGRESS:     To use one of these as a target column, set target = <column_name>
PROGRESS:     and use a method that allows the use of a target.
PROGRESS: Preparing data set.
PROGRESS:     Data has 893580 observations with 66085 users and 9952 items.
PROGRESS:     Data prepared in: 2.45652s
PROGRESS: 893580 observations to process; with 9952 unique items.


In [49]:
# Train an item-similarity recommender on the same training split and the same
# user/item columns as the popularity model, so the two can be compared.
similarity_model = graphlab.item_similarity_recommender.create(
    observation_data=train_data,
    user_id='user_id',
    item_id='song',
)

PROGRESS: Recsys training: model = item_similarity
PROGRESS:     To use one of these as a target column, set target = <column_name>
PROGRESS:     and use a method that allows the use of a target.
PROGRESS: Preparing data set.
PROGRESS:     Data has 893580 observations with 66085 users and 9952 items.
PROGRESS:     Data prepared in: 2.00938s
PROGRESS: Computing item similarity statistics:
PROGRESS: Computing most similar items for 9952 items:
PROGRESS: +-----------------+-----------------+
PROGRESS: | Number of items | Elapsed Time    |
PROGRESS: +-----------------+-----------------+
PROGRESS: | 1000            | 1.61367         |
PROGRESS: | 2000            | 1.84711         |
PROGRESS: | 3000            | 2.07711         |
PROGRESS: | 4000            | 2.29877         |
PROGRESS: | 5000            | 2.51733         |
PROGRESS: | 6000            | 2.71756         |
PROGRESS: | 7000            | 2.89418         |
PROGRESS: | 8000            | 3.05081         |
PROGRESS: | 9000          

In [50]:
# Keep the first 10000 distinct user ids from the test split to bound the cost
# of the recommendation queries that use this subset.
# NOTE(review): which users land in the first 10000 depends on the order
# returned by unique() -- confirm whether that order is stable across runs.
unique_test_users = test_data['user_id'].unique()
subset_test_users = unique_test_users[:10000]

In [51]:
# Top-1 recommendation from the popularity model for each user in the subset.
popularity_model.recommend(users=subset_test_users, k=1)

PROGRESS: recommendations finished on 1000/10000 queries. users per second: 1704.36
PROGRESS: recommendations finished on 2000/10000 queries. users per second: 1721.25
PROGRESS: recommendations finished on 3000/10000 queries. users per second: 1727.31
PROGRESS: recommendations finished on 4000/10000 queries. users per second: 1761.57
PROGRESS: recommendations finished on 5000/10000 queries. users per second: 1765.03
PROGRESS: recommendations finished on 6000/10000 queries. users per second: 1769.67
PROGRESS: recommendations finished on 7000/10000 queries. users per second: 1782.63
PROGRESS: recommendations finished on 8000/10000 queries. users per second: 1785.63
PROGRESS: recommendations finished on 9000/10000 queries. users per second: 1783.47
PROGRESS: recommendations finished on 10000/10000 queries. users per second: 1792.36


user_id,song,score,rank
b048033af070b5dbb18d5d0e5 f334c9390611b04 ...,Sehr kosmisch - Harmonia,4754.0,1
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Sehr kosmisch - Harmonia,4754.0,1
ed04954d5b6001c7945c6ac71 686c3bd4ecdacb3 ...,Sehr kosmisch - Harmonia,4754.0,1
b1e6e9563da324641e644c769 b7edf202186de47 ...,Undo - Björk,4227.0,1
02f015d32ac2cd1e52d26e3ec 36048711dd5711b ...,Sehr kosmisch - Harmonia,4754.0,1
91b986eeb5d81eec60dc4b136 f04c0cfd662d658 ...,Sehr kosmisch - Harmonia,4754.0,1
f933855d675606737fdc191e9 edff7625d08aae8 ...,Sehr kosmisch - Harmonia,4754.0,1
4867d5516a280db13695b9b9c 7ce6b574f34c6b4 ...,Sehr kosmisch - Harmonia,4754.0,1
968f1baebc490d3c6999ee6c8 5c5cab8b726b347 ...,Sehr kosmisch - Harmonia,4754.0,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Sehr kosmisch - Harmonia,4754.0,1


In [53]:
# Top-1 recommendation from the item-similarity model for the same user subset;
# the result is kept in `small_set` so it can be inspected and aggregated.
small_set = similarity_model.recommend(users=subset_test_users, k=1)

PROGRESS: recommendations finished on 1000/10000 queries. users per second: 746.49
PROGRESS: recommendations finished on 2000/10000 queries. users per second: 748.137
PROGRESS: recommendations finished on 3000/10000 queries. users per second: 748.661
PROGRESS: recommendations finished on 4000/10000 queries. users per second: 753.093
PROGRESS: recommendations finished on 5000/10000 queries. users per second: 753.845
PROGRESS: recommendations finished on 6000/10000 queries. users per second: 756.264
PROGRESS: recommendations finished on 7000/10000 queries. users per second: 758.379
PROGRESS: recommendations finished on 8000/10000 queries. users per second: 760.004
PROGRESS: recommendations finished on 9000/10000 queries. users per second: 758.577
PROGRESS: recommendations finished on 10000/10000 queries. users per second: 743.22


In [54]:
# Show the first 10 rows of the item-similarity recommendations.
small_set.head(n=10)

user_id,song,score,rank
b048033af070b5dbb18d5d0e5 f334c9390611b04 ...,Fantasy - The xx,0.037720015587,1
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...,Cuando Pase El Temblor - Soda Stereo ...,0.0194504525792,1
ed04954d5b6001c7945c6ac71 686c3bd4ecdacb3 ...,Coming Your Way - Iration,0.031314214241,1
b1e6e9563da324641e644c769 b7edf202186de47 ...,Pimpa's Paradise - Damian Marley / Stephen Marl ...,0.0694444444444,1
02f015d32ac2cd1e52d26e3ec 36048711dd5711b ...,Where The Boat Leaves From (Album) - Zac Brown ...,0.063530766032,1
91b986eeb5d81eec60dc4b136 f04c0cfd662d658 ...,Jezebel - Sade,0.0588785769489,1
f933855d675606737fdc191e9 edff7625d08aae8 ...,Schießt die Deutschen raus - Mario Lang ...,0.0357374917866,1
4867d5516a280db13695b9b9c 7ce6b574f34c6b4 ...,Two Steps_ Twice - Foals,0.0104654895666,1
968f1baebc490d3c6999ee6c8 5c5cab8b726b347 ...,Me_ Myself And I - Beyoncé ...,0.0183084820675,1
c067c22072a17d33310d7223d 7b79f819e48cf42 ...,Grind With Me (Explicit Version) - Pretty Ricky ...,0.0459424433009,1


In [55]:
# Count how many times each song appears as the top-1 recommendation in
# `small_set`, then list the songs from most to least frequently recommended.
recommendation_counts = small_set.groupby(
    key_columns='song',
    operations={'count': graphlab.aggregate.COUNT()})
recommendation_counts.sort("count", ascending=False)

song,count
Undo - Björk,447
Secrets - OneRepublic,373
Revelry - Kings Of Leon,228
You're The One - Dwight Yoakam ...,179
Fireflies - Charttraxx Karaoke ...,124
Hey_ Soul Sister - Train,117
Horn Concerto No. 4 in E flat K495: II. Romance ...,95
OMG - Usher featuring will.i.am ...,68
Sehr kosmisch - Harmonia,66
Dog Days Are Over (Radio Edit) - Florence + The ...,52
