# Recommend Song

In [1]:
import graphlab

In [2]:
# Cap the number of pylambda worker processes at four so a hosted
# notebook does not run out of system memory and crash.
graphlab.set_runtime_config(
    'GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS',
    4,
)

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1555654778.log


This non-commercial license of GraphLab Create for academic use is assigned to [email redacted] and will expire on March 22, 2020.


# Load Music Data

In [3]:
# Load the listening data stored at song_data.gl/ into an SFrame.
song_data = graphlab.SFrame("song_data.gl/")

In [31]:
# Preview the first ten rows (ten is also head()'s default).
song_data.head(n=10)

user_id,song_id,listen_count,title,artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOAKIMP12A8C130995,1,The Cove,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBXHDL12A81C204C0,1,Stronger,Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOBYHAJ12A6701BF1D,1,Constellations,Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODACBL12A8C13C273,1,Learn To Fly,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODDNQT12A6D4F5F7E,5,Apuesta Por El Rock 'N' Roll ...,Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SODXRTY12AB0180F3B,1,Paper Gangsta,Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFGUAY12AB017B0A8,1,Stacked Actors,Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOFRQTD12A81C233C0,1,Sehr kosmisch,Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...,SOHQWYZ12A6D4FA701,1,Heaven's gonna burn your eyes ...,Thievery Corporation feat. Emiliana Torrini ...

song
The Cove - Jack Johnson
Entre Dos Aguas - Paco De Lucia ...
Stronger - Kanye West
Constellations - Jack Johnson ...
Learn To Fly - Foo Fighters ...
Apuesta Por El Rock 'N' Roll - Héroes del ...
Paper Gangsta - Lady GaGa
Stacked Actors - Foo Fighters ...
Sehr kosmisch - Harmonia
Heaven's gonna burn your eyes - Thievery ...


In [38]:
# Select the "ipynb" Canvas target before calling .show().
# NOTE(review): presumably this renders the visualization inline in the
# notebook rather than in a separate view — confirm for this GraphLab version.
graphlab.canvas.set_target("ipynb")
# Visualize the artist column of the listening data.
song_data["artist"].show()

# Task 1: Count unique users (overall and per artist)

In [4]:
# Count the distinct user ids that appear in the listening data.
users = song_data["user_id"].unique()
# Parenthesized single-argument print emits the same text on Python 2
# and also parses on Python 3 (the bare print statement does not).
print("Unique users: {0}".format(len(users)))

Unique users: 66346


In [12]:
# Keep only the rows whose artist column is exactly "Kanye West".
kanye_west_user_group = song_data[song_data["artist"] == "Kanye West"]
# Distinct user ids among those rows.
kanye_west_user = kanye_west_user_group["user_id"].unique()
# Parenthesized single-argument print emits the same text on Python 2
# and also parses on Python 3 (the bare print statement does not).
print("Unique users for Kanye West: {0}".format(len(kanye_west_user)))

Unique users for Kanye West: 2522


In [13]:
# Keep only the rows whose artist column is exactly "Foo Fighters".
foo_fighters_user_group = song_data[song_data["artist"] == "Foo Fighters"]
# Distinct user ids among those rows.
foo_fighters_user = foo_fighters_user_group["user_id"].unique()
# Parenthesized single-argument print emits the same text on Python 2
# and also parses on Python 3 (the bare print statement does not).
print("Unique users for Foo Fighters: {0}".format(len(foo_fighters_user)))

Unique users for Foo Fighters: 2055


In [14]:
# Keep only the rows whose artist column is exactly "Taylor Swift".
taylor_swift_user_group = song_data[song_data["artist"] == "Taylor Swift"]
# Distinct user ids among those rows.
taylor_swift_user = taylor_swift_user_group["user_id"].unique()
# Parenthesized single-argument print emits the same text on Python 2
# and also parses on Python 3 (the bare print statement does not).
print("Unique users for Taylor Swift: {0}".format(len(taylor_swift_user)))

Unique users for Taylor Swift: 3246


In [15]:
# Keep only the rows whose artist column is exactly "Lady GaGa".
lady_gaga_user_group = song_data[song_data["artist"] == "Lady GaGa"]
# Distinct user ids among those rows.
lady_gaga_user = lady_gaga_user_group["user_id"].unique()
# Parenthesized single-argument print emits the same text on Python 2
# and also parses on Python 3 (the bare print statement does not).
print("Unique users for Lady GaGa: {0}".format(len(lady_gaga_user)))

Unique users for Lady GaGa: 2928


# Task 2: Total listen count per artist

In [40]:
# Sum listen_count within each artist, then display the artists ordered
# from the smallest total upward.
total_count = song_data.groupby(
    key_columns="artist",
    operations={"total_count": graphlab.aggregate.SUM("listen_count")},
)
total_count.sort("total_count", ascending=True)

artist,total_count
William Tabbert,14
Reel Feelings,24
Beyoncé feat. Bun B and Slim Thug ...,26
Boggle Karaoke,30
Diplo,30
harvey summers,31
Nâdiya,36
Jody Bernal,38
Aneta Langerova,38
Kanye West / Talib Kweli / Q-Tip / Common / ...,38


# Task 3: Find the most recommended song

## Build a Personalized Recommender

In [5]:
# Randomly split the listening data with a 0.8 fraction for training;
# the fixed seed keeps the split repeatable across runs.
train_data, test_data = song_data.random_split(0.8, seed=0)

In [7]:
# Fit an item-similarity recommender on the training split, using the
# "user_id" column for users and the "song" column for items.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data,
    user_id="user_id",
    item_id="song",
)

In [8]:
# Take the first 10,000 entries of the unique test-set user ids.
subset_test_user = test_data["user_id"].unique()[:10000]

In [13]:
# Ask the model for a single recommended song (k=1) for every user in
# the test subset.
subset_recommended_song = personalized_model.recommend(
    users=subset_test_user,
    k=1,
)

In [17]:
# Count how many times each song was recommended, then display the songs
# ordered from the most frequently recommended downward.
most_recommended_song = subset_recommended_song.groupby(
    key_columns="song",
    operations={"total_count": graphlab.aggregate.COUNT()},
)
most_recommended_song.sort("total_count", ascending=False)

song,total_count
Undo - Björk,438
Secrets - OneRepublic,336
Revelry - Kings Of Leon,223
You're The One - Dwight Yoakam ...,162
Fireflies - Charttraxx Karaoke ...,120
Hey_ Soul Sister - Train,113
Sehr kosmisch - Harmonia,112
Horn Concerto No. 4 in E flat K495: II. Romance ...,88
OMG - Usher featuring will.i.am ...,62
Bigger - Justin Bieber,43
