In [1]:
import graphlab
import requests

In [2]:
graphlab.set_runtime_config('GRAPHLAB_CACHE_FILE_LOCATIONS', 'G:\GraphLabTemp') # set cache to external drive

This non-commercial license of GraphLab Create for academic use is assigned to jbmlaird@gmail.com and will expire on April 20, 2018.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\j\AppData\Local\Temp\graphlab_server_1493846878.log.0


In [3]:
discogs_frame = graphlab.SFrame('discogs.csv')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


## Rating: 0 unrated, 1-5 rated

## Category: 0 = collection, 1 = wantlist, 2 = for sale

In [4]:
discogs_frame.rename({'1':'userId', '63257':'releaseId', '0':'rating','0.1':'category'}) # Lost a row here renaming. Add it later

userId,releaseId,rating,category
1,31435,0,0
1,77503,0,0
1,19282,0,0
1,71232,0,0
1,83485,0,0
1,32466,0,0
1,5471,5,0
1,5482,0,0
1,10372,0,0
1,13148,0,1


In [5]:
graphlab.canvas.set_target('ipynb')
discogs_frame.show()

#### ~215,000  users

# Find the most occuring releases

In [6]:
most_frequent_releases = discogs_frame.groupby(key_columns={'releaseId'}, operations={'appearance_count':
                                                           graphlab.aggregate.COUNT('releaseId')}).sort('appearance_count', ascending=False)

In [7]:
most_frequent_releases

releaseId,appearance_count
4570366,9825
526351,8443
367104,7276
2911293,7031
1587168,6403
5764967,6159
74260,6127
194021,6046
1343227,5748
242785,5455


## Get the human readable names of these releases

In [8]:
discogskey = open('discogskey.txt').readline() # Get my Discogs API token

In [9]:
payload = {'token':discogskey}
def get_release_name(releaseId):
    response = requests.get('https://api.discogs.com/releases/' + str(releaseId), params=payload)
    print response.json()['artists'][0]['name'] + " " + response.json()['title']

In [10]:
for releaseId in most_frequent_releases['releaseId'][0:10]:
    get_release_name(str(releaseId))

Daft Punk Random Access Memories
Fleetwood Mac Rumours
Pink Floyd The Dark Side Of The Moon
Michael Jackson Thriller
Radiohead OK Computer
Jack White (2) Lazaretto
Godspeed You Black Emperor! Lift Your Skinny Fists Like Antennas To Heaven
Prince And The Revolution Purple Rain
Bruce Springsteen Born In The U.S.A.
MF Doom Madvillainy


## Number of rated releases

In [11]:
print "One star ratings: " + str(format(len(discogs_frame[discogs_frame['rating']==1]), ",d"))
print "Two star ratings: " + str(format(len(discogs_frame[discogs_frame['rating']==2]), ",d"))
print "Three star ratings: " + str(format(len(discogs_frame[discogs_frame['rating']==3]), ",d"))
print "Four star ratings: " + str(format(len(discogs_frame[discogs_frame['rating']==4]), ",d"))
print "Five star ratings: " + str(format(len(discogs_frame[discogs_frame['rating']==5]), ",d"))

One star ratings: 133,695
Two star ratings: 342,173
Three star ratings: 1,537,875
Four star ratings: 3,138,727
Five star ratings: 4,486,020


In [12]:
print "Unrated: " + str(format(len(discogs_frame[discogs_frame['rating']==0]), ",d"))

Unrated: 80,385,187


## Create a subset containing only collection/wantlist/for sale without the 3 star ratings or below

In [13]:
no_star_frame = discogs_frame[discogs_frame['rating']==0]
four_or_five_star_frame = discogs_frame[discogs_frame['rating']>3]

In [14]:
no_star_frame.show()

In [15]:
no_four_five_star_frame = no_star_frame.append(four_or_five_star_frame)

In [16]:
no_four_five_star_frame.show()

## Create an item similarity recommender

In [17]:
train_data, test_data = graphlab.recommender.util.random_split_by_user(no_four_five_star_frame,
                                                                      user_id='userId',
                                                                      item_id='releaseId',
                                                                      max_num_users=None,
                                                                      random_seed=0)

In [18]:
print len(train_data)
print len(test_data)

70407893
17602041


In [19]:
item_similarity_model_ials = graphlab.recommender.ranking_factorization_recommender.create(train_data, 
                                                                   user_id='userId',
                                                                   item_id='releaseId',
                                                                   solver='ials'
                                                                   # don't use target= as '0' ratings exist
                                                                   )

In [20]:
item_similarity_model_default = graphlab.recommender.ranking_factorization_recommender.create(train_data, 
                                                                   user_id='userId',
                                                                   item_id='releaseId'
                                                                   # don't use target= as '0' ratings exist
                                                                   )

In [21]:
item_similarity_model_default.save('item_similarity_default')

In [22]:
item_similarity_model_ials.save('item_similarity_ials')

In [27]:
item_similarity_model_default.recommend(users=[test_data['userId'][0]])

userId,releaseId,score,rank
1,4570366,0.551233771365,1
1,526351,0.547710765788,2
1,367104,0.544642246177,3
1,2911293,0.543574780664,4
1,1587168,0.541797898881,5
1,5764967,0.540846709232,6
1,74260,0.540641499006,7
1,194021,0.540630572504,8
1,1343227,0.539656205698,9
1,242785,0.538832057001,10


In [30]:
item_similarity_model_default.recommend(users=[test_data['userId'][2]])

userId,releaseId,score,rank
1,4570366,0.551233771365,1
1,526351,0.547710765788,2
1,367104,0.544642246177,3
1,2911293,0.543574780664,4
1,1587168,0.541797898881,5
1,5764967,0.540846709232,6
1,74260,0.540641499006,7
1,194021,0.540630572504,8
1,1343227,0.539656205698,9
1,242785,0.538832057001,10


In [35]:
unique_users = test_data['userId'].unique()

In [38]:
unique_users.show()

In [36]:
item_similarity_model_default.recommend(users=[unique_users[1]])

userId,releaseId,score,rank
79732,4570366,0.551153289368,1
79732,526351,0.547572655909,2
79732,367104,0.544541723222,3
79732,2911293,0.54343684648,4
79732,1587168,0.541707188074,5
79732,5764967,0.540717864331,6
79732,74260,0.540554146247,7
79732,194021,0.540492827622,8
79732,1343227,0.539500315822,9
79732,242785,0.538745751563,10


In [39]:
item_similarity_model_default.recommend(users=[unique_users[2]])

userId,releaseId,score,rank
7899,4570366,0.551283368953,1
7899,526351,0.547702232255,2
7899,367104,0.544672011043,3
7899,2911293,0.543566671804,4
7899,1587168,0.541837731358,5
7899,5764967,0.540847566669,6
7899,74260,0.540684132823,7
7899,194021,0.540622625002,8
7899,1343227,0.539629830244,9
7899,242785,0.53887604022,10


## Above model seems to have little personalisation (if any)

In [40]:
item_similarity_model_ials.recommend(users=[unique_users[0]])

userId,releaseId,score,rank
211023,3235,0.124881193042,1
211023,11650,0.113591007888,2
211023,14008,0.100242123008,3
211023,367315,0.0997008383274,4
211023,2719,0.0985483527184,5
211023,35527,0.0966463088989,6
211023,11879,0.0960903316736,7
211023,161255,0.095758959651,8
211023,443973,0.0952921509743,9
211023,555652,0.0919819921255,10


In [41]:
item_similarity_model_ials.recommend(users=[unique_users[1]])

userId,releaseId,score,rank
79732,4570366,0.0367681011558,1
79732,1587168,0.030897513032,2
79732,1450555,0.0276162102818,3
79732,1361007,0.0262891165912,4
79732,242785,0.0259638763964,5
79732,1359927,0.0256631523371,6
79732,74260,0.0256156809628,7
79732,1187003,0.0248263273388,8
79732,5764967,0.0238802488893,9
79732,2606952,0.0237387046218,10


In [42]:
item_similarity_model_ials.recommend(users=[unique_users[2]])

userId,releaseId,score,rank
7899,4570366,0.0263876877725,1
7899,1587168,0.0249807834625,2
7899,1361007,0.0221428461373,3
7899,1450555,0.0209820382297,4
7899,1359927,0.0207548420876,5
7899,5764967,0.0200974363834,6
7899,1187003,0.0197860337794,7
7899,2093841,0.0181937236339,8
7899,2173833,0.0180593822151,9
7899,2940876,0.0176099278033,10


In [43]:
item_similarity_model_ials.recommend(users=[unique_users[3]])

userId,releaseId,score,rank
25263,1279,0.302890717983,1
25263,1039,0.299767792225,2
25263,2231,0.28274551034,3
25263,140,0.275118231773,4
25263,11375,0.271989405155,5
25263,946,0.267859756947,6
25263,3224,0.258689761162,7
25263,2258,0.254996418953,8
25263,139,0.253149390221,9
25263,236605,0.24516877532,10


In [49]:
for release in item_similarity_model_ials.recommend(users=[unique_users[3]])[0:10]:
    get_release_name(release['releaseId'])

Jeff Mills The Purpose Maker
Jeff Mills Steampit EP
Millsart Mecca EP
Jeff Mills Force Universelle EP
Laurent Garnier Crispy Bacon (Part 1)
Jeff Mills Java EP
Jeff Mills The Other Day EP
Plastikman Spastik
Jeff Mills Skin Deep EP
Daft Punk Homework


## This model is personalised and recommends in the same genre

In [50]:
for release in item_similarity_model_ials.recommend(users=[unique_users[4]])[0:10]:
    get_release_name(release['releaseId'])

Burial Untrue
Burial Burial
Burial Street Halo
Burial Kindred
Joy Orbison Hyph Mngo / Wet Look
Skream Midnight Request Line / I
Burial Truant
Loefah Disko Rekah / All Of A Sudden
Mala (4) Left Leg Out / Blue Notez
Boards Of Canada Tomorrow's Harvest


In [51]:
for release in item_similarity_model_ials.recommend(users=[unique_users[5]])[0:10]:
    get_release_name(release['releaseId'])

Fleetwood Mac Rumours
Pink Floyd The Dark Side Of The Moon
Bruce Springsteen Born In The U.S.A.
Michael Jackson Thriller
Prince And The Revolution Purple Rain
The Cars The Cars
Billy Joel The Stranger
Daft Punk Random Access Memories
Neil Young Harvest
Jack White (2) Lazaretto


In [52]:
for release in item_similarity_model_ials.recommend(users=[unique_users[6]])[0:10]:
    get_release_name(release['releaseId'])

Daft Punk Random Access Memories
Fleetwood Mac Rumours
Michael Jackson Thriller
Pink Floyd The Dark Side Of The Moon
Billy Joel The Stranger
Bruce Springsteen Born In The U.S.A.
Prince And The Revolution Purple Rain
Boards Of Canada Tomorrow's Harvest
Huey Lewis & The News Sports
Peter Frampton Frampton Comes Alive!


In [53]:
for release in item_similarity_model_ials.recommend(users=[unique_users[7]])[0:10]:
    get_release_name(release['releaseId'])

Godspeed You Black Emperor! Lift Your Skinny Fists Like Antennas To Heaven
Talking Heads Talking Heads: 77
Talking Heads More Songs About Buildings And Food
Devo Q: Are We Not Men? A: We Are Devo!
Talking Heads Remain In Light
Talking Heads Fear Of Music
Devo Freedom Of Choice
Fleet Foxes Fleet Foxes
Radiohead OK Computer
The Replacements Let It Be
