In [1]:
import sys
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import random

from sklearn.preprocessing import MinMaxScaler

import implicit

In [2]:

# Load the Last.fm 360K play counts (tab-separated) from a machine-specific path.
# NOTE(review): read_table treats the first line of the file as a header by
# default; if this TSV has no header row, its first record is silently consumed
# as column labels -- confirm against the file before relying on row counts.
raw_data = pd.read_table(r'D:\Datasets\Lastfm\lastfm-dataset-360K\lastfm-dataset-360K\usersha1-artmbid-artname-plays.tsv')

# Discard the second column and give the remaining three readable names.
raw_data = raw_data.drop(columns=raw_data.columns[1])
raw_data.columns = ['user', 'artist', 'plays']

# Drop every ROW that has a missing value, and store user/artist as categoricals.
# astype() returns a new frame, so `data` does not alias `raw_data`.
data = raw_data.dropna().astype({'user': 'category', 'artist': 'category'})

# Numeric ids are the categorical codes, i.e. positions in `.cat.categories`.
data = data.assign(
    user_id=data['user'].cat.codes,
    artist_id=data['artist'].cat.codes,
)


In [48]:
# Every play-count row whose artist name is exactly 'jay z'.
data.loc[data['artist'] == 'jay z']

Unnamed: 0,user,artist,plays,user_id,artist_id
5773009,547263b31d38e53028f6c8bf93169ed2cb8491dc,jay z,63,118135,147037
7261601,6a18b635aa70c205a3fe4ee79dda4ad9838dcb8b,jay z,2,148560,147037
11123910,a2a1df15693ef372f447eedbba1261bc3ed2fb8f,jay z,41,227601,147037
17481702,ff3184ce8ef9bca2d3f4a1becfeb981799268ec2,jay z,124,357753,147037


In [12]:
# Dimensions of `data` as an (n_rows, n_columns) tuple.
data.shape

(17535450, 5)

In [11]:
# Number of distinct users (user_id holds integer category codes, so no NaN to skip).
data['user_id'].nunique()

358868

In [13]:
# Number of distinct artists (artist_id holds integer category codes, so no NaN to skip).
data['artist_id'].nunique()

292363

In [14]:

# Build two sparse views of the same play counts:
#   * item-user (rows = artists, columns = users) -- used to fit the model
#   * user-item (rows = users, columns = artists) -- used when recommending
play_counts = data['plays'].astype(float)
artist_codes = data['artist_id']
user_codes = data['user_id']

sparse_item_user = sparse.csr_matrix((play_counts, (artist_codes, user_codes)))
sparse_user_item = sparse.csr_matrix((play_counts, (user_codes, artist_codes)))


In [16]:

# Scale the raw play counts by alpha to obtain the confidence matrix.
alpha_val = 15
data_conf = (sparse_item_user * alpha_val).astype(np.float64)

# ALS model with 20 latent factors, trained on the GPU.
model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=20,
    use_gpu=True,
)

# Train on the confidence-weighted item-user matrix.
model.fit(data_conf)

 



HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




In [49]:

#---------------------
# FIND SIMILAR ITEMS
#---------------------

# artist_id was created from the categorical codes of `artist`, so an id is a
# direct position in the category index. Indexing it avoids building a boolean
# mask over every row of `data` for each name lookup.
artist_names = data['artist'].cat.categories

# Find the 10 most similar to Jay-Z. Resolve the id from the name rather than
# hard-coding it, so the cell stays correct if the category codes ever shift
# (it is 147037 in the run recorded here). Raises KeyError if the name is absent.
item_id = artist_names.get_loc('jay z') #Jay-Z
n_similar = 10

# Use implicit to get similar items as (artist_id, score) pairs.
similar = model.similar_items(item_id, n_similar)

# Print the names of our most similar artists
for idx, score in similar:
    print(artist_names[idx])


jay z
dolla
big dime
all city (boo banga, big rich, san quinn)
the roots feat. peedi peedi & dice raw
jah cure & fantan mojah
cashis
japanese cartoon
nu jerzey devil
freekey zekey


In [46]:
   
#------------------------------
# CREATE USER RECOMMENDATIONS
#------------------------------

# Create recommendations for the user with id 199842 (an earlier run used 2025).
user_id = 199842#2025

# Use the implicit recommender; it is given the user-item matrix.
recommended = model.recommend(user_id, sparse_user_item)

# artist_id was created from the categorical codes of `artist`, so an id is a
# direct position in the category index. Indexing it avoids scanning every row
# of `data` once per recommended artist.
artist_names = data['artist'].cat.categories

artists = []
scores = []

# Get artist names from ids
for idx, score in recommended:
    artists.append(artist_names[idx])
    scores.append(score)

# Create a dataframe of artist names and scores
recommendations = pd.DataFrame({'artist': artists, 'score': scores})

print(recommendations)

                        artist     score
0               andrés segovia  0.935877
1              giacomo puccini  0.935018
2              robert schumann  0.922636
3              george gershwin  0.922402
4  modest petrovich mussorgsky  0.919025
5            felix mendelssohn  0.915398
6             daniel barenboim  0.910043
7                gustav mahler  0.907797
8          the swingle singers  0.907123
9           franz joseph haydn  0.903835


In [31]:
# Peek at row 0 of the fitted model's user-factor matrix (one value per latent factor).
# NOTE(review): which entity row 0 belongs to depends on the matrix orientation
# that fit() expects in the installed implicit version -- confirm.
model.user_factors[0]

array([-12.29312  ,  11.8302555,  -2.51117  ,   4.918436 ,  18.026499 ,
         8.100879 ,  -8.643769 ,   5.637821 ,   2.179194 ,  -1.8122557,
         7.4441   ,  24.524538 ,  11.792651 ,  -5.2495737,  15.84455  ,
        -1.4184906,   7.0281873,  12.075925 ,  11.967903 ,  -1.8662224],
      dtype=float32)

In [33]:
# Peek at row 0 of the fitted model's item-factor matrix (one value per latent factor).
# NOTE(review): which entity row 0 belongs to depends on the matrix orientation
# that fit() expects in the installed implicit version -- confirm.
model.item_factors[0]

array([ 2.9723791e-05, -2.0016657e-06,  1.3718014e-05, -6.5996092e-06,
       -1.1899359e-05,  3.9194965e-06,  6.3422926e-06, -3.1925576e-06,
        1.4984930e-05,  2.4186907e-05, -7.9211995e-06,  3.2975693e-06,
       -1.9695393e-05,  7.4196182e-06, -8.9416553e-06, -7.3306583e-06,
       -1.3895602e-05,  3.9594656e-06,  1.9331051e-06, -3.8779078e-07],
      dtype=float32)