In [4]:
import pandas as pd
# Load the raw ratings file and give its four columns working names.
data = pd.read_csv('Beauty_rating.csv')
data.columns = ['empty', 'user', 'item', 'rating']
# The first column is not needed; keep only user, item and rating.
data = data.drop('empty', axis=1)

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rating rows as `test`; the fixed random_state keeps the split repeatable.
# NOTE(review): `train` and `test` are not referenced by the later cells visible here;
# the ALS model further down is fit on the full `data` frame.
train,test = train_test_split(data, test_size=0.3, random_state=1)

In [6]:
# Preview the first rows of the trimmed ratings frame (columns: user, item, rating).
data.head()

Unnamed: 0,user,item,rating
0,A24FQNZ2ZCP9UH,B004DK0UDA,5.0
1,A9MYCYZT8EMMX,B00AWCNF9O,1.0
2,A1T5GAE8KIMVTF,B001V3TVEQ,5.0
3,A12KFVKK4UXYBH,B000U0CA0I,5.0
4,A9MYCYZT8EMMX,B001B7MAU4,5.0


In [43]:
#Alternating Least Squares with implicit package

import sys
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import random

from sklearn.preprocessing import MinMaxScaler

import implicit # The Cython library

# Discard rows that contain missing values and detach the result from the
# original frame so the column assignments below act on an independent copy.
data = data.dropna().copy()

# Turn the raw user / item identifiers into categoricals and expose their
# integer codes as `user_id` / `item_id`; these codes are the matrix indices.
for label_col, code_col in (('user', 'user_id'), ('item', 'item_id')):
    data[label_col] = data[label_col].astype("category")
    data[code_col] = data[label_col].cat.codes

# Two orientations of the same ratings:
#   item-user -> used to fit the model below
#   user-item -> used to look up what a user already rated when recommending
rating_values = data['rating'].astype(float)
item_codes, user_codes = data['item_id'], data['user_id']
sparse_item_user = sparse.csr_matrix((rating_values, (item_codes, user_codes)))
sparse_user_item = sparse.csr_matrix((rating_values, (user_codes, item_codes)))

# Scale the ratings by alpha to obtain the confidence values handed to ALS.
alpha_val = 15
data_conf = (sparse_item_user * alpha_val).astype('double')

# Train a 20-factor ALS model on the item-user confidence matrix.
# NOTE(review): which orientation fit() expects (item-user vs user-item) has
# differed between releases of the implicit library; confirm against the
# installed version before upgrading.
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=20)
model.fit(data_conf)


#---------------------
# FROM ONE ITEM TO ITS MOST SIMILAR ITEMS
#---------------------


# Integer code (`item_id` column) of the query item and how many neighbours to list
item_id = 1
n_similar = 10

print('Find the %d most similar item to item id = %d' % (n_similar,item_id))

print('THe most similar items as below:')

# Latent factor matrices learned by the fitted model
user_vecs = model.user_factors
item_vecs = model.item_factors

# Euclidean norm of every item factor vector
item_norms = np.sqrt((item_vecs * item_vecs).sum(axis=1))

# Cosine similarity between the query item and every item: the dot product is
# divided by each candidate's norm here and by the query item's norm below.
# The query item is itself a candidate, so it normally heads the list.
scores = item_vecs.dot(item_vecs[item_id]) / item_norms
top_idx = np.argpartition(scores, -n_similar)[-n_similar:]
similar = sorted(zip(top_idx, scores[top_idx] / item_norms[item_id]), key=lambda x: -x[1])

# `item_id` holds the categorical codes of `item`, so a code maps straight to
# its label through the category index; no scan of the whole frame is needed.
item_labels = data['item'].cat.categories
for idx, _score in similar:
    print(item_labels[idx])

print("---End with finding silimar items---")
print('')
print('')
#------------------------------
# CREATE USER RECOMMENDATIONS
#------------------------------

def recommend(user_id, sparse_user_item, user_vecs, item_vecs, num_items=10, item_labels=None):
    """Rank the items a user has not interacted with yet by predicted preference.

    Parameters
    ----------
    user_id : int
        Row index of the user in ``sparse_user_item`` and ``user_vecs``.
    sparse_user_item : scipy.sparse matrix
        User-item interaction matrix; a non-zero entry marks an item the user
        has already interacted with.
    user_vecs, item_vecs : scipy.sparse matrix or numpy.ndarray
        User and item factor matrices (one row per user / item). Pass either
        both as sparse matrices or both as dense arrays.
    num_items : int, default 10
        Maximum number of recommendations to return.
    item_labels : sequence, optional
        Label for each item index. When omitted, the categories of the
        module-level ``data['item']`` column are used.

    Returns
    -------
    pandas.DataFrame
        Columns ``item`` and ``score``, best first. Scores are min-max scaled
        over all items, so they lie in [0, 1]. Items the user already
        interacted with are never returned, so the frame can hold fewer than
        ``num_items`` rows.
    """
    # Dense copy of the user's row; non-zero means "already interacted with".
    user_interactions = sparse_user_item[user_id, :].toarray().reshape(-1)
    already_seen = user_interactions != 0

    # Predicted preference for every item: user factors . item factors.
    raw_scores = user_vecs[user_id, :].dot(item_vecs.T)
    if sparse.issparse(raw_scores):
        raw_scores = raw_scores.toarray()
    rec_vector = np.asarray(raw_scores).reshape(-1)

    if rec_vector.size == 0:
        # No items at all: nothing to scale or rank.
        return pd.DataFrame({'item': [], 'score': []}, columns=['item', 'score'])

    # Min-max scale to [0, 1]; a constant score vector maps to all zeros.
    lowest = rec_vector.min()
    span = rec_vector.max() - lowest
    if span > 0:
        rec_vector_scaled = (rec_vector - lowest) / span
    else:
        rec_vector_scaled = np.zeros(rec_vector.shape, dtype=float)

    # Zero the score of seen items and rank only the unseen ones. Multiplying
    # by a 0/1 mask alone would let a seen item tie with the lowest-scoring
    # unseen item and leak into the output.
    recommend_vector = np.where(already_seen, 0.0, rec_vector_scaled)
    candidates = np.flatnonzero(~already_seen)
    # mergesort is stable, so equal scores keep ascending item-index order.
    ranking = np.argsort(-recommend_vector[candidates], kind='mergesort')
    item_idx = candidates[ranking][:num_items]

    if item_labels is None:
        # Item indices are the categorical codes of `data['item']`, so the
        # category index maps each code straight back to its label.
        item_labels = data['item'].cat.categories
    labels = np.asarray(item_labels, dtype=object)

    recommendations = pd.DataFrame(
        {'item': labels[item_idx], 'score': recommend_vector[item_idx]},
        columns=['item', 'score'],
    )

    return recommendations


# recommend() is called with sparse inputs here, so wrap the trained
# factor arrays as CSR matrices first.
user_vecs = sparse.csr_matrix(model.user_factors)
item_vecs = sparse.csr_matrix(model.item_factors)

# Integer code (`user_id` column) of the user to build recommendations for
user_id = 1
print('Creame recommendations for user %d' % user_id)
print('THe recommendation for the user as below:')

recommendations = recommend(
    user_id,
    sparse_user_item,
    user_vecs,
    item_vecs,
)
print(recommendations)

print("---End with recommendation---")

100%|██████████| 20.0/20 [00:00<00:00, 39.78it/s]


Find the 10 most similar item to item id = 1
THe most similar items as below:
9759091062
B000PU70WG
B004WSXD4G
B0086XX0AK
B008HODSNW
B000PR4F9K
B002PR2GLM
B004KOFORG
B007OX0RBS
B005IHS8KY
---End with finding silimar items---


Creame recommendations for user 1
THe recommendation for the user as below:
         item     score
0  B0043OYFKU  1.000000
1  B001S261Q6  0.918293
2  B0035RE2TA  0.893915
3  B0000530ED  0.891264
4  B001HKR6WM  0.841669
5  B002B9DWBC  0.838585
6  B006UET5UG  0.824630
7  B000ZMBSPE  0.821778
8  B000UVZU1S  0.821117
9  B002HWS7RM  0.812634
---End with recommendation---


In [29]:
# Preview `data` once the ALS cell has added the integer `user_id` / `item_id` code columns.
# NOTE(review): this cell's execution count (29) is lower than the ALS cell's (43), so the
# output shown may come from an earlier run; restart and run all cells to confirm.
data.head()

Unnamed: 0,user,item,rating,user_id,item_id
0,A24FQNZ2ZCP9UH,B004DK0UDA,5.0,4438,6543
1,A9MYCYZT8EMMX,B00AWCNF9O,1.0,12111,10142
2,A1T5GAE8KIMVTF,B001V3TVEQ,5.0,3124,4115
3,A12KFVKK4UXYBH,B000U0CA0I,5.0,280,2065
4,A9MYCYZT8EMMX,B001B7MAU4,5.0,12111,3128
