In [7]:
import csv
import numpy as np
import pandas as pd
from scipy.sparse import lil_matrix, csc_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

data = pd.read_csv('ratings.csv')
games = pd.read_csv('games.csv')

In [8]:
data.head(5)

Unnamed: 0,game_id,user_id,rating
0,1,314,5.0
1,1,439,3.0
2,1,588,5.0
3,1,1169,4.0
4,1,1185,4.0


In [9]:
print(data[data['rating'].isnull()])

        game_id  user_id  rating
236341     2366       24     NaN


In [10]:
data = data.dropna(subset=['rating'])
print(data[data['rating'].isnull()])

Empty DataFrame
Columns: [game_id, user_id, rating]
Index: []


In [14]:
m = data['user_id'].max() + 1 # number of user
n = data['game_id'].max() + 1 # number of item

R = lil_matrix((m, n))
for i, r in data.iterrows():
    R[int(r.user_id), int(r.game_id)] = r.rating
R = R.tocsc()

P = lil_matrix((m, m))
for i in range(m):
    P[i, i] = data[data.user_id == i].sum().rating
P = P.tocsc()

Q = lil_matrix((n, n))
for i in range(n):
    Q[i, i] = data[data.game_id == i].sum().rating
Q = Q.tocsc()

print('R.shape: ', R.shape)
print('P.shape: ', P.shape)
print('Q.shape: ', Q.shape)

R.shape:  (53425, 2367)
P.shape:  (53425, 53425)
Q.shape:  (2367, 2367)


# Item-Item collaborative filtering

In [41]:
mask = Q != 0
Q_star = Q.copy()
Q_star[mask] = np.sqrt(Q_star[mask])
Q_star[mask] = 1 / Q_star[mask]

Γ_ij = R * Q_star * R.transpose() * R * Q_star
Γ_ij

<53425x2367 sparse matrix of type '<class 'numpy.float64'>'
	with 24529136 stored elements in Compressed Sparse Column format>

In [43]:
Γ_ij.shape

(53425, 2367)

In [48]:
row = Γ_ij.getrow(5461).toarray().reshape((-1))
print(row)
ind = np.argpartition(row, -5)[-5:]
results = games[games.game_id.isin(ind[np.flip(np.argsort(row[ind]))])]
results['recommendation_score'] = row[ind[np.flip(np.argsort(row[ind]))]]
results

[0.00000000e+00 6.73797865e+02 7.52854372e+02 ... 1.49436702e-01
 1.28195217e+02 6.80483947e+01]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


Unnamed: 0,game_id,name,release_date,summary,meta_score,recommendation_score
3,4.0,SoulCalibur,8-Sep-99,"This is a tale of souls and swords, transcendi...",98.0,768.432988
4,5.0,Super Mario Galaxy,12-Nov-07,[Metacritic's 2007 Wii Game of the Year] The u...,97.0,767.131608
14,15.0,Super Mario Odyssey,27-Oct-17,New Evolution of Mario Sandbox-Style Gameplay....,97.0,766.948801
41,42.0,Halo 2,9-Nov-04,The Covenant alien race threatens to destroy a...,95.0,766.667109
117,118.0,SoulCalibur II,27-Aug-03,Open the next chapter in world of Soul Calibur...,93.0,762.831341


In [49]:
row = Γ_ij.getrow(10140).toarray().reshape((-1))
ind = np.argpartition(row, -5)[-5:]
results = games[games.game_id.isin(ind[np.flip(np.argsort(row[ind]))])]
results['recommendation_score'] = row[ind[np.flip(np.argsort(row[ind]))]]
results

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,game_id,name,release_date,summary,meta_score,recommendation_score
1,2.0,Tony Hawk's Pro Skater 2,20-Sep-00,As most major publishers' development efforts ...,98.0,589.718898
3,4.0,SoulCalibur,8-Sep-99,"This is a tale of souls and swords, transcendi...",98.0,581.086122
6,7.0,Red Dead Redemption 2,26-Oct-18,Developed by the creators of Grand Theft Auto ...,97.0,569.842213
14,15.0,Super Mario Odyssey,27-Oct-17,New Evolution of Mario Sandbox-Style Gameplay....,97.0,562.506302
41,42.0,Halo 2,9-Nov-04,The Covenant alien race threatens to destroy a...,95.0,558.266647


# User-User collaborative filtering

In [50]:
# P_star = 1 / np.sqrt(P)
# P_star[P_star == np.inf] = 0

mask = P != 0
P_star = P.copy()
P_star[mask] = np.sqrt(P_star[mask])
P_star[mask] = 1 / P_star[mask]



In [51]:
Γ_u = P_star * R * R.transpose() * P_star * R
Γ_u


<53425x2367 sparse matrix of type '<class 'numpy.float64'>'
	with 24529136 stored elements in Compressed Sparse Column format>

In [52]:
row = Γ_u.getrow(5461).toarray().reshape((-1))
ind = np.argpartition(row, -5)[-5:]
results = games[games.game_id.isin(ind[np.flip(np.argsort(row[ind]))])]
results['recommendation_score'] = row[ind[np.flip(np.argsort(row[ind]))]]
results

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,game_id,name,release_date,summary,meta_score,recommendation_score
3,4.0,SoulCalibur,8-Sep-99,"This is a tale of souls and swords, transcendi...",98.0,493.963613
14,15.0,Super Mario Odyssey,27-Oct-17,New Evolution of Mario Sandbox-Style Gameplay....,97.0,481.479671
30,31.0,The Legend of Zelda: The Wind Waker,24-Mar-03,In this ninth entry in the Legend of Zelda ser...,96.0,476.905938
41,42.0,Halo 2,9-Nov-04,The Covenant alien race threatens to destroy a...,95.0,476.046984
264,265.0,Resident Evil,30-Mar-96,Raccoon City has been completely overrun by mu...,91.0,471.184601


In [53]:
row = Γ_u.getrow(10140).toarray().reshape((-1))
ind = np.argpartition(row, -5)[-5:]
results = games[games.game_id.isin(ind[np.flip(np.argsort(row[ind]))])]
results['recommendation_score'] = row[ind[np.flip(np.argsort(row[ind]))]]
results

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,game_id,name,release_date,summary,meta_score,recommendation_score
1,2.0,Tony Hawk's Pro Skater 2,20-Sep-00,As most major publishers' development efforts ...,98.0,429.950483
3,4.0,SoulCalibur,8-Sep-99,"This is a tale of souls and swords, transcendi...",98.0,410.184308
14,15.0,Super Mario Odyssey,27-Oct-17,New Evolution of Mario Sandbox-Style Gameplay....,97.0,400.65024
30,31.0,The Legend of Zelda: The Wind Waker,24-Mar-03,In this ninth entry in the Legend of Zelda ser...,96.0,392.361418
41,42.0,Halo 2,9-Nov-04,The Covenant alien race threatens to destroy a...,95.0,390.98744
