# Collaborative Filtering Recommendations using Normalized Values

In [6]:
import pandas as pd
import numpy as np
import implicit

from matplotlib import pyplot as plt
from scipy.sparse import csr_matrix

In [7]:
# Load the train/test play tables and the game-coding table.
# The relative paths use forward slashes so the notebook also runs on Linux/macOS;
# a backslash is only treated as a path separator on Windows, which accepts "/" as well.
train = pd.read_csv("data/train-plays.csv")
test = pd.read_csv("data/test-plays.csv")
game_coding = pd.read_csv("data/game-coding.csv")

In [8]:
# Number of rows in the training table.
len(train)

36447

## Format training data

In [9]:
# Reshape the long-format training rows into a wide table:
# one row per game_id, one column per user_id, cells hold norm_amount.
game_user = train.pivot(
    values="norm_amount",
    index="game_id",
    columns="user_id",
)

In [10]:
# (game, user) pairs that never occur in train come out of the pivot as NaN;
# replace them with 0 so the table can be converted to a sparse matrix.
game_user = game_user.fillna(value=0)

In [11]:
# Lookup table pairing each user_id column label ("original") with its
# 0-based column position in game_user ("coded").
user_coding = pd.DataFrame(
    {
        "original": game_user.columns,
        "coded": np.arange(game_user.shape[1]),
    }
)

In [12]:
# Sparse (CSR) copy of the game x user table, built from its underlying array.
game_user_sparse = csr_matrix(game_user.values)

In [13]:
# Same table with the axes swapped: rows are users, columns are games.
user_game = game_user.transpose()

In [14]:
# Sparse (CSR) copy of the user x game table, built from its underlying array.
user_game_sparse = csr_matrix(user_game.values)

In [15]:
# Distinct user ids found in the training data, in order of first appearance.
user_ids = pd.unique(train["user_id"])

In [16]:
# Sparsity = percentage of (game, user) cells that hold no interaction.
n_rows, n_cols = game_user_sparse.shape
matrix_size = n_rows * n_cols  # every possible (game, user) pair
num_played = game_user_sparse.nonzero()[0].size  # cells with a non-zero value
sparsity = 100 * (1 - (num_played / matrix_size))
sparsity

96.98854431844481

# Alternating Least Squares

In [17]:
# ALS model: 128 latent factors, regularization 0.05, 50 training iterations.
als = implicit.als.AlternatingLeastSquares(
    factors=128,
    regularization=0.05,
    iterations=50,
)



In [18]:
# Train the ALS model on the game x user sparse matrix.
# NOTE(review): the matrix orientation fit() expects (item x user vs. user x item)
# depends on the installed implicit version — confirm against that version's docs.
als.fit(game_user_sparse)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




In [19]:
# NOTE(review): this entire cell is commented out and therefore does nothing when run.
# It looks like an earlier per-user evaluation loop built on als.recommend — confirm
# whether it is still needed and delete the cell if not.
# rows_list = list()
# games_recommended = set()
# for user_id in user_ids:
#     results_dict = dict()
#     results_dict['user_id'] = user_id
#     test_df = test[test['user_id'] == user_id]
#     recommendations_df = pd.DataFrame(als.recommend(user_id, user_game_sparse))
#     recommended = test_df.merge(recommendations_df, left_on="game_id", right_on=0)
#     results_dict['num_recommended'] = recommended.shape[0]
#     results_dict['total_to_recommend'] = test_df.shape[0]
#     rows_list.append(results_dict)
#     games_recommended.update(recommended["game_id"].unique())

In [20]:
# recommend_all returns one row per user holding the *column positions* (in
# user_game_sparse) of that user's top-N games.
rec_positions = als.recommend_all(user_game_sparse)

# Translate column positions back to game_id labels so the later comparison with
# test.game_id stays correct even if the ids are not a contiguous 0..n-1 range.
game_recs = user_game.columns.values[rec_positions.ravel()]

# Repeat each user label once per recommendation; the list length is taken from the
# result itself instead of assuming the default of 10.
user_10 = np.repeat(user_game.index, rec_positions.shape[1])

# Long-format table: one row per (user_id, recommended game_id) pair.
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

HBox(children=(IntProgress(value=0, max=1160), HTML(value='')))




In [21]:
# Pair every recommendation with the same user's test rows
# (the clashing game_id columns become game_id_x / game_id_y).
results = pd.merge(recommendations, test, on="user_id")

In [22]:
# 1 where the recommended game (game_id_x) equals the test game (game_id_y), else 0.
is_hit = results["game_id_x"] == results["game_id_y"]
results["recommended"] = is_hit.astype(int)

In [23]:
# accuracy (hit rate): matching (recommendation, test row) pairs divided by the number of test rows
len(results.loc[results["recommended"] == 1]) / len(test)

0.08448275862068966

In [24]:
# Coverage: distinct games that were recommended, as a fraction of the distinct games in train
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.7651006711409396

# Bayesian Personalized Ranking

In [25]:
# BPR model: 128 latent factors, learning rate 0.01, regularization 0.05, 200 iterations.
bpr = implicit.bpr.BayesianPersonalizedRanking(
    factors=128,
    learning_rate=0.01,
    regularization=0.05,
    iterations=200,
)

In [26]:
# Train the BPR model on the game x user sparse matrix.
# NOTE(review): the matrix orientation fit() expects depends on the installed
# implicit version — confirm against that version's docs.
bpr.fit(game_user_sparse)

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [27]:
# recommend_all returns one row per user holding the *column positions* (in
# user_game_sparse) of that user's top-N games.
rec_positions = bpr.recommend_all(user_game_sparse)

# Translate column positions back to game_id labels so the later comparison with
# test.game_id stays correct even if the ids are not a contiguous 0..n-1 range.
game_recs = user_game.columns.values[rec_positions.ravel()]

# Repeat each user label once per recommendation; the list length is taken from the
# result itself instead of assuming the default of 10.
user_10 = np.repeat(user_game.index, rec_positions.shape[1])

# Long-format table: one row per (user_id, recommended game_id) pair.
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

HBox(children=(IntProgress(value=0, max=1160), HTML(value='')))




In [28]:
# Pair every BPR recommendation with the same user's test rows
# (the clashing game_id columns become game_id_x / game_id_y).
results = pd.merge(recommendations, test, on="user_id")

In [29]:
# 1 where the recommended game (game_id_x) equals the test game (game_id_y), else 0.
is_hit = results["game_id_x"] == results["game_id_y"]
results["recommended"] = is_hit.astype(int)

In [30]:
# accuracy (hit rate): matching (recommendation, test row) pairs divided by the number of test rows
len(results.loc[results["recommended"] == 1]) / len(test)

0.19827586206896552

In [31]:
# Coverage: distinct games that were recommended, as a fraction of the distinct games in train
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.9779482262703739

# Logistic Matrix Factorization

In [32]:
# Logistic matrix factorization with 128 latent factors; every other
# hyper-parameter is left at the library default.
lmf = implicit.lmf.LogisticMatrixFactorization(factors=128)

In [33]:
# Train the logistic matrix factorization model on the game x user sparse matrix.
# NOTE(review): the matrix orientation fit() expects depends on the installed
# implicit version — confirm against that version's docs.
lmf.fit(game_user_sparse)

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))


0.85 2.0 30


In [34]:
# recommend_all returns one row per user holding the *column positions* (in
# user_game_sparse) of that user's top-N games.
rec_positions = lmf.recommend_all(user_game_sparse)

# Translate column positions back to game_id labels so the later comparison with
# test.game_id stays correct even if the ids are not a contiguous 0..n-1 range.
game_recs = user_game.columns.values[rec_positions.ravel()]

# Repeat each user label once per recommendation; the list length is taken from the
# result itself instead of assuming the default of 10.
user_10 = np.repeat(user_game.index, rec_positions.shape[1])

# Long-format table: one row per (user_id, recommended game_id) pair.
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

HBox(children=(IntProgress(value=0, max=1160), HTML(value='')))




In [35]:
# Pair every LMF recommendation with the same user's test rows
# (the clashing game_id columns become game_id_x / game_id_y).
results = pd.merge(recommendations, test, on="user_id")

In [36]:
# 1 where the recommended game (game_id_x) equals the test game (game_id_y), else 0.
is_hit = results["game_id_x"] == results["game_id_y"]
results["recommended"] = is_hit.astype(int)

In [37]:
# accuracy (hit rate): matching (recommendation, test row) pairs divided by the number of test rows
len(results.loc[results["recommended"] == 1]) / len(test)

0.11551724137931034

In [38]:
# Coverage: distinct games that were recommended, as a fraction of the distinct games in train
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.32981783317353786

# ALS + BPR

In [40]:
# Map each training user to the set of item indices ALS recommends for them
# (scores are discarded).
# NOTE(review): user_id is passed straight to als.recommend as the user's row in
# user_game_sparse — this presumably relies on ids being 0-based row codes; confirm.
user_recs = {
    user_id: {idx for idx, _score in als.recommend(user_id, user_game_sparse)}
    for user_id in user_ids
}

In [41]:
# Extend each user's ALS recommendation set, in place, with the item indices that
# BPR recommends for the same user (scores are discarded).
for user_id in user_ids:
    bpr_items = {idx for idx, _score in bpr.recommend(user_id, user_game_sparse)}
    user_recs[user_id].update(bpr_items)

In [42]:
# Hit rate of the combined ALS + BPR recommendation sets.
#
# The held-out games are grouped by user once up front. This avoids looking the test
# row up by index label (which only worked when the row's index label happened to
# equal the user id) and avoids rescanning `test` for every user.
held_out_games = {
    uid: set(games) for uid, games in test.groupby("user_id")["game_id"]
}

num_total = 0
num_correct = 0
for user_id in user_ids:
    target_games = held_out_games.get(user_id)
    if target_games is None:
        # No held-out play for this user, so there is nothing to score.
        continue
    num_total += 1
    # A hit is any held-out game appearing in the user's combined recommendation set.
    if not target_games.isdisjoint(user_recs[user_id]):
        num_correct += 1

# Guard against an empty evaluation set instead of raising ZeroDivisionError.
print(num_correct / num_total if num_total else float("nan"))

0.23103448275862068
