# Collaborative Filtering Recommendations using Original Values

In [1]:
from collections import Counter

import implicit
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.sparse import csr_matrix

In [2]:
# Load the train/test play logs and the game_id -> game_name lookup table.
# Forward slashes resolve on Windows, macOS and Linux alike, whereas the
# previous "data\\..." form only resolved on Windows.
train = pd.read_csv("data/train-plays.csv")
test = pd.read_csv("data/test-plays.csv")
game_coding = pd.read_csv("data/game-coding.csv")

In [3]:
# Number of rows in the training set.
len(train)

36447

## Format training data

In [4]:
# Wide table: one row per game_id, one column per user_id, cells hold `amount`.
game_user = train.pivot(
    index="game_id",
    columns="user_id",
    values="amount",
)

In [5]:
# Game/user pairs that do not occur in train come out of the pivot as NaN;
# store them as 0 instead.
game_user = game_user.fillna(0)

In [79]:
# Lookup between each user_id label ("original") and its column position
# in game_user ("coded").
user_coding = pd.DataFrame(
    {
        "original": game_user.columns,
        "coded": np.arange(len(game_user.columns)),
    }
)

In [6]:
# Sparse CSR copy of the game-by-user table (rows: game_id, columns: user_id).
game_user_sparse = csr_matrix(game_user)

In [7]:
# Same table flipped: one row per user_id, one column per game_id.
user_game = game_user.transpose()

In [8]:
# Sparse CSR copy of the user-by-game table (rows: user_id, columns: game_id).
user_game_sparse = csr_matrix(user_game)

In [9]:
# Distinct user ids present in train.
# NOTE(review): in the visible notebook this is only referenced by the
# commented-out evaluation loop further down.
user_ids = train['user_id'].unique()

In [26]:
# Sparsity: percentage of game/user cells that hold no play amount.
matrix_size = game_user_sparse.shape[0] * game_user_sparse.shape[1]  # all possible game/user cells
num_played = game_user_sparse.nonzero()[0].size  # cells with a non-zero amount
sparsity = 100 * (1 - num_played / matrix_size)
sparsity

96.98755248454393

## Alternating Least Squares

In [351]:
# ALS model: 128 latent factors, regularization 0.05, 50 training iterations.
als = implicit.als.AlternatingLeastSquares(
    factors=128,
    regularization=0.05,
    iterations=50,
)

In [352]:
# Train on the game-by-user matrix (games as rows, users as columns).
als.fit(game_user_sparse)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




In [353]:
# rows_list = list()
# games_recommended = set()
# for user_id in user_ids:
#     results_dict = dict()
#     results_dict['user_id'] = user_id
#     test_df = test[test['user_id'] == user_id]
#     recommendations_df = pd.DataFrame(als.recommend(user_id, user_game_sparse))
#     recommended = test_df.merge(recommendations_df, left_on="game_id", right_on=0)
#     results_dict['num_recommended'] = recommended.shape[0]
#     results_dict['total_to_recommend'] = test_df.shape[0]
#     rows_list.append(results_dict)
#     games_recommended.update(recommended["game_id"].unique())

In [354]:
# Top-N recommendations for every user. Each row of the result holds item
# indices, i.e. row positions in the game-by-user matrix the model was fit on.
rec_positions = als.recommend_all(user_game_sparse)
n_recs = rec_positions.shape[1]  # recommendations per user (library default N)

# Translate row positions back into game_id labels so they can be compared with
# test['game_id'] even when the game ids present in train are not exactly 0..n-1.
game_recs = game_user.index.values[rec_positions].ravel()

# Repeat every user_id once per recommendation so the two columns line up.
user_10 = np.repeat(user_game.index, n_recs)
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

HBox(children=(IntProgress(value=0, max=1160), HTML(value='')))




In [355]:
# Pair every recommendation with the same user's test rows
# (game_id_x = recommended game, game_id_y = test game).
results = pd.merge(recommendations, test, on="user_id")

In [356]:
# 1 where the recommended game equals the user's test game, otherwise 0.
results["recommended"] = np.where(
    results["game_id_x"].eq(results["game_id_y"]), 1, 0
)

In [357]:
# Hit rate: matched (recommendation, test row) pairs divided by the number of test rows.
len(results.loc[results["recommended"] == 1]) / len(test)

0.1810344827586207

In [358]:
# Coverage: distinct games recommended to anyone / distinct games in train.
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.7363374880153404

In [95]:
# Print the game_name entries for the 10 items the ALS model returns as most
# similar to item 1 (the query item itself comes back first).
similar = als.similar_items(1, 10)
for idx, score in similar:
    print(game_coding.loc[game_coding["game_id"] == idx, "game_name"])

1    Portal 2
Name: game_name, dtype: object
841    Portal Stories Mel
Name: game_name, dtype: object
493    Thinking with Time Machine
Name: game_name, dtype: object
23    Portal
Name: game_name, dtype: object
61    Divinity Original Sin Enhanced Edition
Name: game_name, dtype: object
288    Ricochet
Name: game_name, dtype: object
897    Pool Nation
Name: game_name, dtype: object
316    LIMBO
Name: game_name, dtype: object
267    Magic 2015
Name: game_name, dtype: object
957    Tactical Intervention
Name: game_name, dtype: object


In [64]:
# Peek at the first rows of the training frame.
train.head()

Unnamed: 0,amount,norm_amount,game_id,user_id
0,9.7,0.005493,9,0
1,15.9,0.09521,8,0
2,271.0,0.801775,10,0
3,13.1,0.019208,2,0
4,24.0,0.06015,3,0


In [134]:
# Rank each user's games by normalised amount; rank 1 is the largest norm_amount.
# Note: this adds a 'rank' column to `train` in place.
train["rank"] = train.groupby("user_id")["norm_amount"].rank(ascending=False)

# Keep only each user's top-ranked game.
train_1 = train.loc[train["rank"] == 1]

# Users whose top-ranked game is game_id 1.
train_1.loc[train_1["game_id"] == 1].head()

Unnamed: 0,amount,norm_amount,game_id,user_id,rank
14974,725.0,1.0,1,276,1.0


In [135]:
# First ten training rows belonging to user 276.
train.loc[train["user_id"] == 276].head(10)

Unnamed: 0,amount,norm_amount,game_id,user_id,rank
14974,725.0,1.0,1,276,1.0
14975,3.8,0.090476,333,276,7.0
14976,1.1,0.004867,785,276,9.0
14977,369.0,0.770355,116,276,2.0
14978,6.7,0.008839,20,276,8.0
14979,8.1,0.188372,168,276,6.0
14980,32.0,0.477612,211,276,4.0
14981,9.9,0.538043,841,276,3.0
14982,32.0,0.307692,27,276,5.0


In [137]:
# Item-similarity evaluation.
# For every user: fetch the nearest neighbours of each game they have in train,
# let each neighbour collect a vote weighted by how highly the user ranked the
# source game, drop games the user already has, and keep the highest-scoring
# games as that user's recommendations. A hit is a test game that appears among
# those recommendations.
#
# Requires the 'rank' column added to `train` by the ranking cell above.
# NOTE(review): game_id is passed to similar_items() as an item index and the
# returned indices are compared with game ids; this assumes the game ids in
# train coincide with the row positions of game_user -- confirm.
top_n = 10  # neighbours fetched per game, and recommendations kept per user

num_correct = 0
total = 0
for user_id in user_coding['original']:
    user_df = train[train['user_id'] == user_id]
    n_played = len(user_df.index)

    # Accumulate weighted votes. Insertion order is preserved, so ties in
    # most_common() resolve in the order candidates were first seen.
    res_dict = dict()
    for game_id, rank in zip(user_df['game_id'], user_df['rank']):
        weight = n_played - rank + 1  # rank 1 carries the largest weight
        for idx, _ in als.similar_items(int(game_id), top_n):
            res_dict[idx] = res_dict.get(idx, 0) + weight

    # Never recommend a game the user already has in train.
    train_ids = set(user_df['game_id'])
    candidates = {k: v for k, v in res_dict.items() if k not in train_ids}
    recs = [k for k, _ in Counter(candidates).most_common(top_n)]

    # Select this user's test rows by value. The previous `[...]['game_id'][i]`
    # lookup went through the index label and only worked while test's index
    # happened to line up with the user's position in user_coding.
    held_out = test.loc[test['user_id'] == user_id, 'game_id']
    total += len(held_out)
    num_correct += int(held_out.isin(recs).sum())

# Guard against an empty evaluation set instead of raising ZeroDivisionError.
print(num_correct / total if total else float('nan'))

0.05086206896551724


## Bayesian Personalized Ranking

In [373]:
# BPR model: 128 latent factors, learning rate 0.01, regularization 0.05,
# 200 training iterations.
bpr = implicit.bpr.BayesianPersonalizedRanking(
    factors=128,
    learning_rate=0.01,
    regularization=0.05,
    iterations=200,
)

In [374]:
# Train on the game-by-user matrix (games as rows, users as columns).
bpr.fit(game_user_sparse)

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))




In [375]:
# Top-N recommendations for every user. Each row of the result holds item
# indices, i.e. row positions in the game-by-user matrix the model was fit on.
rec_positions = bpr.recommend_all(user_game_sparse)
n_recs = rec_positions.shape[1]  # recommendations per user (library default N)

# Translate row positions back into game_id labels so they can be compared with
# test['game_id'] even when the game ids present in train are not exactly 0..n-1.
game_recs = game_user.index.values[rec_positions].ravel()

# Repeat every user_id once per recommendation so the two columns line up.
user_10 = np.repeat(user_game.index, n_recs)
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

HBox(children=(IntProgress(value=0, max=1160), HTML(value='')))




In [376]:
# Pair every recommendation with the same user's test rows
# (game_id_x = recommended game, game_id_y = test game).
results = pd.merge(recommendations, test, on="user_id")

In [377]:
# 1 where the recommended game equals the user's test game, otherwise 0.
results["recommended"] = np.where(
    results["game_id_x"].eq(results["game_id_y"]), 1, 0
)

In [378]:
# Hit rate: matched (recommendation, test row) pairs divided by the number of test rows.
len(results.loc[results["recommended"] == 1]) / len(test)

0.1896551724137931

In [379]:
# Coverage: distinct games recommended to anyone / distinct games in train.
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.9817833173537871

## Logistic Matrix Factorization

In [344]:
# LMF model with 128 latent factors; every other hyper-parameter is left at
# the library default.
lmf = implicit.lmf.LogisticMatrixFactorization(factors=128)

In [345]:
# Train on the game-by-user matrix, then echo the hyper-parameters the model
# ran with (only the factor count was passed explicitly at construction).
lmf.fit(game_user_sparse)
print(lmf.learning_rate, lmf.regularization, lmf.iterations)

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))


0.85 2.0 30


In [346]:
# Top-N recommendations for every user. Each row of the result holds item
# indices, i.e. row positions in the game-by-user matrix the model was fit on.
rec_positions = lmf.recommend_all(user_game_sparse)
n_recs = rec_positions.shape[1]  # recommendations per user (library default N)

# Translate row positions back into game_id labels so they can be compared with
# test['game_id'] even when the game ids present in train are not exactly 0..n-1.
game_recs = game_user.index.values[rec_positions].ravel()

# Repeat every user_id once per recommendation so the two columns line up.
user_10 = np.repeat(user_game.index, n_recs)
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

HBox(children=(IntProgress(value=0, max=1160), HTML(value='')))




In [347]:
# Pair every recommendation with the same user's test rows
# (game_id_x = recommended game, game_id_y = test game).
results = pd.merge(recommendations, test, on="user_id")

In [348]:
# 1 where the recommended game equals the user's test game, otherwise 0.
results["recommended"] = np.where(
    results["game_id_x"].eq(results["game_id_y"]), 1, 0
)

In [349]:
# Hit rate: matched (recommendation, test row) pairs divided by the number of test rows.
len(results.loc[results["recommended"] == 1]) / len(test)

0.056896551724137934

In [350]:
# Coverage: distinct games recommended to anyone / distinct games in train.
# NOTE(review): the recorded value is below 1%, i.e. very few distinct games
# are being recommended across all users -- worth checking the LMF
# hyper-parameters before relying on this model.
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.009587727708533078