# Collaborative Filtering Recommendations using Normalized Values

In [1]:
import pandas as pd
import numpy as np
import implicit

from matplotlib import pyplot as plt
from scipy.sparse import csr_matrix

In [2]:
# Load the pre-split play data (training and held-out test plays).
# Forward slashes keep these relative paths valid on every OS; the previous
# "data\\..." form only resolved on Windows.
train = pd.read_csv("data/train-plays.csv")
test = pd.read_csv("data/test-plays.csv")

## Format training data

In [51]:
# Reshape the long-format training plays into a game x user table:
# one row per game_name, one column per user_id, cells hold norm_amount
# (NaN where a user has no recorded play of a game).
game_user = train.pivot(
    index="game_name",
    columns="user_id",
    values="norm_amount",
)

In [52]:
# Missing (game, user) pairs become 0 so the table is fully numeric.
game_user = game_user.fillna(value=0)

In [53]:
# Create user and game encodings: each lookup table pairs the original label
# (user id / game name) with a zero-based integer code equal to its position
# on the corresponding axis of game_user.
user_coding = pd.DataFrame({
    "original": game_user.columns,
    "coded": np.arange(len(game_user.columns)),
})
game_coding = pd.DataFrame({
    "original": game_user.index,
    "coded": np.arange(len(game_user.index)),
})

In [54]:
# Replace the original labels on both axes with their integer codes
# (rows = games, columns = users). The two assignments are independent.
game_user.index = game_coding['coded']
game_user.columns = user_coding['coded']

In [55]:
# CSR sparse copy of the game x user table (rows = games, columns = users).
game_user_sparse = csr_matrix(game_user.values)

In [56]:
# Transposed view of the same data: rows = users, columns = games.
user_game = game_user.transpose()

In [57]:
# CSR sparse copy of the user x game table (rows = users, columns = games).
user_game_sparse = csr_matrix(user_game.values)

In [69]:
# Spot-check: every training play recorded for one specific game.
train.loc[train['game_name'] == 'Thief Deadly Shadows']

Unnamed: 0,user_id,game_name,amount,norm_amount
268,975449,Thief Deadly Shadows,4.9,0.371212
1533,10253354,Thief Deadly Shadows,4.0,0.30303
2379,11794760,Thief Deadly Shadows,0.1,0.007576
4824,24632218,Thief Deadly Shadows,0.4,0.030303
9480,45592640,Thief Deadly Shadows,13.2,1.0
12861,55906572,Thief Deadly Shadows,1.0,0.075758


# Alternating Least Squares

In [58]:
# ALS matrix-factorisation model with 128 latent factors; all other
# hyperparameters are left at the library defaults.
als = implicit.als.AlternatingLeastSquares(factors=128)

In [59]:
# Train the model on the sparse game x user matrix (rows = games,
# columns = users, as produced by the pivot earlier in the notebook).
# NOTE(review): which orientation fit() expects (item x user vs. user x item)
# depends on the installed implicit release — confirm against that version.
als.fit(game_user_sparse)

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




In [123]:
# Sanity check: original user id behind user code 1 (row label 1).
user_coding.loc[user_coding['coded'] == 1, 'original'][1]

86540

In [142]:
# Evaluate the ALS recommendations against the held-out test plays.
#
# For every user code we fetch the model's recommended games and, over that
# user's rows in `test`, count:
#   * num_correctly_recommended   - liked (norm_amount >= threshold) and recommended
#   * num_not_recommended         - liked but NOT recommended
#   * num_incorrectly_recommended - not liked (norm_amount < threshold) but recommended
# Test rows that are neither liked nor recommended are not counted anywhere.
#
# NOTE(review): judging by the sample rows shown for 'Thief Deadly Shadows',
# norm_amount looks like playtime divided by the per-game maximum, so the
# threshold means "at least 5% of the maximum", not a percentile — confirm
# against the preprocessing step.
LIKE_THRESHOLD = 0.05  # minimum norm_amount for a test play to count as "liked"

rows_list = []
for i in range(len(user_coding.index)):
    # Recommendations for user code i; column 0 holds the game codes, which the
    # merge translates back to the original game names.
    # NOTE(review): this call shape and a result convertible to
    # (game code, score) rows depend on the installed implicit version — confirm.
    recommendations_df = pd.DataFrame(als.recommend(i, user_game_sparse)).merge(
        game_coding, left_on=0, right_on="coded"
    )
    recs_as_list = recommendations_df['original'].tolist()

    # user_coding has a default integer index and 'coded' is arange(n), so the
    # row labelled i is the row whose code is i.
    user_id = user_coding.loc[i, 'original']
    test_df = test[test['user_id'] == user_id].reset_index(drop=True)

    # Boolean masks over this user's test rows. `not_liked` is computed with
    # `<` (instead of negating `liked`) so rows with a missing norm_amount
    # fall into neither group, exactly as in a row-by-row comparison.
    recommended = test_df['game_name'].isin(recs_as_list)
    liked = test_df['norm_amount'] >= LIKE_THRESHOLD
    not_liked = test_df['norm_amount'] < LIKE_THRESHOLD

    results_dict = {
        'user_coding': i,
        'user_id': user_id,
        'num_correctly_recommended': int((recommended & liked).sum()),
        'num_not_recommended': int((~recommended & liked).sum()),
        'num_incorrectly_recommended': int((recommended & not_liked).sum()),
    }
    rows_list.append(results_dict)

# One row of counts per user.
results_df = pd.DataFrame(rows_list)
results_df.head()
            

Unnamed: 0,num_correctly_recommended,num_incorrectly_recommended,num_not_recommended,user_coding,user_id
0,1,1,2,0,76767
1,0,0,2,1,86540
2,1,1,0,2,229911
3,2,0,9,3,298950
4,0,0,0,4,554278


In [144]:
# Aggregate the per-user counts into overall percentages.
count_columns = [
    'num_correctly_recommended',
    'num_incorrectly_recommended',
    'num_not_recommended',
]
# Column-wise sums replace the row-by-row accumulation loop. reindex() keeps
# this working even when results_df is empty and therefore has no columns.
totals = results_df.reindex(columns=count_columns).sum()

num_correctly_recommended = totals['num_correctly_recommended']
num_incorrectly_recommended = totals['num_incorrectly_recommended']
num_not_recommended = totals['num_not_recommended']

# Denominator: every liked test play, whether or not it was recommended.
# Incorrectly recommended plays are deliberately left out of it.
total_amount = num_correctly_recommended + num_not_recommended

if total_amount > 0:
    # Share of liked test plays that the model recommended.
    print(100 * (num_correctly_recommended / total_amount))
    # Incorrectly recommended plays relative to the same denominator. This is a
    # ratio, not a share of the recommendations, so it is not bounded by 100.
    print(100 * (num_incorrectly_recommended / total_amount))
else:
    # Without any liked test play both ratios would divide by zero.
    print("No liked test plays found; the percentages are undefined.")

5.357142857142857
4.74537037037037


In [116]:
# Spot-check: held-out test plays of user 298950.
test.loc[test['user_id'] == 298950]

Unnamed: 0,user_id,game_name,amount,norm_amount
14,298950,Battlefield Bad Company 2,0.4,0.000391
15,298950,Sleeping Dogs,1.1,0.007914
16,298950,The Forest,6.9,0.041566
17,298950,"Sir, You Are Being Hunted",0.9,0.05
18,298950,Red Faction Guerrilla Steam Edition,1.5,0.02381
19,298950,Half-Life 2,0.1,0.000962
20,298950,Prison Architect,2.4,0.012973
21,298950,Sid Meier's Civilization V,135.0,0.024622
22,298950,Sanctum,5.8,0.175758
23,298950,Metro Last Light,9.8,0.058333


In [44]:
# Preview the user id -> code lookup table. Only the first rows are shown
# rather than dumping the whole frame into the notebook output.
user_coding.head(10)

Unnamed: 0,original,coded
0,76767,0
1,86540,1
2,229911,2
3,298950,3
4,554278,4
5,561758,5
6,748719,6
7,975449,7
8,1072465,8
9,1129452,9


In [45]:
# Spot-check: training plays of user 76767 (user code 0 in user_coding).
train.loc[train['user_id'] == 76767]

Unnamed: 0,user_id,game_name,amount,norm_amount
0,76767,Rise of Nations Extended Edition,5.7,0.107547
1,76767,Worms Armageddon,0.4,0.02
2,76767,Age of Empires II HD Edition,13.1,0.019208
3,76767,Call of Duty Modern Warfare 2,65.0,0.137131
4,76767,Alien Swarm,0.8,0.009639
5,76767,Counter-Strike,365.0,0.078259
6,76767,Banished,24.0,0.06015
7,76767,Counter-Strike Source,25.0,0.005346
8,76767,Call of Duty Modern Warfare 2 - Multiplayer,165.0,0.144991
9,76767,The Stanley Parable,1.8,0.051429
