In [1]:
import gzip
import pandas as pd
import numpy as np
from datetime import datetime

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import jaccard_score
from sklearn.metrics import accuracy_score

import gzip
from implicit import bpr
from implicit import als
import random
import scipy

# Data

In [2]:
user_reviews = pd.read_csv('data\\user_reviews_clean.csv')
user_reviews['posted'] = pd.to_datetime(user_reviews['posted'])

In [3]:
user_games = pd.read_csv('data\\user_games.csv')

In [4]:
games = pd.read_csv('data\\steam_games_clean.csv')
games['developer'] = games['developer'].apply(lambda developer: developer if type(developer) == str else '')
games['release_date'] = pd.to_datetime(games['release_date'])

In [5]:
user_games_info = user_games.merge(games, how='inner', left_on='item_id', right_on='id')

# Train-Test Split

In [6]:
user_games = user_games_info[user_games.columns]
user_games.head()

Unnamed: 0,user_id,items_count,steam_id,user_url,item_id,item_name,playtime_forever,playtime_2weeks
0,76561197970982479,277,76561197970982479,http://steamcommunity.com/profiles/76561197970...,10,Counter-Strike,6,0
1,js41637,888,76561198035864385,http://steamcommunity.com/id/js41637,10,Counter-Strike,0,0
2,Riot-Punch,328,76561197963445855,http://steamcommunity.com/id/Riot-Punch,10,Counter-Strike,0,0
3,doctr,541,76561198002099482,http://steamcommunity.com/id/doctr,10,Counter-Strike,93,0
4,corrupted_soul,115,76561198007659921,http://steamcommunity.com/id/corrupted_soul,10,Counter-Strike,108,0


In [7]:
X_train, X_test = train_test_split(user_games, test_size=0.2, random_state=42)

### Building Test Set

In [8]:
all_games = user_games['item_name'].unique()
gamesPerUser = user_games.groupby('user_id')['item_name'].apply(list)

def choose_random_game(user):
    rand_game = np.random.choice(all_games)
    already_owned = gamesPerUser.loc[user]
    if rand_game in already_owned:
        rand_game = np.random.choice(all_games)
    return rand_game

In [9]:
positive_entries = X_test[['user_id','item_name']]
positive_entries['owned'] = positive_entries['user_id'].apply(lambda val: 1)
rand_games = X_test['user_id'].apply(choose_random_game)
negative_entries = pd.concat([X_test['user_id'], 
                              rand_games.rename('item_name'), 
                              X_test['user_id'].apply(lambda val: 0).rename('owned')],axis=1)
X_test = pd.concat([positive_entries, negative_entries])
X_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  positive_entries['owned'] = positive_entries['user_id'].apply(lambda val: 1)


Unnamed: 0,user_id,item_name,owned
2745117,76561198020361485,Red Faction,1
325811,76561197996613372,Left 4 Dead 2,1
523983,76561198062459887,Super Meat Boy,1
528471,Grickles,Super Meat Boy,1
831408,SyberSybus,FTL: Faster Than Light,1
...,...,...,...
1542068,76561198091119630,Blast Em!,0
3474482,GloryToAllah,Turner,0
3909952,Demkriticz,Blacksea Odyssey,0
2589371,29998000,STEEL RIVALS,0


# Interaction Matrices

### Binary Interaction Matrix

In [10]:
%%time
X_train['owned'] = X_train['playtime_forever'].apply(lambda time: 1)
interaction_matrix = pd.pivot_table(X_train, index='user_id', columns='item_name', values='owned')
interaction_matrix = interaction_matrix.fillna(0)
interaction_matrix = interaction_matrix.astype('int64')
interaction_matrix

CPU times: total: 20.3 s
Wall time: 20.4 s


item_name,神明的一天世界(God's One Day World),! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,#SkiJump,$1 Ride,'n Verlore Verstand,.EXE,001 Game Creator,0RBITALIS,...,stratO,the static speaks my name,theBlu,theHunter,theHunter: Primal,vrAMP,丛林守望者（Ranger of the jungle）,侠客风云传(Tale of Wuxia),神楽道中記(KaguraDouchuuki),軒轅劍外傳穹之扉(The Gate of Firmament)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
--ace--,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
--ionex--,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-2SV-vuLB-Kg,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-404PageNotFound-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzonci,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzoptimuszz,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzydrax,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzyfo,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Implicit Rating Interaction Matrix

In [11]:
def calc_implicit_rating(playtime, game_median):
    if playtime >= game_median:
        return 5
    elif playtime >= 0.75*game_median:
        return 4
    elif playtime >= 0.5*game_median:
        return 3
    elif playtime >= 0.25*game_median:
        return 2
    else:
        return 1

In [12]:
%%time
cont_X_train = X_train.merge(X_train.groupby('item_name')['playtime_forever'].median().rename('playtime_median'), left_on='item_name', right_index=True)
cont_X_train['implicit_rating'] = cont_X_train.apply(lambda df: calc_implicit_rating(df['playtime_forever'], df['playtime_median']), axis=1)
cont_interaction_matrix = pd.pivot_table(cont_X_train, index='user_id', columns='item_name', values='implicit_rating')
cont_interaction_matrix = cont_interaction_matrix.fillna(0)
cont_interaction_matrix = cont_interaction_matrix.astype('int64')
cont_interaction_matrix

CPU times: total: 1min 2s
Wall time: 1min 2s


item_name,神明的一天世界(God's One Day World),! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,#SkiJump,$1 Ride,'n Verlore Verstand,.EXE,001 Game Creator,0RBITALIS,...,stratO,the static speaks my name,theBlu,theHunter,theHunter: Primal,vrAMP,丛林守望者（Ranger of the jungle）,侠客风云传(Tale of Wuxia),神楽道中記(KaguraDouchuuki),軒轅劍外傳穹之扉(The Gate of Firmament)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
--ace--,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
--ionex--,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-2SV-vuLB-Kg,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
-404PageNotFound-,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zzonci,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzoptimuszz,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzydrax,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
zzyfo,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Baseline Model

In [13]:
def Jaccard(user1, user2):
    return jaccard_score(interaction_matrix.T[user1], interaction_matrix.T[user2])

In [14]:
top_games = np.array(user_games.groupby('item_name')['user_id'].count().sort_values(ascending=False).head(1000).index)

def predict_owned(user, game):
    
    if user not in interaction_matrix.index:
        return game in top_games
    if game not in interaction_matrix.columns:
        return False
    
    game_interactions = interaction_matrix[game]
    game_users = game_interactions[game_interactions==1].index
    
    similarities = []
    
    counter = 0
    for user2 in game_users:
        if counter > 20:
            break
        if user != user2:
            similarities.append(Jaccard(user,user2))
            counter += 1
            
    if (game in top_games) or (max(similarities, default=0) > 0.45):
    #if game in top_games:
        return True
    return False

In [15]:
def test_baseline(n):
    pos = X_test[X_test['owned']==1].iloc[:int(n/2)]
    neg = X_test[X_test['owned']==0].iloc[:int(n/2)]
    test_data = pd.concat([pos, neg])
    
    X = test_data.drop(columns=['owned'])
    
    y = test_data['owned']
    
    
    y_pred = X.apply(lambda df: predict_owned(df.user_id, df.item_name),axis=1)
    
    return accuracy_score(y,y_pred)

In [45]:
%%time
test_baseline(20_000)

CPU times: total: 26min 5s
Wall time: 26min 22s


0.8496

# Collaborative Filtering Models

In [17]:
userToIdx, gameToIdx = {}, {}

for user in interaction_matrix.index:
    userToIdx[user] = len(userToIdx)    
for game in interaction_matrix.columns:
    gameToIdx[game] = len(gameToIdx)
    
idxToUser = {v: k for k, v in userToIdx.items()}
idxToGame = {v: k for k, v in gameToIdx.items()}

## Helper Functions

In [18]:
def predict_improved(user, game, model, n, curr_Xui):
    
    if user not in userToIdx.keys():
        return game in top_games
    
    recommended = model.recommend(userid = userToIdx[user], 
                                  user_items = scipy.sparse.csr_matrix(curr_Xui.T[userToIdx[user]]),
                                  N = n)
    recommended_games = [idxToGame[game_idx] for game_idx in recommended[0]]
    if game in recommended_games:
        return True
    return False

In [19]:
def test_improved(n, model, recc_size, curr_Xui):
    
    pos = X_test[X_test['owned']==1].iloc[:int(n/2)]
    neg = X_test[X_test['owned']==0].iloc[:int(n/2)]
    test_data = pd.concat([pos, neg])
    
    X = test_data.drop(columns=['owned'])
    
    y = test_data['owned']
    
    y_pred = X.apply(lambda df: predict_improved(df.user_id, df.item_name, model, recc_size, curr_Xui),axis=1)
    
    return accuracy_score(y,y_pred)
    

## Binary Interaction Models

In [20]:
Xiu = scipy.sparse.csr_matrix(interaction_matrix.values)    
Xui = scipy.sparse.csr_matrix(Xiu.T)

### Binary Interaction Bayesian Personalized Ranking Model

In [21]:
%%time
bpr_model = bpr.BayesianPersonalizedRanking(factors = 10, 
                                            random_state=42,
                                            num_threads=0)
bpr_model.fit(Xiu)

  0%|          | 0/100 [00:00<?, ?it/s]

CPU times: total: 2min 30s
Wall time: 10.2 s


In [46]:
%%time
test_improved(20_000, bpr_model, 1000, Xui)

CPU times: total: 13min 33s
Wall time: 1min 42s


0.7953

### Binary Interaction Alternating Least Squares Model

In [23]:
%%time
als_model = als.AlternatingLeastSquares(factors = 10, 
                                        random_state=42,
                                        num_threads=0)
als_model.fit(Xiu)

  0%|          | 0/15 [00:00<?, ?it/s]

CPU times: total: 27.1 s
Wall time: 1.87 s


In [47]:
%%time
test_improved(20_000, als_model, 1000, Xui)

CPU times: total: 13min 16s
Wall time: 1min 40s


0.87245

## Implicit Rating Models

In [25]:
cont_Xiu = scipy.sparse.csr_matrix(cont_interaction_matrix.values)    
cont_Xui = scipy.sparse.csr_matrix(cont_Xiu.T)

### Implicit Rating Bayesian Personalized Ranking Model

In [26]:
%%time
cont_bpr_model = bpr.BayesianPersonalizedRanking(factors = 10, 
                                                 random_state=42,
                                                 num_threads=0)
cont_bpr_model.fit(cont_Xiu)

  0%|          | 0/100 [00:00<?, ?it/s]

CPU times: total: 2min 33s
Wall time: 10.5 s


In [48]:
%%time
test_improved(20_000, cont_bpr_model, 1000, cont_Xui)

CPU times: total: 13min 20s
Wall time: 1min 40s


0.7961

### Implicit Rating Alternating Least Squares Model

In [28]:
%%time
cont_als_model = als.AlternatingLeastSquares(factors = 10, 
                                             random_state=42,
                                             num_threads=0)
cont_als_model.fit(cont_Xiu)

  0%|          | 0/15 [00:00<?, ?it/s]

CPU times: total: 31.2 s
Wall time: 2.19 s


In [49]:
%%time
test_improved(20_000, cont_als_model, 1000, cont_Xui)

CPU times: total: 13min 16s
Wall time: 1min 40s


0.8746

# Hyperparameter Tuning

In [30]:
def train_bpr_model(curr_Xiu, factors, learning_rate, regularization, iterations):
    model = bpr.BayesianPersonalizedRanking(factors = factors, 
                                            learning_rate = learning_rate,
                                            regularization = regularization,
                                            iterations = iterations, # 100
                                            random_state=42,
                                            num_threads=0)
    model.fit(curr_Xiu)
    return model


def train_als_model(curr_Xiu, factors, regularization, iterations):
    model = als.AlternatingLeastSquares(factors = factors, 
                                        regularization = regularization,
                                        iterations = iterations, # 15
                                        random_state=42,
                                        num_threads=0)
    model.fit(curr_Xiu)
    return model

In [31]:
als_param_grid = {'factors': [5, 10, 25, 50, 75, 100],
                  'n_recommended':[1000],
                  'regularization': [0, 0.001, 0.01, 0.1, 1, 10, 20, 35, 50, 60, 75, 90, 100],
                  'iterations': [5, 10, 15, 25, 35]}

# 390x

In [32]:
%%time
als_dict = {}
counter = 0

for factor in als_param_grid['factors']:
    for recc_size in als_param_grid['n_recommended']:
        for reg in als_param_grid['regularization']:
            for i in als_param_grid['iterations']:
                
                model = train_als_model(cont_Xiu, factor, reg, i)
                acc = test_improved(20_000, model, recc_size, cont_Xui)
                als_dict[counter] = (model, acc, [factor, recc_size, reg, i])
                counter += 1
                
[acc for model, acc, params in als_dict.values()]                

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

CPU times: total: 3d 13h 46min 16s
Wall time: 10h 27min 22s


[0.8668,
 0.86785,
 0.8677,
 0.8676,
 0.86805,
 0.8668,
 0.86795,
 0.86775,
 0.86765,
 0.86805,
 0.8668,
 0.868,
 0.86765,
 0.8677,
 0.8681,
 0.86655,
 0.8678,
 0.8676,
 0.86785,
 0.8681,
 0.8664,
 0.8691,
 0.86785,
 0.8679,
 0.868,
 0.86645,
 0.86795,
 0.86805,
 0.86795,
 0.8679,
 0.8661,
 0.86655,
 0.86875,
 0.86815,
 0.86815,
 0.86615,
 0.8666,
 0.86855,
 0.8682,
 0.8685,
 0.86665,
 0.86715,
 0.8672,
 0.8681,
 0.8684,
 0.867,
 0.86685,
 0.86735,
 0.8681,
 0.86825,
 0.8665,
 0.86695,
 0.8677,
 0.8678,
 0.8685,
 0.86615,
 0.8682,
 0.86865,
 0.86865,
 0.8686,
 0.8665,
 0.8679,
 0.86845,
 0.8685,
 0.8686,
 0.87495,
 0.87485,
 0.8746,
 0.87385,
 0.87335,
 0.87495,
 0.8749,
 0.87455,
 0.8739,
 0.8735,
 0.87485,
 0.87485,
 0.8746,
 0.87335,
 0.87375,
 0.8753,
 0.87445,
 0.8744,
 0.87385,
 0.87475,
 0.8749,
 0.87425,
 0.8746,
 0.87475,
 0.8748,
 0.8739,
 0.874,
 0.87335,
 0.87475,
 0.8752,
 0.8753,
 0.8753,
 0.87465,
 0.87335,
 0.8743,
 0.87535,
 0.87495,
 0.8756,
 0.8749,
 0.8761,
 0.87565

In [33]:
np.array([acc for model, acc, params in als_dict.values()]).max()

0.8893

In [34]:
np.array([params for model, acc, params in als_dict.values()])[np.where(np.array([acc for model, acc, params in als_dict.values()]) == np.array([acc for model, acc, params in als_dict.values()]).max())[0][0]]

array([ 100., 1000.,  100.,   15.])

In [50]:
%%time
#model = train_als_model(cont_Xiu, factor, reg, iter)
final_model = train_als_model(cont_Xiu, 100, 100, 15)
test_improved(X_test.shape[0], final_model, 1000, cont_Xui)

  0%|          | 0/15 [00:00<?, ?it/s]

CPU times: total: 18h 57min 10s
Wall time: 2h 23min 36s


0.8930973064888

# Run Recommendations

In [None]:
def recommend_games(user, n_games, date_range, tags_include, tags_exclude):
    
    true_recommendations_count = 1000
    final_recommended = final_model.recommend(userid = userToIdx[query_user], 
                                              user_items = scipy.sparse.csr_matrix(cont_Xui.T[userToIdx[query_user]]),
                                              N = true_recommendations_count)
    final_recommended_games = [idxToGame[game_idx] for game_idx in final_recommended[0]]
    final_recommended_ranks = final_recommended[1]
    
    game_recommendations = pd.DataFrame({'game':final_recommended_games,
                                         'rank': final_recommended_ranks})
    
    recommendations_with_tags = game_recommendations.merge(games, left_on='game', right_on='app_name')[['game','rank', 'release_date', 'url'] + tags_include + tags_exclude]
    # filter date
    recommendations_with_tags = recommendations_with_tags[recommendations_with_tags['release_date'] > date_range[0]]
    recommendations_with_tags = recommendations_with_tags[recommendations_with_tags['release_date'] < date_range[1]]
    # filter tags
    for tag in tags_include:
        recommendations_with_tags = recommendations_with_tags[recommendations_with_tags[tag]==1]
    for tag in tags_exclude:
        recommendations_with_tags = recommendations_with_tags[recommendations_with_tags[tag]==0]
        
    return recommendations_with_tags.head(n_games_to_recommend)#[['game', 'url']]

In [None]:
query_user = '--000--'
n_games_to_recommend = 20
tags_include = ['Action']
tags_exclude = ['Multiplayer']
# datetime (year, month, day)
date_range = [datetime(2010,1,1), datetime(2022,1,1)]

In [None]:
recommend_games(query_user, n_games_to_recommend, date_range, tags_include, tags_exclude)