### Singular Value Decomposition for user playtimes

In [32]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from surprise import SVDpp, BaselineOnly
from surprise import Reader, Dataset
from surprise import accuracy
from surprise.model_selection import GridSearchCV
from surprise.model_selection import train_test_split

In [18]:
users_df = pd.read_csv('../data/steam_playtime_clean.csv')

In [19]:
# use StandardScaler to scale user playtimes
scaler = StandardScaler()
users_df['playtime_forever'] = scaler.fit_transform(users_df['playtime_forever'].values.reshape(-1, 1))

In [20]:
# instantiate surprise.Reader()
reader = Reader()

# make surprise dataset
data = Dataset.load_from_df(users_df[['steam_id', 'appid', 'playtime_forever']], reader)

In [21]:
# make a training and test set
trainset, testset = train_test_split(data, test_size=0.25)

In [22]:
# do gridsearch on svdpp
param_grid = {'n_factors': [50, 150], 'n_epochs': [20, 40], 'lr_all': [0.005, 0.02], 'reg_all': [0.02, 0.2]}
gs = GridSearchCV(SVDpp, param_grid, measures=['rmse', 'mae'], cv=3)

In [23]:
gs.fit(data)

KeyboardInterrupt: 

In [35]:
svdpp = SVDpp(n_factors=150, n_epochs=40, lr_all=0.02, reg_all=0.2)

In [36]:
fullset = data.build_full_trainset()

In [37]:
svdpp.fit(fullset)

KeyboardInterrupt: 

In [26]:
# grid search params for baselineonly
param_grid = {'bsl_options': {'method': ['als', 'sgd'], 'reg': [0.02, 0.2], 'learning_rate': [0.005, 0.02]}}
gs = GridSearchCV(BaselineOnly, param_grid, measures=['rmse', 'mae'], cv=3)

In [27]:
gs.fit(data)

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...
Estimating biases using sgd...


In [28]:
gs.best_params

{'rmse': {'bsl_options': {'method': 'als',
   'reg': 0.02,
   'learning_rate': 0.005}},
 'mae': {'bsl_options': {'method': 'als',
   'reg': 0.02,
   'learning_rate': 0.005}}}

In [29]:
# make baselineonly using best params
bsl_options = {'method': 'als', 'reg': 0.02, 'learning_rate': 0.005}
algo = BaselineOnly(bsl_options=bsl_options)

In [30]:
algo.fit(trainset)

Estimating biases using als...


<surprise.prediction_algorithms.baseline_only.BaselineOnly at 0x7f1064c4f7c0>

In [33]:
bsl_predictions = algo.fit(trainset).test(testset)

Estimating biases using als...


In [34]:
accuracy.rmse(bsl_predictions)

RMSE: 1.3836


1.3836407094728962