In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from surprise import Dataset, Reader
from surprise import accuracy
from surprise.model_selection import cross_validate, train_test_split
from surprise.prediction_algorithms import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

In [12]:
fps_main = pd.read_csv('../../../data/fps_main.csv')
fps_main

Unnamed: 0,steamid,appid,app_title,app_tags,review,fps,voted_up
0,76561198865552498,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...",very good game,True,1
1,76561197964533061,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...",beause S,True,1
2,76561198290998839,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...","Russians everywhere, do not recommend",True,1
3,76561198073021168,60,Ricochet,"['Action', 'FPS', 'Multiplayer', 'Classic', 'F...","best game, best game, 10/10 i r8 8/8",True,1
4,76561198061142423,550,Left 4 Dead 2,"['Zombies', 'Co-op', 'FPS', 'Multiplayer', 'Sh...",køb hvis du kan lide zombie spil ;D,True,1
...,...,...,...,...,...,...,...
19675,76561198174234215,70,Half-Life,"['FPS', 'Sci-fi', 'Action', 'Singleplayer', ""1...",Honestly does not hold up for me. Go buy Black...,True,0
19676,76561198399192985,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...",bad comunity. If you want to try hard at this ...,True,0
19677,76561197974601384,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...",Dont start,True,0
19678,76561198204888087,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...",Why the hell does this game have a battle roya...,True,0


In [13]:
fps_rs = fps_main.drop(['appid', 'review', 'fps'], axis=1)

In [16]:
fps_rs = fps_rs.drop('app_tags', axis=1)

In [17]:
reader = Reader(rating_scale=(0,1), line_format='user item rating', sep=',')

In [18]:
user_data = Dataset.load_from_df(fps_rs, reader)

In [19]:
trainset, testset = train_test_split(user_data, test_size=.3)

In [20]:
svd = SVD()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x164678df910>

In [21]:
predictions = svd.test(testset)
accuracy.rmse(predictions)

RMSE: 0.4775


0.4775415073157216

In [22]:
from surprise import BaselineOnly

In [23]:
cross_validate(KNNBasic(), user_data, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.4652  0.4655  0.4668  0.4651  0.4652  0.4655  0.0006  
MAE (testset)     0.4315  0.4321  0.4346  0.4315  0.4307  0.4321  0.0013  
Fit time          16.78   15.55   14.17   18.07   21.51   17.21   2.51    
Test time         6.11    5.89    6.13    6.36    5.93    6.09    0.17    


{'test_rmse': array([0.46523623, 0.46549689, 0.46679207, 0.46505795, 0.46515726]),
 'test_mae': array([0.4314564 , 0.43206115, 0.43456905, 0.43146235, 0.43074453]),
 'fit_time': (16.777573108673096,
  15.549940586090088,
  14.168081998825073,
  18.06694769859314,
  21.506389617919922),
 'test_time': (6.108498573303223,
  5.894718647003174,
  6.129546403884888,
  6.362644672393799,
  5.9343109130859375)}

In [24]:
cross_validate(BaselineOnly(), user_data, verbose=True)

Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Estimating biases using als...
Evaluating RMSE, MAE of algorithm BaselineOnly on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.4781  0.4757  0.4740  0.4773  0.4778  0.4766  0.0015  
MAE (testset)     0.4604  0.4577  0.4567  0.4592  0.4608  0.4590  0.0016  
Fit time          0.07    0.07    0.07    0.07    0.07    0.07    0.00    
Test time         0.04    0.03    0.04    0.03    0.04    0.03    0.00    


{'test_rmse': array([0.47805365, 0.47569734, 0.47402647, 0.47733905, 0.47783075]),
 'test_mae': array([0.4604216 , 0.45773284, 0.45665225, 0.45918751, 0.46081879]),
 'fit_time': (0.07436823844909668,
  0.0702371597290039,
  0.06700325012207031,
  0.0729978084564209,
  0.07000494003295898),
 'test_time': (0.03700613975524902,
  0.03100442886352539,
  0.03900718688964844,
  0.030002355575561523,
  0.036008358001708984)}

In [25]:
sim_options = {
    "name": ["msd", "cosine"],
    "min_support": [1, 2, 3, 4],
    "user_based": [False, True],
}

param_grid = {"sim_options": sim_options}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(user_data)

print(gs.best_score["rmse"])
print(gs.best_params["rmse"])

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

  sim = construction_func[name](*args)


Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computi

In [30]:
knnmeans = KNNWithMeans(sim_options={'name': 'msd',
                                    'user_based': False,
                                    'min_support': 2})

In [31]:
knnmeans.fit(trainset)
knnmeans.sim

Computing the msd similarity matrix...
Done computing similarity matrix.


array([[1. , 0. , 0.5, ..., 0. , 0. , 0. ],
       [0. , 1. , 0. , ..., 0. , 0. , 0. ],
       [0.5, 0. , 1. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 1. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 1. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 1. ]])

In [32]:
predictions = knnmeans.test(testset)
print(accuracy.rmse(predictions))

RMSE: 0.4681
0.4680974809223165


In [23]:
sim_pearson = {'name':'pearson', 'user_based':False, 'min_support': 2}
knn_baseline = KNNBaseline(sim_options=sim_pearson)
knn_baseline.fit(trainset)

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBaseline at 0x16454460a60>

In [24]:
knn_baseline.sim

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [25]:
predictions = knn_baseline.test(testset)
print(accuracy.rmse(predictions))

RMSE: 0.4547
0.45471482737637564


In [31]:
steamid= 1
preds = []
for app_title in fps_rs.app_title.unique():
    score = knn_baseline.predict(steamid, app_title).est
    preds.append((app_title, score))

In [32]:
preds

[('Counter-Strike: Global Offensive', 0.4234106296711539),
 ('Ricochet', 0.46605322425428),
 ('Left 4 Dead 2', 0.7625581571199732),
 ('Half-Life 2', 0.8042024330418436),
 ('Ultimate Doom', 0.6113265316583864),
 ('Day of Defeat: Source', 0.5255996800311558),
 ('Portal 2', 0.8740650629341153),
 ('Half-Life 2: Episode One', 0.6149726907932171),
 ('DOOM II', 0.5859110194600499),
 ('Killing Floor', 0.7083394938107219),
 ('QUAKE', 0.6301155806080905),
 ('Counter-Strike: Source', 0.6607936999127462),
 ('Portal', 0.8501013750049128),
 ('QUAKE II', 0.4490938403910708),
 ('Team Fortress 2', 0.5534281156091859),
 ('Left 4 Dead', 0.6740872231672107),
 ('Half-Life', 0.7083910436032981),
 ('Half-Life 2: Deathmatch', 0.41802189152605546),
 ('Team Fortress Classic', 0.4422715762446968),
 ('Half-Life: Blue Shift', 0.5835058907497811),
 ('Half-Life 2: Episode Two', 0.7118212918848671),
 ('Counter-Strike', 0.7037406833270992),
 ('Counter-Strike: Condition Zero', 0.5403940018013103),
 ('Judge Dredd: Dredd

In [40]:
n = 5
recommendations = sorted(preds, key=lambda x: x[1], reverse=True)[:5]

In [41]:
ranked_movies = sorted(preds, key=lambda x: x[1], reverse=True)

In [56]:
def recommended_games(app_title):
    preds = []
    for app_title in fps_rs.app_title.unique():
        score = knn_baseline.predict(steamid, app_title).est
        preds.append((app_title, score))
    n = 5
    recommendations = sorted(preds, key=lambda x: x[1], reverse=True)[:5]
    return recommendations

In [66]:
recommended_games('Call of Duty®')

[('Portal 2', 0.8740650629341153),
 ('Portal', 0.8501013750049128),
 ('Half-Life 2', 0.8042024330418436),
 ('Left 4 Dead 2', 0.7625581571199732),
 ('Half-Life 2: Episode Two', 0.7118212918848671)]