In [1]:
import pandas as pd
import sklearn as sk
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the play-time records. `header=0` marks the file's first line as a
# header row, and `names` replaces whatever labels that row contains.
df = pd.read_csv(
    "clean.csv",
    header=0,
    names=["User", "Game", "PlayTime"],
)

In [3]:
# Preview the first rows to check that the three columns loaded as expected.
df.head()

Unnamed: 0,User,Game,PlayTime
0,151603712,The Elder Scrolls V Skyrim,273.0
1,151603712,Fallout 4,87.0
2,151603712,Spore,14.9
3,151603712,Fallout New Vegas,12.1
4,151603712,Left 4 Dead 2,8.9


In [4]:
# User x Game table of play times. pivot_table aggregates with the mean by
# default, and (user, game) pairs with no record are left as NaN here.
matrix = pd.pivot_table(
    df,
    values="PlayTime",
    index="User",
    columns="Game",
)
matrix

Game,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5250,,,,,,,,,,,...,,,,,,,,,,
76767,,,,,,,,,,,...,,,,,,,,,,
86540,,,,,,,,,,,...,,,,,,,,,,
144736,,,,,,,,,,,...,,,,,,,,,,
181212,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309434439,,,,,,,,,,,...,,,,,,,,,,
309554670,,,,,,,,,,,...,,,,,,,,,,
309626088,,,,,,,,,,,...,,,,,,,,,,
309824202,,,,,,,,,,,...,,,,,,,,,,


In [5]:
# Games ranked by average play time per record, highest first.
(
    df.groupby("Game")["PlayTime"]
    .mean()
    .sort_values(ascending=False)
    .head()
)

Game
Eastside Hockey Manager              1295.000000
Baldur's Gate II Enhanced Edition     475.255556
FIFA Manager 09                       411.000000
Perpetuum                             400.975000
Football Manager 2014                 391.984615
Name: PlayTime, dtype: float64

In [6]:
# Pull the raw records for one game to see how many rows its average is based on.
df.loc[df["Game"].eq("Eastside Hockey Manager")]

Unnamed: 0,User,Game,PlayTime
68581,213854339,Eastside Hockey Manager,1295.0


# Recommendation system, following the approach described at https://medium.com/web-mining-is688-spring-2021/video-game-recommendation-system-b9bcb306bf16

In [7]:
# Rebuild the User x Game table, this time storing 0 instead of NaN for
# (user, game) pairs that have no play-time record.
matrix = pd.pivot_table(
    df,
    values="PlayTime",
    index="User",
    columns="Game",
    fill_value=0,
)
matrix

Game,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5250,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76767,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
181212,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309434439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
309554670,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
309626088,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
309824202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
def center(row):
    """Mean-centre a Series and scale it by its value range.

    Computes ``(row - row.mean()) / (row.max() - row.min())``.

    Despite the parameter name, ``DataFrame.apply`` passes *columns* by
    default, so ``matrix.apply(center)`` normalises each column separately.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to normalise.

    Returns
    -------
    pandas.Series
        Values centred on zero and divided by ``max - min``. When every value
        is identical (range of zero) the result is all zeros rather than the
        NaN that 0/0 would produce.
    """
    spread = row.max() - row.min()
    if spread == 0:
        # Constant input: there is nothing to scale, and dividing would turn
        # the whole Series into NaN. `row - row` keeps the index (and any NaN
        # already present) while giving exact zeros.
        return (row - row).astype(float)
    return (row - row.mean()) / spread
# axis=0 (the default) hands each column of `matrix` to `center` in turn.
matrix_std = matrix.apply(center, axis=0)

In [9]:
# Inspect the result of applying `center` to `matrix`.
matrix_std

Game,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5250,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
76767,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
86540,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
144736,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
181212,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309434439,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
309554670,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
309626088,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107
309824202,-0.000088,-0.000176,-0.000157,-0.000096,-0.000088,-0.000197,-0.000088,-0.000312,-0.000206,-0.000145,...,-0.000088,-0.000088,-0.00011,-0.000139,-0.000403,-0.000088,-0.000746,-0.000352,-0.000287,-0.000107


In [10]:
def gameRec(g, min_players=100, top_n=5):
    """Return the games whose play times correlate most strongly with ``g``.

    Reads the notebook-level ``matrix_std``, ``matrix`` and ``df``.

    Parameters
    ----------
    g : str
        Column label of the reference game in ``matrix_std``. A label that is
        not a column raises ``KeyError``.
    min_players : int, default 100
        Minimum number of play-time records a game must have in ``df`` to be
        included in the result.
    top_n : int, default 5
        Number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Indexed by game, with columns ``"PlayTime size"`` (record count),
        ``"PlayTime mean"`` (mean play time) and ``"similarity"``, sorted by
        similarity from highest to lowest. The reference game itself is not
        filtered out.
    """
    reference = matrix_std[g]

    # Correlate every column of `matrix` with the reference column (Pearson is
    # corrwith's default method) and drop games whose correlation is undefined.
    similarity = matrix.corrwith(reference).dropna().rename("similarity")

    # String aggregation names instead of np.size / np.mean: recent pandas
    # deprecates passing NumPy callables to agg. The prefix reproduces the
    # "PlayTime size" / "PlayTime mean" column labels.
    game_stats = (
        df.groupby("Game")["PlayTime"]
        .agg(["size", "mean"])
        .add_prefix("PlayTime ")
    )

    # Keep only games with enough records.
    popular = game_stats[game_stats["PlayTime size"] >= min_players]

    # Left join: a popular game without a defined correlation gets NaN, which
    # sort_values places last.
    ranked = popular.join(similarity).sort_values("similarity", ascending=False)
    return ranked.head(top_n)

In [11]:
# Example: the five highest-similarity games for "Half-Life 2".
gameRec("Half-Life 2")

Unnamed: 0_level_0,PlayTime size,PlayTime mean,similarity
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Half-Life 2,356,11.967135,1.0
Half-Life 2 Episode One,148,5.701351,0.682176
Half-Life 2 Episode Two,141,7.944681,0.636826
Portal,417,5.474341,0.285296
Portal 2,453,20.126049,0.272772


In [12]:
# Example: the five highest-similarity games for "DayZ".
gameRec("DayZ")

Unnamed: 0_level_0,PlayTime size,PlayTime mean,similarity
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DayZ,246,82.582927,1.0
Arma 3,157,153.221019,0.125702
Counter-Strike Global Offensive,1377,234.402033,0.114871
Call of Duty Black Ops - Multiplayer,198,86.588384,0.102779
H1Z1,137,53.753285,0.102001


In [13]:
# Example: the five highest-similarity games for "The Elder Scrolls V Skyrim".
gameRec("The Elder Scrolls V Skyrim")

Unnamed: 0_level_0,PlayTime size,PlayTime mean,similarity
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
The Elder Scrolls V Skyrim,677,104.710931,1.0
Far Cry 3,181,28.044199,0.226599
Fallout New Vegas,287,51.682578,0.218436
Fallout 4,167,64.447904,0.201649
Deus Ex Human Revolution,138,25.995652,0.190528


In [14]:
# Put games on the rows: cosine_similarity compares the rows of its input
# with one another.
matrix_std_transposed = matrix_std.transpose()

cosine_sim = cosine_similarity(matrix_std_transposed)

# Label both axes of the square similarity array with the game names.
game_sim_df = pd.DataFrame(
    data=cosine_sim,
    columns=matrix_std.columns,
    index=matrix_std.columns,
)

game_sim_df

Game,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 Legends,1.000000,-0.000144,-0.000140,-0.000096,-0.000088,-0.000147,-0.000088,-0.000202,-0.000162,-0.000132,...,-0.000088,-0.000088,-0.000108,-0.000120,-0.000232,-0.000088,-0.000413,-0.000224,-0.000261,-0.000105
0RBITALIS,-0.000144,1.000000,-0.000229,0.075136,-0.000144,-0.000240,-0.000144,-0.000329,-0.000264,-0.000215,...,-0.000144,-0.000144,-0.000177,-0.000197,-0.000379,-0.000144,-0.000675,-0.000366,0.003490,-0.000172
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),-0.000140,-0.000229,1.000000,-0.000153,-0.000140,-0.000234,-0.000140,-0.000321,-0.000258,-0.000210,...,-0.000140,-0.000140,-0.000172,-0.000192,-0.000369,-0.000140,0.000922,-0.000357,-0.000416,-0.000168
10 Second Ninja,-0.000096,0.075136,-0.000153,1.000000,-0.000096,-0.000160,-0.000096,-0.000219,-0.000176,-0.000143,...,-0.000096,-0.000096,-0.000118,-0.000131,-0.000252,-0.000096,-0.000449,-0.000244,-0.000284,-0.000115
10000000,-0.000088,-0.000144,-0.000140,-0.000096,1.000000,-0.000147,-0.000088,-0.000202,-0.000162,-0.000132,...,-0.000088,-0.000088,-0.000108,-0.000120,-0.000232,-0.000088,-0.000413,-0.000224,-0.000261,-0.000105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
rymdkapsel,-0.000088,-0.000144,-0.000140,-0.000096,-0.000088,-0.000147,-0.000088,-0.000202,-0.000162,-0.000132,...,-0.000088,-0.000088,-0.000108,-0.000120,-0.000232,1.000000,-0.000413,-0.000224,-0.000261,-0.000105
sZone-Online,-0.000413,-0.000675,0.000922,-0.000449,-0.000413,-0.000688,-0.000413,-0.000945,-0.000758,-0.000618,...,-0.000413,-0.000413,-0.000508,-0.000564,-0.001087,-0.000413,1.000000,0.038860,0.040288,-0.000494
the static speaks my name,-0.000224,-0.000366,-0.000357,-0.000244,-0.000224,-0.000373,-0.000224,-0.000512,-0.000411,-0.000335,...,-0.000224,-0.000224,-0.000275,-0.000306,-0.000589,-0.000224,0.038860,1.000000,0.001895,-0.000268
theHunter,-0.000261,0.003490,-0.000416,-0.000284,-0.000261,-0.000435,-0.000261,-0.000598,-0.000480,-0.000391,...,-0.000261,-0.000261,-0.000321,-0.000357,-0.000601,-0.000261,0.040288,0.001895,1.000000,0.029433


In [15]:
def get_games(game, playtime):
    """Rank all games by their similarity to ``game``, weighted by ``playtime``.

    Takes the ``game`` column of the notebook-level ``game_sim_df``,
    multiplies every score by ``playtime`` and returns the resulting Series
    ordered from the highest score to the lowest.
    """
    weighted_scores = game_sim_df[game].mul(playtime)
    return weighted_scores.sort_values(ascending=False)

In [16]:
# Example: every game's similarity to 'Half-Life 2', multiplied by a playtime of 100.
print(get_games('Half-Life 2', 100))

Game
Half-Life 2                       100.000000
Half-Life 2 Episode One            68.217622
Half-Life 2 Episode Two            63.682597
Portal                             28.529621
Portal 2                           27.277151
                                     ...    
The Mighty Quest For Epic Loot     -0.344571
BLOCKADE 3D                        -0.362376
Football Manager 2016              -0.402572
FreeStyle2 Street Basketball       -0.431248
Football Manager 2015              -0.550063
Name: Half-Life 2, Length: 3600, dtype: float64


In [19]:
# Example player profile: (game, playtime) pairs used as seeds.
gamer = [('Dishonored', 150), ('Napoleon Total War', 200), ('Spore', 200)]

# One row of playtime-weighted similarity scores per seed game. All rows are
# built first and concatenated once, instead of growing the frame inside a
# loop through the private DataFrame._append.
gamer_df = pd.concat(
    [get_games(game, play).to_frame().T for game, play in gamer],
    ignore_index=True,
)

# Add the rows together to get one overall score per candidate game. The seed
# games are not filtered out, so they appear in the ranking as well.
recommended_games = gamer_df.sum().sort_values(ascending=False)
print(recommended_games.head())

Game
Spore                 202.694927
Napoleon Total War    201.181443
Dishonored            154.465176
Empire Total War       97.933771
Total War SHOGUN 2     97.601362
dtype: float64
