In [1]:
# Packages for data manipulation
import pandas as pd
import numpy as np

# Packages used to create the game recommender 
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import StandardScaler

# Data visualization packages
import matplotlib.pyplot as plt 
import seaborn as sns


%matplotlib inline

In [2]:
# Load the 2016 Steam data set from the CSV next to this notebook into a DataFrame.

steam_csv_path = './steam_data.csv'
df = pd.read_csv(steam_csv_path)

In [3]:
# Preview the first five rows to confirm the columns loaded as expected.

df.head(n=5)

Unnamed: 0,User ID,Game Title,Hours Played
0,151603712,The Elder Scrolls V Skyrim,273.0
1,151603712,Fallout 4,87.0
2,151603712,Spore,14.9
3,151603712,Fallout New Vegas,12.1
4,151603712,Left 4 Dead 2,8.9


In [4]:
# Checking the shape of the dataframe as loaded, before any filtering: (rows, columns)

df.shape

(70489, 3)

In [5]:
# Keep only the rows with strictly more than 20 hours played. Everything at or
# below 20 hours (which includes the 1-hour outliers) is dropped.

played_over_20_hours = df['Hours Played'] > 20
df = df[played_over_20_hours]

In [6]:
# Build the user x game matrix: one row per 'User ID', one column per
# 'Game Title', cells holding 'Hours Played'. pivot_table's default aggregation
# is the mean, so repeated (user, game) rows would be averaged; pairs that never
# occur are left as NaN.

pivot = df.pivot_table(
    index='User ID',
    columns='Game Title',
    values='Hours Played',
)

In [7]:
# Replacing NaN's with 0's, so a user/game pair with no recorded entry counts as
# 0 hours. The filled frame is rebound to `pivot` instead of mutating it with
# inplace=True, which keeps the step chainable and avoids in-place mutation.

pivot = pivot.fillna(0)

In [8]:
# Checking shape of pivot table: (number of users, number of game titles),
# since the pivot is indexed by 'User ID' with one column per 'Game Title'

pivot.shape

(5329, 1228)

In [9]:
# Scaling my data
#
# fit_transform returns a NEW array and leaves `pivot` untouched, so the result
# has to be bound to a name or it is thrown away as soon as the cell finishes.
# It is kept in `pivot_scaled` (a plain NumPy array: the user/game labels live
# on `pivot.index` / `pivot.columns`).
#
# NOTE: the similarity cell below still reads the unscaled `pivot`. For the
# Pearson-based 'correlation' metric that is harmless, because standardising
# each game column does not change the correlation between columns.

ss = StandardScaler()
pivot_scaled = ss.fit_transform(pivot)
pivot_scaled

array([[-0.01934092, -0.01561408, -0.01369992, ..., -0.01369992,
        -0.01727495, -0.01369992],
       [-0.01934092, -0.01561408, -0.01369992, ..., -0.01369992,
        -0.01727495, -0.01369992],
       [-0.01934092, -0.01561408, -0.01369992, ..., -0.01369992,
        -0.01727495, -0.01369992],
       ...,
       [-0.01934092, -0.01561408, -0.01369992, ..., -0.01369992,
        -0.01727495, -0.01369992],
       [-0.01934092, -0.01561408, -0.01369992, ..., -0.01369992,
        -0.01727495, -0.01369992],
       [-0.01934092, -0.01561408, -0.01369992, ..., -0.01369992,
        -0.01727495, -0.01369992]])

In [10]:
# Game-to-game similarity. Several pairwise metrics were tried (euclidean,
# hamming, etc.); 'correlation' gave the most accurate recommendations.
# NOTE: metric='correlation' is the correlation distance, i.e. 1 - Pearson's r
# (not Spearman's rank correlation). Subtracting it from 1 turns the distance
# back into a similarity, so despite its name `distances` holds similarity
# scores where higher means more alike.

correlation_distance = pairwise_distances(pivot.T, metric='correlation')
distances = 1 - correlation_distance

In [11]:
# Wrap the similarity matrix in a DataFrame labelled by game title on both
# axes, so a game's scores can be looked up by name.

game_titles = pivot.columns
distance_df = pd.DataFrame(data=distances, index=game_titles, columns=game_titles)

In [12]:
# Sanity check: show the first two rows of the labelled similarity matrix.

distance_df.head(n=2)

Game Title,100% Orange Juice,3DMark,3DMark Vantage,4 Elements,7 Days to Die,8BitBoy,A Game of Thrones - Genesis,A.V.A - Alliance of Valiant Arms,AKIBA'S TRIP Undead & Undressed,APB Reloaded,...,You Must Build A Boat,You Need A Budget 4 (YNAB),Ys Origin,Zombie Panic Source,Zombies Monsters Robots,Zuma's Revenge,liteCam Game 100 FPS Game Capture,resident evil 4 / biohazard 4,theHunter,theHunter Primal
Game Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
100% Orange Juice,1.0,-0.000302,-0.000265,-0.000265,-0.00098,-0.000265,-0.000265,-0.000265,-0.000265,-0.000467,...,-0.000265,-0.000346,-0.000265,-0.000477,-0.000359,-0.000265,-0.000265,-0.000265,-0.000334,-0.000265
3DMark,-0.000302,1.0,0.988505,-0.000214,-0.000791,-0.000214,-0.000214,-0.000214,-0.000214,-0.000377,...,-0.000214,-0.000279,-0.000214,-0.000385,-0.00029,-0.000214,-0.000214,-0.000214,-0.00027,-0.000214


In [13]:
# Viewing recommendations for a game

query = "Fallout 3"
# Resolve the search text to a column label: the first title containing it.
query = [col for col in distance_df.columns if query in col][0]
# Rank all games by similarity to the query (highest first), remove the query's
# own entry by label, then keep the 14 best matches. Dropping by label instead
# of slicing off position 0 matters when another game ties with the query at a
# similarity of 1.0: the query is then not guaranteed to sort first, and a
# positional slice would list the game as its own recommendation.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Commandos 2 Men of Courage                               0.501064
Red Faction Guerrilla Steam Edition                      0.413284
Sid Meier's Civilization IV                              0.399437
The Elder Scrolls IV Oblivion                            0.385708
Dungeons & Dragons Online                                0.275359
The Maw                                                  0.269582
GALAK-Z                                                  0.269582
Magic The Gathering - Duels of the Planeswalkers 2013    0.262619
The Last Remnant                                         0.213352
Marvel Puzzle Quest                                      0.192523
The Elder Scrolls III Morrowind                          0.190282
Gems of War                                              0.185458
Kingdoms of Amalur Reckoning                             0.177514
Magic The Gathering  Duels of the Planeswalkers 2012     0.159221
Name: Fallout 3, dtype: float64

In [14]:
# Viewing recommendations for a game

query = "Path of Exile"
# Resolve the search text to a column label: the first title containing it.
query = [col for col in distance_df.columns if query in col][0]
# Rank all games by similarity to the query (highest first), remove the query's
# own entry by label, then keep the 14 best matches. Dropping by label instead
# of slicing off position 0 matters when another game ties with the query at a
# similarity of 1.0: the query is then not guaranteed to sort first, and a
# positional slice would list the game as its own recommendation.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Bound By Flame                                         0.351856
Divinity II - Ego Draconis                             0.351856
Galactic Civilizations III                             0.319710
Elite Dangerous                                        0.297479
Earth 2160                                             0.243138
Tom Clancy's Ghost Recon Future Soldier                0.226724
Total War ATTILA                                       0.211502
Fallout Tactics                                        0.208190
The Wolf Among Us                                      0.205204
Star Wars The Force Unleashed Ultimate Sith Edition    0.192394
King's Bounty The Legend                               0.189610
Aquaria                                                0.174404
Lords Of The Fallen                                    0.167703
NOBUNAGA'S AMBITION Sphere of Influence                0.164076
Name: Path of Exile, dtype: float64

In [15]:
# Viewing recommendations for a game

query = "Dota 2"
# Resolve the search text to a column label: the first title containing it.
query = [col for col in distance_df.columns if query in col][0]
# Rank all games by similarity to the query (highest first), remove the query's
# own entry by label, then keep the 14 best matches. Dropping by label instead
# of slicing off position 0 matters when another game ties with the query at a
# similarity of 1.0: the query is then not guaranteed to sort first, and a
# positional slice would list the game as its own recommendation.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Company of Heroes Opposing Fronts             0.109541
Dark Messiah of Might & Magic Multi-Player    0.098527
Titan Quest                                   0.092437
I Am Alive                                    0.074373
Alan Wake                                     0.071014
Nuclear Dawn                                  0.063504
Darksiders                                    0.062931
Dungeon Defenders II                          0.061327
Everlasting Summer                            0.057806
Nosgoth                                       0.057119
Devilian                                      0.056630
Global Agenda - Beta                          0.053776
Warhammer End Times - Vermintide              0.053564
Resident Evil / biohazard HD REMASTER         0.052629
Name: Dota 2, dtype: float64

In [16]:
# Viewing recommendations for a game

query = "Counter-Strike Global Offensive"
# Resolve the search text to a column label: the first title containing it.
query = [col for col in distance_df.columns if query in col][0]
# Rank all games by similarity to the query (highest first), remove the query's
# own entry by label, then keep the 14 best matches. Dropping by label instead
# of slicing off position 0 matters when another game ties with the query at a
# similarity of 1.0: the query is then not guaranteed to sort first, and a
# positional slice would list the game as its own recommendation.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Call of Duty Black Ops - Multiplayer OSX    0.235565
Counter-Strike Source                       0.231499
Pro Cycling Manager 2015                    0.224438
TrackMania United                           0.212074
KickBeat Steam Edition                      0.199585
Pro Cycling Manager 2013                    0.197309
Rust                                        0.155427
Nidhogg                                     0.141542
Two Worlds II                               0.121167
Garry's Mod                                 0.103960
GRID 2                                      0.102362
DayZ                                        0.101925
Medal of Honor(TM) Multiplayer              0.098457
AdVenture Capitalist                        0.098017
Name: Counter-Strike Global Offensive, dtype: float64

In [17]:
# Viewing recommendations for a game

query = "Shovel Knight"
# Resolve the search text to a column label: the first title containing it.
query = [col for col in distance_df.columns if query in col][0]
# Rank all games by similarity to the query (highest first), remove the query's
# own entry by label, then keep the 14 best matches. Dropping by label instead
# of slicing off position 0 matters when another game ties with the query at a
# similarity of 1.0: the query is then not guaranteed to sort first, and a
# positional slice would list the game as its own recommendation.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Breezeblox                      0.726609
Rooms The Unsolvable Puzzle     0.726609
Freedom Fall                    0.726609
Scarygirl                       0.726609
Cave Story+                     0.726609
Bard's Gold                     0.726609
Glare                           0.726609
The Cave                        0.726609
Escape Goat                     0.726609
Giana Sisters Twisted Dreams    0.631817
Ori and the Blind Forest        0.441283
The Stanley Parable             0.370565
Gang Beasts                     0.370565
Environmental Station Alpha     0.370565
Name: Shovel Knight, dtype: float64