In [1]:
# Packages for data manipulation
import pandas as pd
import numpy as np

# Packages used to create the game recommender 
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import StandardScaler

# Data visualization packages
import matplotlib.pyplot as plt 
import seaborn as sns


%matplotlib inline

In [2]:
# Load the Steam play-time records (2016 data) from CSV into a pandas DataFrame

df = pd.read_csv(filepath_or_buffer='./steam_data.csv')

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded

df.head(n=5)

Unnamed: 0,User ID,Game Title,Hours Played
0,151603712,The Elder Scrolls V Skyrim,273.0
1,151603712,Fallout 4,87.0
2,151603712,Spore,14.9
3,151603712,Fallout New Vegas,12.1
4,151603712,Left 4 Dead 2,8.9


In [4]:
# Checking the shape of the dataframe: (number of rows, number of columns)

df.shape

(70489, 3)

In [7]:
# Build a users x games matrix of hours played.
# Each cell is the mean of 'Hours Played' for that (user, game) pair (the pivot_table
# default aggregation); pairs with no record come out as NaN.

pivot = df.pivot_table(
    values='Hours Played',
    index='User ID',
    columns='Game Title',
)

In [8]:
# Replacing NaN's with 0's: a game with no record for a user counts as 0 hours played.
# Reassign instead of using inplace=True so the cell produces its result by assignment
# (no hidden mutation of a frame shown earlier) and stays compatible with method chaining.

pivot = pivot.fillna(0)

In [9]:
# Checking shape of pivot table: (number of users, number of games)

pivot.shape

(11350, 3600)

In [10]:
# Game-to-game similarity computed from play-time profiles (each row of pivot.T is one game).
# metric='correlation' is the correlation distance, i.e. 1 - Pearson r, so subtracting it
# from 1 recovers the Pearson correlation. This is NOT Spearman's rank correlation, which
# would require ranking the hours first.
# Other metrics tried included euclidean and hamming; correlation gave the most accurate
# recommendations.
# NOTE: despite its name, `distances` holds similarities (higher = more alike).

distances = 1-pairwise_distances(pivot.T, metric='correlation')

In [11]:
# Wrap the similarity matrix in a DataFrame labelled by game title on both axes,
# so a game's scores can be looked up by name

distance_df = pd.DataFrame(
    data=distances,
    index=pivot.columns,
    columns=pivot.columns,
)

In [12]:
# Preview the first five rows of the similarity table to confirm it was built correctly

distance_df.head(n=5)

Game Title,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
Game Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 Legends,1.0,-0.000144,-0.00014,-9.6e-05,-8.8e-05,-0.000147,-8.8e-05,-0.000202,-0.000162,-0.000132,...,-8.8e-05,-8.8e-05,-0.000108,-0.00012,-0.000232,-8.8e-05,-0.000413,-0.000224,-0.000261,-0.000105
0RBITALIS,-0.000144,1.0,-0.000229,0.075136,-0.000144,-0.00024,-0.000144,-0.000329,-0.000264,-0.000215,...,-0.000144,-0.000144,-0.000177,-0.000197,-0.000379,-0.000144,-0.000675,-0.000366,0.00349,-0.000172
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),-0.00014,-0.000229,1.0,-0.000153,-0.00014,-0.000234,-0.00014,-0.000321,-0.000258,-0.00021,...,-0.00014,-0.00014,-0.000172,-0.000192,-0.000369,-0.00014,0.000922,-0.000357,-0.000416,-0.000168
10 Second Ninja,-9.6e-05,0.075136,-0.000153,1.0,-9.6e-05,-0.00016,-9.6e-05,-0.000219,-0.000176,-0.000143,...,-9.6e-05,-9.6e-05,-0.000118,-0.000131,-0.000252,-9.6e-05,-0.000449,-0.000244,-0.000284,-0.000115
10000000,-8.8e-05,-0.000144,-0.00014,-9.6e-05,1.0,-0.000147,-8.8e-05,-0.000202,-0.000162,-0.000132,...,-8.8e-05,-8.8e-05,-0.000108,-0.00012,-0.000232,-8.8e-05,-0.000413,-0.000224,-0.000261,-0.000105


In [13]:
# Viewing recommendations for a game

query = "Fallout 3"

# Resolve the search text to a game title: prefer an exact match, otherwise fall back to
# the first title containing it. A bare substring match can silently pick a different
# game whose title merely contains the query.
matches = [col for col in distance_df.columns if query in col]
if not matches:
    raise ValueError(f"No game title contains {query!r}")
query = query if query in matches else matches[0]

# Top 14 most similar games. The queried game is removed by label rather than by skipping
# position 0, because ties at similarity 1.0 mean it is not guaranteed to sort first.
distance_df[query].sort_values(ascending=False).drop(labels=query).head(14)

Game Title
The Moon Sliver                               0.633702
Painkiller Gold Edition                       0.633702
Monster Bash                                  0.633702
Bientt l't                                    0.633702
Psychonauts Demo                              0.633702
Eradicator                                    0.633702
Dark Void Zero                                0.633702
Quarries of Scred                             0.633702
Life of Pixel                                 0.633702
The Graveyard                                 0.633702
10,000,000                                    0.633702
Dodge                                         0.633702
Unreal Gold                                   0.629485
Stubbs the Zombie in Rebel Without a Pulse    0.625795
Name: Fallout 3, dtype: float64

In [14]:
# Viewing recommendations for a game

query = "Path of Exile"

# Resolve the search text to a game title: prefer an exact match, otherwise fall back to
# the first title containing it. A bare substring match can silently pick a different
# game whose title merely contains the query.
matches = [col for col in distance_df.columns if query in col]
if not matches:
    raise ValueError(f"No game title contains {query!r}")
query = query if query in matches else matches[0]

# Top 14 most similar games. The queried game is removed by label rather than by skipping
# position 0, because ties at similarity 1.0 mean it is not guaranteed to sort first.
distance_df[query].sort_values(ascending=False).drop(labels=query).head(14)

Game Title
Lineage II                                 0.382958
Dawn of Discovery                          0.350961
Bound By Flame                             0.349216
Divinity II - Ego Draconis                 0.323681
Galactic Civilizations III                 0.319606
Elite Dangerous                            0.299791
DOOM 3 Resurrection of Evil                0.269871
Flashback                                  0.254374
Blitzkrieg 2 Anthology                     0.242613
Game Character Hub                         0.231761
Beyond Divinity                            0.228390
Evolution RTS                              0.227193
Tom Clancy's Ghost Recon Future Soldier    0.222174
Total War ATTILA                           0.213006
Name: Path of Exile, dtype: float64

In [15]:
# Viewing recommendations for a game

query = "Dota 2"

# Resolve the search text to a game title: prefer an exact match, otherwise fall back to
# the first title containing it. A bare substring match can silently pick a different
# game whose title merely contains the query.
matches = [col for col in distance_df.columns if query in col]
if not matches:
    raise ValueError(f"No game title contains {query!r}")
query = query if query in matches else matches[0]

# Top 14 most similar games. The queried game is removed by label rather than by skipping
# position 0, because ties at similarity 1.0 mean it is not guaranteed to sort first.
distance_df[query].sort_values(ascending=False).drop(labels=query).head(14)

Game Title
Free to Play                                  0.178891
Road Not Taken                                0.137183
Korwin The Game                               0.128040
Company of Heroes Opposing Fronts             0.108919
Dark Messiah of Might & Magic Multi-Player    0.098414
Titan Quest                                   0.090335
Counter-Strike Global Offensive               0.087414
District 187                                  0.085810
Asteroids Outpost                             0.084676
RoboBlitz                                     0.084676
Fritz Chess 14                                0.075817
I Am Alive                                    0.074361
Arctic Combat                                 0.074264
AZMD! Scorepocalypse                          0.074264
Name: Dota 2, dtype: float64

In [16]:
# Viewing recommendations for a game

query = "Counter-Strike Global Offensive"

# Resolve the search text to a game title: prefer an exact match, otherwise fall back to
# the first title containing it. A bare substring match can silently pick a different
# game whose title merely contains the query.
matches = [col for col in distance_df.columns if query in col]
if not matches:
    raise ValueError(f"No game title contains {query!r}")
query = query if query in matches else matches[0]

# Top 14 most similar games. The queried game is removed by label rather than by skipping
# position 0, because ties at similarity 1.0 mean it is not guaranteed to sort first.
distance_df[query].sort_values(ascending=False).drop(labels=query).head(14)

Game Title
Counter-Strike Source                       0.244036
Call of Duty Black Ops - Multiplayer OSX    0.233688
Unium                                       0.233333
Pro Cycling Manager 2015                    0.222015
Governor of Poker 2 Premium Edition         0.210248
18 Wheels of Steel American Long Haul       0.210248
The Walking Dead Survival Instinct          0.210248
TrackMania United                           0.208836
Two Worlds II Castle Defense                0.207141
Postal 3                                    0.201554
TOXIKK                                      0.197976
Pro Cycling Manager 2013                    0.196550
KickBeat Steam Edition                      0.190592
Airline Tycoon 2                            0.187350
Name: Counter-Strike Global Offensive, dtype: float64