In [1]:
# Packages for data manipulation
import pandas as pd
import numpy as np

# Packages used to create the game recommender 
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import StandardScaler

# Data visualization packages
import matplotlib.pyplot as plt 
import seaborn as sns


%matplotlib inline

In [2]:
# Load the 2016 Steam play-time records from the CSV in the working directory

df = pd.read_csv(filepath_or_buffer='./steam_data.csv')

In [3]:
# Inspecting the first rows of the dataframe (head() shows five by default)

df.head()

Unnamed: 0,User ID,Game Title,Hours Played
0,151603712,The Elder Scrolls V Skyrim,273.0
1,151603712,Fallout 4,87.0
2,151603712,Spore,14.9
3,151603712,Fallout New Vegas,12.1
4,151603712,Left 4 Dead 2,8.9


In [4]:
# Checking the shape of the dataframe as (rows, columns), before any filtering

df.shape

(70489, 3)

In [5]:
# Keeping only entries with more than 10 hours played.
# The comparison is strict, so rows with exactly 10 hours are removed as well.

df = df[df['Hours Played'] > 10]

In [6]:
# Reshape to a user-by-game matrix: one row per user, one column per game,
# cells holding hours played (NaN where a user has no entry for a game)

pivot = pd.pivot_table(
    data=df,
    values='Hours Played',
    index='User ID',
    columns='Game Title',
)

In [7]:
# Games a user has no entry for count as 0 hours.
# Rebinding the result gives the same filled table without inplace=True.

pivot = pivot.fillna(value=0)

In [8]:
# Checking shape of pivot table: (number of users, number of games)

pivot.shape

(6411, 1692)

In [9]:
# Scaling my data: each game column is standardized (zero mean, unit variance).
# The scaled matrix is bound to a name; previously it was only displayed and
# then discarded, so the scaling was never available to later cells.
# NOTE(review): the similarity cell further down still reads `pivot`, not
# `pivot_scaled`. Pearson correlation between game columns is unchanged by
# per-column standardization, so the recommendations are presumably the same
# either way -- confirm before switching the input.

ss = StandardScaler()
pivot_scaled = ss.fit_transform(pivot)
pivot_scaled

array([[-0.01249025, -0.01763289, -0.01249025, ..., -0.02157166,
        -0.01862415, -0.01249025],
       [-0.01249025, -0.01763289, -0.01249025, ..., -0.02157166,
        -0.01862415, -0.01249025],
       [-0.01249025, -0.01763289, -0.01249025, ..., -0.02157166,
        -0.01862415, -0.01249025],
       ...,
       [-0.01249025, -0.01763289, -0.01249025, ..., -0.02157166,
        -0.01862415, -0.01249025],
       [-0.01249025, -0.01763289, -0.01249025, ..., -0.02157166,
        -0.01862415, -0.01249025],
       [-0.01249025, -0.01763289, -0.01249025, ..., -0.02157166,
        -0.01862415, -0.01249025]])

In [10]:
# Using pairwise distance to calculate correlation between games. I tried various metrics including: euclidean, hamming, etc.
# The 'correlation' metric created the most accurate recommendations.
# NOTE: metric='correlation' is the correlation distance, i.e. 1 - Pearson's r (not Spearman's rank correlation),
# so subtracting it from 1 yields the Pearson correlation between game columns.
# Despite its name, `distances` therefore holds similarities: higher means more alike.

distances = 1-pairwise_distances(pivot.T, metric='correlation')

In [11]:
# Wrap the game-by-game matrix in a DataFrame labelled with game titles on both axes

distance_df = pd.DataFrame(
    data=distances,
    columns=pivot.columns,
    index=pivot.columns,
)

In [12]:
# Checking the first two rows of the df to verify everything worked okay
# (each game's entry against itself should be 1.0)

distance_df.head(2)

Game Title,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),100% Orange Juice,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,3DMark,3DMark Vantage,4 Elements,7 Days to Die,8BitBoy,A Game of Thrones - Genesis,...,Zombie Army Trilogy,Zombie Panic Source,Zombies Monsters Robots,Zuma's Revenge,iBomber Defense Pacific,liteCam Game 100 FPS Game Capture,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,theHunter,theHunter Primal
Game Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),1.0,-0.00022,-0.000156,-0.000156,-0.000178,-0.000156,-0.000194,-0.000612,-0.000156,-0.000216,...,-0.000156,-0.000293,-0.000314,-0.000156,-0.000156,-0.000156,-0.000156,-0.000269,-0.000233,-0.000156
100% Orange Juice,-0.00022,1.0,-0.00022,-0.00022,-0.000251,-0.00022,-0.000274,-0.000864,-0.00022,-0.000305,...,-0.00022,-0.000413,-0.000443,-0.00022,-0.00022,-0.00022,-0.00022,-0.00038,-0.000328,-0.00022


In [13]:
# Viewing the 14 most similar games for a title

query = "Fallout 3"
# Use the exact title when it exists; otherwise fall back to the first title
# containing the text, and fail with a clear message when nothing matches.
if query not in distance_df.columns:
    matches = [col for col in distance_df.columns if query in col]
    if not matches:
        raise KeyError(f"No game title contains {query!r}")
    query = matches[0]
# Remove the queried game by label instead of skipping position 0: a game
# tied with it at similarity 1.0 may sort first, which would leave the query
# in its own recommendations.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Unreal Gold                                              0.637160
Painkiller Gold Edition                                  0.637160
Commandos 2 Men of Courage                               0.494411
Unreal Tournament 2004                                   0.405676
Red Faction Guerrilla Steam Edition                      0.390287
Sid Meier's Civilization IV                              0.389783
The Elder Scrolls IV Oblivion                            0.379748
Hack 'n' Slash                                           0.304078
Dungeons & Dragons Online                                0.270900
GALAK-Z                                                  0.266011
The Maw                                                  0.266011
ToCA Race Driver 3                                       0.266011
Child of Light                                           0.251926
Magic The Gathering - Duels of the Planeswalkers 2013    0.242364
Name: Fallout 3, dtype: float64

In [14]:
# Viewing the 14 most similar games for a title

query = "Path of Exile"
# Use the exact title when it exists; otherwise fall back to the first title
# containing the text, and fail with a clear message when nothing matches.
if query not in distance_df.columns:
    matches = [col for col in distance_df.columns if query in col]
    if not matches:
        raise KeyError(f"No game title contains {query!r}")
    query = matches[0]
# Remove the queried game by label instead of skipping position 0: a game
# tied with it at similarity 1.0 may sort first, which would leave the query
# in its own recommendations.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Divinity II - Ego Draconis                 0.351707
Bound By Flame                             0.351707
Dawn of Discovery                          0.351707
Galactic Civilizations III                 0.319351
Elite Dangerous                            0.298879
DOOM 3 Resurrection of Evil                0.270005
Blitzkrieg 2 Anthology                     0.243070
Tom Clancy's Ghost Recon Future Soldier    0.223200
Hitman Blood Money                         0.222424
Total War ATTILA                           0.211427
Earth 2160                                 0.205234
Lords Of The Fallen                        0.203774
Fallout Tactics                            0.202672
RaiderZ                                    0.191444
Name: Path of Exile, dtype: float64

In [15]:
# Viewing the 14 most similar games for a title

query = "Dota 2"
# Use the exact title when it exists; otherwise fall back to the first title
# containing the text, and fail with a clear message when nothing matches.
if query not in distance_df.columns:
    matches = [col for col in distance_df.columns if query in col]
    if not matches:
        raise KeyError(f"No game title contains {query!r}")
    query = matches[0]
# Remove the queried game by label instead of skipping position 0: a game
# tied with it at similarity 1.0 may sort first, which would leave the query
# in its own recommendations.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Company of Heroes Opposing Fronts             0.108813
Dark Messiah of Might & Magic Multi-Player    0.098358
District 187                                  0.093673
Titan Quest                                   0.085531
I Am Alive                                    0.074279
Alan Wake                                     0.070576
Arctic Combat                                 0.068095
Serious Sam 2                                 0.066939
Warhammer 40,000 Armageddon                   0.063706
Space Rangers HD A War Apart                  0.062708
Darksiders                                    0.060948
Counter-Strike Global Offensive               0.060261
McPixel                                       0.058563
Nosgoth                                       0.058436
Name: Dota 2, dtype: float64

In [16]:
# Viewing the 14 most similar games for a title

query = "Counter-Strike Global Offensive"
# Use the exact title when it exists; otherwise fall back to the first title
# containing the text, and fail with a clear message when nothing matches.
if query not in distance_df.columns:
    matches = [col for col in distance_df.columns if query in col]
    if not matches:
        raise KeyError(f"No game title contains {query!r}")
    query = matches[0]
# Remove the queried game by label instead of skipping position 0: a game
# tied with it at similarity 1.0 may sort first, which would leave the query
# in its own recommendations.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Counter-Strike Source                       0.236037
Call of Duty Black Ops - Multiplayer OSX    0.234877
Pro Cycling Manager 2015                    0.224024
TrackMania United                           0.211513
Airline Tycoon 2                            0.211513
Pro Cycling Manager 2013                    0.197069
KickBeat Steam Edition                      0.182927
Rust                                        0.160731
Call of Duty Black Ops - OSX                0.153766
Nidhogg                                     0.141361
Two Worlds II                               0.120660
Garry's Mod                                 0.110461
GRID 2                                      0.107959
DayZ                                        0.107256
Name: Counter-Strike Global Offensive, dtype: float64

In [17]:
# Viewing the 14 most similar games for a title

query = "Shovel Knight"
# Use the exact title when it exists; otherwise fall back to the first title
# containing the text, and fail with a clear message when nothing matches.
if query not in distance_df.columns:
    matches = [col for col in distance_df.columns if query in col]
    if not matches:
        raise KeyError(f"No game title contains {query!r}")
    query = matches[0]
# Remove the queried game by label instead of skipping position 0: a game
# tied with it at similarity 1.0 may sort first, which would leave the query
# in its own recommendations.
distance_df[query].sort_values(ascending=False).drop(query).head(14)

Game Title
Glare                           0.713678
Escape Goat                     0.713678
Millie                          0.713678
Rooms The Unsolvable Puzzle     0.713678
Bard's Gold                     0.713678
Canyon Capers                   0.713678
Super Panda Adventures          0.713678
Scarygirl                       0.713678
Freedom Fall                    0.713678
Zack Zero                       0.713678
Breezeblox                      0.713678
The Cave                        0.701772
Cave Story+                     0.700670
Giana Sisters Twisted Dreams    0.620576
Name: Shovel Knight, dtype: float64