In [1]:
#Import necessary libraries
import pandas as pd 
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
#Load Data
steam_df = pd.read_csv('./data/steam-store-games/steam.csv')
#rename column from appid to steam_appid for joining and consistency with the other data sets
steam_df = steam_df.rename(columns={'appid': 'steam_appid'})
steam_df.head(2)

Unnamed: 0,steam_appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99


In [3]:
#Load another dataset
steam_description_df = pd.read_csv('./data/steam-store-games/steam_description_data.csv')
steam_description_df.head()

Unnamed: 0,steam_appid,detailed_description,about_the_game,short_description
0,10,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...
1,20,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...
2,30,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...
3,40,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...
4,50,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...


In [4]:
#Join relevant & useful datasets together on steam_appid column
#steam_df, steam_description_data_df, steam_requirements_df(not sure whether to include), steam_spytag_df
steam_and_description_df = steam_df.merge(steam_description_df, on='steam_appid')
#add game id column
steam_and_description_df['game_id']=steam_and_description_df.index
steam_and_description_df.head()

Unnamed: 0,steam_appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,...,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,detailed_description,about_the_game,short_description,game_id
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,124534,3339,17612,317,10000000-20000000,7.19,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,0
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,3318,633,277,62,5000000-10000000,3.99,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...,1
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,...,3416,398,187,34,5000000-10000000,3.99,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,2
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,1273,267,258,184,5000000-10000000,3.99,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,3
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,...,5250,288,624,415,5000000-10000000,3.99,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,4


In [5]:
#create list of important columns from steam_and_description_df, maybe can focus on 1-2 columns
useful_columns = ['name', 'developer', 'publisher', 'categories', 'genres', 'steamspy_tags', 'detailed_description', 'about_the_game', 'short_description']
steam_and_description_df[useful_columns].head(2)

Unnamed: 0,name,developer,publisher,categories,genres,steamspy_tags,detailed_description,about_the_game,short_description
0,Counter-Strike,Valve,Valve,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...
1,Team Fortress Classic,Valve,Valve,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...


In [6]:
#check for missing values and if it returns false means theres no missing values
steam_and_description_df[useful_columns].isnull().values.any()

False

In [7]:
#function to get important features 
def get_important_features(data):
  important_features=[]
  for i in range(0, data.shape[0]):
    important_features.append(data['name'][i]+' '+data['developer'][i]+' '+data['publisher'][i]+' '+data['categories'][i]+' '+data['genres'][i]+data['steamspy_tags'][i]+' '+data['detailed_description'][i]+' '+data['about_the_game'][i]+' '+data['short_description'][i])

  return important_features

In [8]:
#create a column called important_features in the dataframe to hold the combined strings
steam_and_description_df['important_features'] = get_important_features(steam_and_description_df)
#show the final dataset with the important features column
steam_and_description_df.head(2)

Unnamed: 0,steam_appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,...,negative_ratings,average_playtime,median_playtime,owners,price,detailed_description,about_the_game,short_description,game_id,important_features
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,3339,17612,317,10000000-20000000,7.19,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,0,Counter-Strike Valve Valve Multi-player;Online...
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,633,277,62,5000000-10000000,3.99,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...,1,Team Fortress Classic Valve Valve Multi-player...


In [9]:
#convert the text to a matrix of token counts
cm = CountVectorizer().fit_transform(steam_and_description_df['important_features'])

In [10]:
#get cosine similarity matrix from cm
cs = cosine_similarity(cm)
print(cs)

[[1.         0.37814341 0.25216706 ... 0.15473285 0.14597431 0.16058189]
 [0.37814341 1.         0.3890594  ... 0.24337297 0.22178511 0.25263078]
 [0.25216706 0.3890594  1.         ... 0.24532181 0.27182256 0.27269936]
 ...
 [0.15473285 0.24337297 0.24532181 ... 1.         0.53629666 0.51963466]
 [0.14597431 0.22178511 0.27182256 ... 0.53629666 1.         0.63852447]
 [0.16058189 0.25263078 0.27269936 ... 0.51963466 0.63852447 1.        ]]


In [11]:
#find the number of rows and columns of the cosine similarity matrix
cs.shape

(27075, 27075)

In [12]:
#input query
name = 'Counter-Strike'

#Find the steam app id of the input game
steam_app_id = steam_and_description_df[steam_and_description_df.name == name]['game_id'].values[0]
print(steam_app_id)

0


In [13]:
#Create a list of enumerations for the similarity score
scores = list(enumerate(cs[steam_app_id]))

In [14]:
#sort the list
sorted_scores = sorted(scores, key = lambda x:x[1], reverse = True)
sorted_scores = sorted_scores[1:]

In [15]:
#print first 10 similar games
i = 0
print('The 10 most recommended games to', name, 'are:\n')
for item in sorted_scores:
    game_name = steam_and_description_df[steam_and_description_df.game_id == item[0]].values[0][1]
    print(i+1, game_name)
    i = i + 1
    if i>9:
        break

The 10 most recommended games to Counter-Strike are:

1 Draft Day Sports: College Basketball 2018
2 Draft Day Sports: College Basketball 2017
3 Football Mogul 18
4 Baseball Mogul 2018
5 Sphere Complex
6 Delta Horizon
7 BLOCKADE 3D
8 Staxel
9 Kingdoms Of Marazia
10 Tales Of Glory


In [16]:
#final game recommendation function
def game_recommendation(input_query):
    steam_app_id = steam_and_description_df[steam_and_description_df.name == input_query]['game_id'].values[0]
    #Create a list of enumerations for the similarity score
    scores = list(enumerate(cs[steam_app_id]))
    print(scores[:10])
    #sort the list
    sorted_scores = sorted(scores, key = lambda x:x[1], reverse = True)
    sorted_scores = sorted_scores[1:]
    #print the first 10 similar games
    i = 0
    for item in sorted_scores:
        game_name = steam_and_description_df[steam_and_description_df.game_id == item[0]].values[0][1]
        print(i+1, game_name)
        i = i + 1
        if i>9:
            break

In [17]:
game_recommendation('Counter-Strike')

[(0, 0.9999999999999999), (1, 0.3781434060156702), (2, 0.25216705940593204), (3, 0.22511530062127552), (4, 0.22227065840874616), (5, 0.36687031126222003), (6, 0.27246895362233936), (7, 0.1941855649691472), (8, 0.18760385803235793), (9, 0.22680696344507284)]
1 Draft Day Sports: College Basketball 2018
2 Draft Day Sports: College Basketball 2017
3 Football Mogul 18
4 Baseball Mogul 2018
5 Sphere Complex
6 Delta Horizon
7 BLOCKADE 3D
8 Staxel
9 Kingdoms Of Marazia
10 Tales Of Glory


In [18]:
game_recommendation('Team Fortress Classic')

[(0, 0.3781434060156702), (1, 0.9999999999999996), (2, 0.3890593979069049), (3, 0.3704044508830013), (4, 0.3831002109277553), (5, 0.3647917423452258), (6, 0.31109131539408885), (7, 0.2761220866078094), (8, 0.25992314820823675), (9, 0.34325420438330734)]
1 Obliteracy
2 Dead Rising 4
3 A Fistful of Gun
4 BLOCKADE Classic
5 My Lands: Black Gem Hunting
6 Assassin's Creed® Unity
7 One Ping Only
8 Call of Duty®: Modern Warfare® Remastered
9 忍者村大战2
10 Oracle: Threads of Fate


In [19]:
game_recommendation('Half-Life 2')

[(0, 0.22680696344507284), (1, 0.34325420438330734), (2, 0.3854087985035805), (3, 0.39692790173563663), (4, 0.6070432513304648), (5, 0.20700016775851657), (6, 0.48288356038446967), (7, 0.25990807545629047), (8, 0.41230568256635575), (9, 1.0000000000000018)]
1 Crash Time 3
2 Conflict of Heroes: Awakening the Bear
3 Real Warfare 2: Northern Crusades
4 Maelstrom: The Battle for Earth Begins
5 Below Zero
6 SpellForce - Platinum Edition
7 FARHOME
8 Momodora: Reverie Under The Moonlight
9 汉匈决战/Han Xiongnu Wars
10 Battle Princess Madelyn


In [20]:
game_recommendation('Star Wars: Battlefront 2 (Classic, 2005)')

[(0, 0.17847584807070943), (1, 0.31530697267541835), (2, 0.29912429204438706), (3, 0.29598469523734766), (4, 0.4725134973124137), (5, 0.20795607215099993), (6, 0.3289642407107981), (7, 0.2469574160411963), (8, 0.30709917586501845), (9, 0.6288277333931099)]
1 STAR WARS™ Empire at War - Gold Pack
2 LEGO® Star Wars™ III - The Clone Wars™
3 LEGO® Star Wars™ - The Complete Saga
4 Deathtrap
5 Distant Worlds: Universe
6 The Horus Heresy: Legions
7 STAR WARS™ - X-Wing Alliance™
8 Assassin’s Creed® Brotherhood
9 Ride 2
10 The Incredible Adventures of Van Helsing: Final Cut


In [21]:
game_recommendation('LEGO® Batman™: The Videogame')

[(0, 0.19645402251809166), (1, 0.2920453425011908), (2, 0.3152984925416119), (3, 0.34447965222111054), (4, 0.5130530344350364), (5, 0.19631683456984653), (6, 0.3713596393776642), (7, 0.19521037752235854), (8, 0.3938248951861542), (9, 0.6410106170070837)]
1 Batman™: Arkham Knight
2 汉匈决战/Han Xiongnu Wars
3 Battle For Landriel
4 Belle II in Virtual Reality
5 Crusaders: Thy Kingdom Come
6 Rivais Em Batalha
7 FallenCore
8 Shadow Council: The Puppeteers
9 Unlimited Escape 2
10 LEGO® Batman™ 3: Beyond Gotham


In [22]:
game_recommendation("Sid Meier’s Civilization® VI")

[(0, 0.2772722431713781), (1, 0.2974730023225597), (2, 0.3286518958350911), (3, 0.3680510347236818), (4, 0.4405337662416363), (5, 0.2118002756409483), (6, 0.38187556263409045), (7, 0.20085155040005595), (8, 0.32281332766688536), (9, 0.6105400348768288)]
1 Sorcerer King: Rivals
2 Sumer
3 Consortium: The Tower
4 The Horus Heresy: Legions
5 Distant Worlds: Universe
6 Star Traders: 4X Empires
7 Frozen Synapse Prime
8 Hover
9 TransOcean 2: Rivals
10 Z. Year One


In [23]:
game_recommendation("Grand Theft Auto V")

[(0, 0.17878095094245378), (1, 0.36642479465730654), (2, 0.37319577013775657), (3, 0.40206677569191274), (4, 0.5451249331439363), (5, 0.2242882343105267), (6, 0.4404647770109857), (7, 0.29336346195052254), (8, 0.3536495216931332), (9, 0.6840712037105842)]
1 ATLAS
2 The Crew™ 2
3 Space Viking Raiders
4 汉匈决战/Han Xiongnu Wars
5 Zwei: The Arges Adventure
6 Command: The Silent Service
7 BLADESTORM: Nightmare
8 Command: Modern Air / Naval Operations WOTY
9 Grand Ages: Rome
10 Bloody Faerie


In [26]:
games_names = steam_and_description_df.name.values

#final game recommendation function
def recommender(games):
    scores_sum = None
    for game in games:
        steam_app_id = steam_and_description_df[steam_and_description_df.name == game]['game_id'].values[0]
        if scores_sum is None:
            scores_sum = cs[steam_app_id]
        else:
            scores_sum += cs[steam_app_id]
    #Create a list of enumerations for the similarity score
    scores = list(enumerate(scores_sum))
    #sort the list
    sorted_scores = sorted(scores, key = lambda x:x[1], reverse = True)
    sorted_scores = sorted_scores[1:]
    
    results = []
    for item in sorted_scores:
        game_name = games_names[item[0]]
        results.append(game_name)
    return results

In [28]:
recommender(["Grand Theft Auto V", "Sid Meier’s Civilization® VI", 'LEGO® Batman™: The Videogame'])[:10]

['Earth 2150 Trilogy',
 'Endciv',
 'Batman™: Arkham Knight',
 'Sid Meier’s Civilization® VI',
 'Dishonored®: Death of the Outsider™',
 'Tales of the Lumminai',
 'VikingJourney',
 'Dishonored 2',
 'Command: Modern Air / Naval Operations WOTY',
 'The I of the Dragon']