# Recommendation System

### imports

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel 
import numpy as np

### bringing in the clean data

In [2]:
# Load the cleaned review data (path is relative to this notebook's directory).
# The trailing bare name renders the frame as the cell output.
thanos = pd.read_csv('../../../data/thanos.csv')
thanos

Unnamed: 0,steamid,appid,app_title,app_tags,review,fps,voted_up
0,76561198271088129,4000,Garry's Mod,"['Sandbox', 'Multiplayer', 'Funny', 'Moddable'...",good models\n,True,True
1,76561198138206834,4000,Garry's Mod,"['Sandbox', 'Multiplayer', 'Funny', 'Moddable'...",I completely suck at making anything on here a...,True,True
2,76561198128760839,70,Half-Life,"['FPS', 'Sci-fi', 'Action', 'Singleplayer', ""1...",There is not much to say about this old game t...,True,True
3,76561198079636858,6060,"Star Wars: Battlefront 2 (Classic, 2005)","['Action', 'Multiplayer', 'Shooter', 'Third-Pe...",360 noscoped almost everything 10/10 even the ...,True,True
4,76561198238100200,220,Half-Life 2,"['FPS', 'Action', 'Sci-fi', 'Classic', 'Single...",No need for a review. It's practically history.,True,True
...,...,...,...,...,...,...,...
23178,76561198868573076,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...",crap game i got 23000 hours and stil silver,True,False
23179,76561198133294233,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...","meh, still so many cheaters",True,False
23180,76561197989220516,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...","to many hackers,high ping servers",True,False
23181,76561198319040167,730,Counter-Strike: Global Offensive,"['FPS', 'Shooter', 'Multiplayer', 'Competitive...","terrible game, way to many cheaters",True,False


### Making the app_tags column usable with the recommendation system

Parsing the `app_tags` column from stringified lists into real Python lists.

In [3]:
from ast import literal_eval


def _parse_tag_list(value):
    """Parse a stringified list such as "['FPS', 'Action']" into a Python list.

    Non-string values (already-parsed lists, NaN, None) are returned unchanged,
    because ``literal_eval`` raises on them.
    """
    return literal_eval(value) if isinstance(value, str) else value


# Columns whose cells hold stringified Python lists after the CSV round-trip.
features = ['app_tags']
for feature in features:
    # Only strings are parsed, so re-running this cell on an already-parsed
    # column is a no-op instead of an error.
    thanos[feature] = thanos[feature].apply(_parse_tag_list)

In [4]:
thanos

Unnamed: 0,steamid,appid,app_title,app_tags,review,fps,voted_up
0,76561198271088129,4000,Garry's Mod,"[Sandbox, Multiplayer, Funny, Moddable, Buildi...",good models\n,True,True
1,76561198138206834,4000,Garry's Mod,"[Sandbox, Multiplayer, Funny, Moddable, Buildi...",I completely suck at making anything on here a...,True,True
2,76561198128760839,70,Half-Life,"[FPS, Sci-fi, Action, Singleplayer, 1990's, Sh...",There is not much to say about this old game t...,True,True
3,76561198079636858,6060,"Star Wars: Battlefront 2 (Classic, 2005)","[Action, Multiplayer, Shooter, Third-Person Sh...",360 noscoped almost everything 10/10 even the ...,True,True
4,76561198238100200,220,Half-Life 2,"[FPS, Action, Sci-fi, Classic, Singleplayer, S...",No need for a review. It's practically history.,True,True
...,...,...,...,...,...,...,...
23178,76561198868573076,730,Counter-Strike: Global Offensive,"[FPS, Shooter, Multiplayer, Competitive, Actio...",crap game i got 23000 hours and stil silver,True,False
23179,76561198133294233,730,Counter-Strike: Global Offensive,"[FPS, Shooter, Multiplayer, Competitive, Actio...","meh, still so many cheaters",True,False
23180,76561197989220516,730,Counter-Strike: Global Offensive,"[FPS, Shooter, Multiplayer, Competitive, Actio...","to many hackers,high ping servers",True,False
23181,76561198319040167,730,Counter-Strike: Global Offensive,"[FPS, Shooter, Multiplayer, Competitive, Actio...","terrible game, way to many cheaters",True,False


Lower-casing the tags and removing the spaces inside each tag.

In [5]:
def clean_data(x):
    """Normalise a collection of tags: remove spaces and lower-case each tag.

    Parameters
    ----------
    x : list | tuple | str | object
        A list/tuple of tag strings, a single tag string, or a missing value
        (``None``, ``NaN``, ...).

    Returns
    -------
    list of str
        The cleaned tags. A single string is treated as a one-tag list; any
        other non-list value yields an empty list, so callers can always
        ``' '.join(...)`` the result instead of hitting ``None``.
    """
    if isinstance(x, str):
        x = [x]
    elif not isinstance(x, (list, tuple)):
        # Missing / unparsed value: no tags rather than an implicit None.
        return []
    return [tag.replace(" ", "").lower() for tag in x]

In [6]:
# Apply clean_data to every cell of each column listed in `features`,
# overwriting the column with the cleaned values.
for feature in features:
    thanos[feature] = thanos[feature].apply(clean_data)

In [7]:
thanos

Unnamed: 0,steamid,appid,app_title,app_tags,review,fps,voted_up
0,76561198271088129,4000,Garry's Mod,"[sandbox, multiplayer, funny, moddable, buildi...",good models\n,True,True
1,76561198138206834,4000,Garry's Mod,"[sandbox, multiplayer, funny, moddable, buildi...",I completely suck at making anything on here a...,True,True
2,76561198128760839,70,Half-Life,"[fps, sci-fi, action, singleplayer, 1990's, sh...",There is not much to say about this old game t...,True,True
3,76561198079636858,6060,"Star Wars: Battlefront 2 (Classic, 2005)","[action, multiplayer, shooter, third-personsho...",360 noscoped almost everything 10/10 even the ...,True,True
4,76561198238100200,220,Half-Life 2,"[fps, action, sci-fi, classic, singleplayer, s...",No need for a review. It's practically history.,True,True
...,...,...,...,...,...,...,...
23178,76561198868573076,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...",crap game i got 23000 hours and stil silver,True,False
23179,76561198133294233,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...","meh, still so many cheaters",True,False
23180,76561197989220516,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...","to many hackers,high ping servers",True,False
23181,76561198319040167,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...","terrible game, way to many cheaters",True,False


Joining each game's list of tags into a single space-separated string that can be vectorized.

In [8]:
def create_usable(x):
    """Join one row's tags into a single space-separated string.

    Parameters
    ----------
    x : mapping-like (e.g. a DataFrame row)
        Must provide an ``'app_tags'`` entry holding a list/tuple of strings.

    Returns
    -------
    str
        The tags joined by single spaces, with no trailing padding. An empty
        string is returned when the tags entry is not a list/tuple
        (e.g. ``None`` or ``NaN``).
    """
    tags = x['app_tags']
    if not isinstance(tags, (list, tuple)):
        # Missing tags: nothing to join (avoids TypeError from ' '.join(None)).
        return ''
    return ' '.join(tags)

In [9]:
# Row-wise apply (axis=1): create_usable receives each row and reads its 'app_tags' entry.
thanos['usable'] = thanos.apply(create_usable, axis=1)

In [10]:
thanos

Unnamed: 0,steamid,appid,app_title,app_tags,review,fps,voted_up,usable
0,76561198271088129,4000,Garry's Mod,"[sandbox, multiplayer, funny, moddable, buildi...",good models\n,True,True,sandbox multiplayer funny moddable building co...
1,76561198138206834,4000,Garry's Mod,"[sandbox, multiplayer, funny, moddable, buildi...",I completely suck at making anything on here a...,True,True,sandbox multiplayer funny moddable building co...
2,76561198128760839,70,Half-Life,"[fps, sci-fi, action, singleplayer, 1990's, sh...",There is not much to say about this old game t...,True,True,fps sci-fi action singleplayer 1990's shooter ...
3,76561198079636858,6060,"Star Wars: Battlefront 2 (Classic, 2005)","[action, multiplayer, shooter, third-personsho...",360 noscoped almost everything 10/10 even the ...,True,True,action multiplayer shooter third-personshooter...
4,76561198238100200,220,Half-Life 2,"[fps, action, sci-fi, classic, singleplayer, s...",No need for a review. It's practically history.,True,True,fps action sci-fi classic singleplayer storyri...
...,...,...,...,...,...,...,...,...
23178,76561198868573076,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...",crap game i got 23000 hours and stil silver,True,False,fps shooter multiplayer competitive action tea...
23179,76561198133294233,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...","meh, still so many cheaters",True,False,fps shooter multiplayer competitive action tea...
23180,76561197989220516,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...","to many hackers,high ping servers",True,False,fps shooter multiplayer competitive action tea...
23181,76561198319040167,730,Counter-Strike: Global Offensive,"[fps, shooter, multiplayer, competitive, actio...","terrible game, way to many cheaters",True,False,fps shooter multiplayer competitive action tea...


Creating a new DataFrame with only the necessary columns.

In [11]:
# Discard the per-review columns; the remaining columns describe the game itself.
unused_columns = ['steamid', 'appid', 'app_tags', 'review', 'fps', 'voted_up']
title_and_tags = thanos.drop(columns=unused_columns)

In [12]:
title_and_tags

Unnamed: 0,app_title,usable
0,Garry's Mod,sandbox multiplayer funny moddable building co...
1,Garry's Mod,sandbox multiplayer funny moddable building co...
2,Half-Life,fps sci-fi action singleplayer 1990's shooter ...
3,"Star Wars: Battlefront 2 (Classic, 2005)",action multiplayer shooter third-personshooter...
4,Half-Life 2,fps action sci-fi classic singleplayer storyri...
...,...,...
23178,Counter-Strike: Global Offensive,fps shooter multiplayer competitive action tea...
23179,Counter-Strike: Global Offensive,fps shooter multiplayer competitive action tea...
23180,Counter-Strike: Global Offensive,fps shooter multiplayer competitive action tea...
23181,Counter-Strike: Global Offensive,fps shooter multiplayer competitive action tea...


Dropping the duplicate game titles.

In [13]:
# Keep exactly one row per game title (the first occurrence). De-duplicating on
# the title alone guarantees unique titles even if the same game appears with
# slightly different tag strings; the title -> row lookup built further down
# needs each title to map to a single row.
title_and_tags = title_and_tags.drop_duplicates(subset='app_title')

In [14]:
title_and_tags['app_title'].unique()

array(["Garry's Mod", 'Half-Life',
       'Star Wars: Battlefront 2 (Classic, 2005)', 'Half-Life 2',
       'The Ship: Murder Party', 'Fallout: New Vegas', 'Portal 2',
       'Left 4 Dead 2', 'Call of Duty® 4: Modern Warfare®',
       'Borderlands 2', 'Ultimate Doom', 'Half-Life 2: Lost Coast',
       'Counter-Strike: Condition Zero', 'Team Fortress 2',
       'Killing Floor', 'Arma 3', 'S.T.A.L.K.E.R.: Call of Pripyat',
       'Call of Duty® 2', 'Chaser', 'Half-Life: Blue Shift',
       'Red Orchestra 2: Heroes of Stalingrad with Rising Storm',
       'STAR WARS™ Jedi Knight II - Jedi Outcast™', 'BioShock Infinite',
       'STAR WARS™ Jedi Knight - Jedi Academy™',
       'Hitman 2: Silent Assassin', 'Call of Duty®: Modern Warfare® 2',
       "Mirror's Edge™", 'Aliens vs. Predator™',
       'Deus Ex: Game of the Year Edition',
       'Fallout 3: Game of the Year Edition',
       'Arma 2: Operation Arrowhead', 'Crysis 2 - Maximum Edition',
       'ORION: Prelude', 'Crysis', 'Zeno Clash'

Vectorizing the tags of the games so they can be put into a matrix. The matrix is then compared to itself to calculate the cosine similarity between the tags of every pair of games. This gives a score for how similar the tags of one game are to those of another game.

In [15]:
from sklearn.feature_extraction.text import CountVectorizer

# Token counts over each game's tag string: one row per game, one column per token.
# NOTE(review): with CountVectorizer's default token_pattern, punctuation acts as a
# token separator, so hyphenated tags (e.g. "sci-fi") are presumably split into
# several tokens, and stop_words='english' may then drop fragments that happen to
# be English stop words -- confirm this is the intended tokenisation.
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(title_and_tags['usable'])

In [16]:
count_matrix.shape

(166, 204)

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

# Compare the count matrix with itself: entry [i, j] is the cosine similarity
# between the tag vectors of rows i and j.
cosine_sim = cosine_similarity(count_matrix, count_matrix)

Building a Series that maps each game title to its row position in `recommend`.

In [18]:
# Give the de-duplicated table a fresh index; the previous index is kept as a
# column because drop=True is not passed.
recommend = title_and_tags.reset_index()
# Lookup keyed by game title whose values are the row labels of `recommend`.
indices = pd.Series(recommend.index, index=recommend['app_title'])

In [27]:
indices['Half-Life']

1

# The Recommendations

The function takes in a title of a game, the score that is predicted from the NLP model (1 for a positive review or 0 for a negative review), and the cosine similarities calculated for the games. The function returns the top 10 games with the most similar tags if the predicted score for the review was a 1 or the bottom 10 games with the least similar tags if the predicted score for the review was a 0.

In [30]:
def get_recommendations(title, score, cosine_sim=cosine_sim, top_n=10):
    """Recommend games whose tags are most (or least) similar to ``title``.

    Parameters
    ----------
    title : str
        Game title; must be a key of the module-level ``indices`` Series.
    score : int
        Predicted review sentiment: ``1`` returns the most similar games,
        ``0`` returns the least similar games.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the similarities of game
        ``i`` (row position in ``recommend``) to every game.
    top_n : int, default 10
        Number of games to return.

    Returns
    -------
    pandas.Series
        Titles taken from ``recommend['app_title']``, ordered from most to
        least similar. The queried game itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``score`` is neither 0 nor 1.
    """
    # Row position of the game that matches the title.
    idx = indices[title]
    if hasattr(idx, 'iloc'):
        # A title that occurs more than once in `indices` yields a Series
        # instead of a scalar; fall back to its first occurrence.
        idx = idx.iloc[0]

    # Pair every *other* game with its similarity to the queried game.
    # Excluding the game explicitly (rather than assuming it sorts first)
    # stays correct when another game ties with a similarity of 1.0.
    sim_scores = [
        (game_idx, sim)
        for game_idx, sim in enumerate(cosine_sim[idx])
        if game_idx != idx
    ]

    # Rank the games from most to least similar (stable sort keeps ties in row order).
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    top_n = max(int(top_n), 0)
    if score == 1:
        # Positive review: the `top_n` most similar games.
        sim_scores = sim_scores[:top_n]
    elif score == 0:
        # Negative review: the `top_n` least similar games, whatever the catalogue size.
        sim_scores = sim_scores[max(len(sim_scores) - top_n, 0):]
    else:
        raise ValueError(
            "score must be 0 (negative review) or 1 (positive review), got %r" % (score,)
        )

    # Row positions of the selected games.
    game_indices = [i[0] for i in sim_scores]

    # Titles of the selected games.
    return recommend['app_title'].iloc[game_indices]

# Testing the function

In [37]:
get_recommendations('Half-Life', 1)

19                               Half-Life: Blue Shift
44                           Half-Life: Opposing Force
101    STAR WARS™ Jedi Knight - Mysteries of the Sith™
3                                          Half-Life 2
61                                            Quake IV
42                                               QUAKE
62                            STAR WARS™ - Dark Forces
33                                              Crysis
57                            Half-Life 2: Episode One
118                                        Red Faction
Name: app_title, dtype: object

In [53]:
get_recommendations('Half-Life', 0)

88                          Rogue Warrior
81                            Men of War™
94                ARMA: Combat Operations
159                      Revelations 2012
129         Space Trader: Merchant Marine
87                    Men of War: Vietnam
82              Men of War: Assault Squad
164            Hamilton's Great Adventure
160    Brothers in Arms: Earned in Blood™
13                        Team Fortress 2
Name: app_title, dtype: object

In [54]:
get_recommendations('STAR WARS™ - Dark Forces', 1)

101    STAR WARS™ Jedi Knight - Mysteries of the Sith™
117              Heretic: Shadow of the Serpent Riders
102             STAR WARS™ Jedi Knight: Dark Forces II
41                                             DOOM II
42                                               QUAKE
47                                         Unreal Gold
1                                            Half-Life
76                                      Red Faction II
152                                 Project: Snowblind
137                             Section 8®: Prejudice™
Name: app_title, dtype: object

In [55]:
get_recommendations('STAR WARS™ - Dark Forces', 0)

163                           Empires Mod
160    Brothers in Arms: Earned in Blood™
98                             Dino D-Day
81                            Men of War™
164            Hamilton's Great Adventure
87                    Men of War: Vietnam
165      Counter-Strike: Global Offensive
13                        Team Fortress 2
159                      Revelations 2012
82              Men of War: Assault Squad
Name: app_title, dtype: object

In [56]:
get_recommendations('Section 8®: Prejudice™', 1)

108                                        Alpha Prime
78                              Blacklight: Tango Down
140                 Tom Clancy's Rainbow Six Lockdown™
76                                      Red Faction II
152                                 Project: Snowblind
101    STAR WARS™ Jedi Knight - Mysteries of the Sith™
118                                        Red Faction
124                                       Nuclear Dawn
151                                Painkiller Overdose
18                                              Chaser
Name: app_title, dtype: object

In [57]:
get_recommendations('Call of Juarez', 1)

70                Call of Juarez: Bound in Blood
33                                        Crysis
83                                      Far Cry®
21     STAR WARS™ Jedi Knight II - Jedi Outcast™
63                 Far Cry® 2: Fortune's Edition
118                                  Red Faction
96                      Medal of Honor: Airborne
111                              Crysis Warhead®
31                    Crysis 2 - Maximum Edition
107                                    Homefront
Name: app_title, dtype: object

In [58]:
get_recommendations('Medal of Honor™', 0)

131    Pirates, Vikings, and Knights II
141             SiN Episodes: Emergence
10                        Ultimate Doom
113               HeXen: Beyond Heretic
72                                Xotic
112       Carrier Command: Gaea Mission
26                       Mirror's Edge™
75                             Ricochet
164          Hamilton's Great Adventure
129       Space Trader: Merchant Marine
Name: app_title, dtype: object

In [59]:
get_recommendations('Call of Duty®: Modern Warfare® 2', 0)

143         Painkiller: Resurrection
92       Hard Reset Extended Edition
150                     EverQuest II
112    Carrier Command: Gaea Mission
135                  Darkest of Days
75                          Ricochet
88                     Rogue Warrior
113            HeXen: Beyond Heretic
72                             Xotic
129    Space Trader: Merchant Marine
Name: app_title, dtype: object

### Pickling the function, vectorizer, and cosine similarities to use in deployment.

In [39]:
import pickle

In [64]:
# NOTE(review): pickle stores a function by reference (module and name), not its
# code, so loading this file presumably requires get_recommendations to be
# defined/importable in the loading process -- confirm the deployment does that.
with open('get_recommendations.pkl', 'wb') as f:
    pickle.dump(get_recommendations, f)

In [57]:
# Persist the fitted CountVectorizer.
with open('count_vectorizer.pkl', 'wb') as f:
    pickle.dump(count, f)

In [65]:
# Persist the pairwise cosine-similarity matrix.
with open('cosine_sim.pkl', 'wb') as f:
    pickle.dump(cosine_sim, f)

In [40]:
# Persist the game table that get_recommendations reads titles from.
with open('recommend.pkl', 'wb') as f:
    pickle.dump(recommend, f)

In [41]:
# Persist the title -> row lookup Series used by get_recommendations.
with open('indices.pkl', 'wb') as f:
    pickle.dump(indices, f)