# 2023 Steam Games Recommendation System

The core of this project lies in analyzing a wide array of categories and genres to accurately suggest games that align with a user's interests and play style. 

In [122]:
import pandas as pd
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

I am utilizing a Kaggle dataset containing games' metadata, including names, required age, and categories.

In [123]:
games_df = pd.read_csv('steam_app_data.csv')
games_df.head()

Unnamed: 0,type,name,steam_appid,required_age,is_free,controller_support,dlc,detailed_description,about_the_game,short_description,...,categories,genres,screenshots,movies,recommendations,achievements,release_date,support_info,background,content_descriptors
0,game,Counter-Strike,10,0.0,False,,,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,Play the world's number 1 online action game. ...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 137378},,"{'coming_soon': False, 'date': '1 Nov, 2000'}","{'url': 'http://steamcommunity.com/app/10', 'e...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [2, 5], 'notes': 'Includes intense vio..."
1,game,Team Fortress Classic,20,0.0,False,,,One of the most popular online action games of...,One of the most popular online action games of...,One of the most popular online action games of...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 5474},,"{'coming_soon': False, 'date': '1 Apr, 1999'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [2, 5], 'notes': 'Includes intense vio..."
2,game,Day of Defeat,30,0.0,False,,,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,Enlist in an intense brand of Axis vs. Allied ...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 3694},,"{'coming_soon': False, 'date': '1 May, 2003'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
3,game,Deathmatch Classic,40,0.0,False,,,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,Enjoy fast-paced multiplayer gaming with Death...,...,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 1924},,"{'coming_soon': False, 'date': '1 Jun, 2001'}","{'url': '', 'email': ''}",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"
4,game,Half-Life: Opposing Force,50,0.0,False,,,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,Return to the Black Mesa Research Facility as ...,...,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]","[{'id': 0, 'path_thumbnail': 'https://cdn.akam...",,{'total': 15478},,"{'coming_soon': False, 'date': '1 Nov, 1999'}","{'url': 'https://help.steampowered.com', 'emai...",https://cdn.akamai.steamstatic.com/steam/apps/...,"{'ids': [], 'notes': None}"


In [124]:
#create a smaller dataframe focused only on name, categories and genres columns.
games_cat_df = games_df[['name','categories', 'genres']]
games_cat_df.head()

Unnamed: 0,name,categories,genres
0,Counter-Strike,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
1,Team Fortress Classic,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
2,Day of Defeat,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
3,Deathmatch Classic,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
4,Half-Life: Opposing Force,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]"


In [125]:
#check for null values
games_cat_df.isnull().sum()

name          0
categories    8
genres        8
dtype: int64

In [126]:
#drop any null values
games_cat_df = games_cat_df.dropna()

In [127]:
#check if all null values are gone
games_cat_df.isnull().sum()

name          0
categories    0
genres        0
dtype: int64

I am utilizing features to personalize the recommendation: categories, genres. The data is present in a list of strings, so  am utilizing literal_eval function to convert the data into a safe and usable structure.

In [129]:
features = ["categories", "genres"]
for feature in features:
    games_cat_df[feature] = games_cat_df[feature].apply(literal_eval)
games_cat_df[features].head()

Unnamed: 0,categories,genres
0,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
1,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
2,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
3,"[{'id': 1, 'description': 'Multi-player'}, {'i...","[{'id': '1', 'description': 'Action'}]"
4,"[{'id': 2, 'description': 'Single-player'}, {'...","[{'id': '1', 'description': 'Action'}]"


In [130]:
#create a get list function to extract the string data from the features. Takes up to 15 elements (can be changed)
def get_list(x):
    if isinstance(x, list):
        tags = [i["description"] for i in x]
        if len(tags) > 15:
            tags = tags[:15]
        return tags
    return []

In [131]:
features = ["categories", "genres"]
for feature in features:
    games_cat_df[feature] = games_cat_df[feature].apply(get_list)

In [132]:
games_cat_df.head()

Unnamed: 0,name,categories,genres
0,Counter-Strike,"[Multi-player, PvP, Online PvP, Shared/Split S...",[Action]
1,Team Fortress Classic,"[Multi-player, PvP, Online PvP, Shared/Split S...",[Action]
2,Day of Defeat,"[Multi-player, Valve Anti-Cheat enabled]",[Action]
3,Deathmatch Classic,"[Multi-player, PvP, Online PvP, Shared/Split S...",[Action]
4,Half-Life: Opposing Force,"[Single-player, Multi-player, Valve Anti-Cheat...",[Action]


In [133]:
#create a fucntion to clean the features by removing spaces and lowercase
def clean_data(row):
    if isinstance(row, list):
        return [str.lower(i.replace(" ", "")) for i in row]
    else:
        if isinstance(row, str):
            return str.lower(row.replace(" ", ""))
        else:
            return ""
        
features = ["categories", "genres"]
for feature in features:
    games_cat_df[feature] = games_cat_df[feature].apply(clean_data)

In [134]:
#create a "soup" containing all the metadata extracted for vectorizer use
def create_soup(features):
    return ' '.join(features['categories']) + ' ' + ' '.join(features['genres'])

games_cat_df["soup"] = games_cat_df.apply(create_soup, axis=1)
print(games_cat_df["soup"].head())

0    multi-player pvp onlinepvp shared/splitscreenp...
1    multi-player pvp onlinepvp shared/splitscreenp...
2           multi-player valveanti-cheatenabled action
3    multi-player pvp onlinepvp shared/splitscreenp...
4    single-player multi-player valveanti-cheatenab...
Name: soup, dtype: object


The similarity between games are calculated and used for recommendation using the metadata information collected. The "soup" data will be preprocessed and converted into a vectorizer. Then we use cosine similarity score to measure the similiarty between two vectors.

In [135]:
count_vectorizer = CountVectorizer(stop_words="english")
count_matrix = count_vectorizer.fit_transform(games_cat_df["soup"])

print(count_matrix.shape)

cosine_sim2 = cosine_similarity(count_matrix, count_matrix)
print(cosine_sim2.shape)

games_cat_df = games_cat_df.reset_index()
indices = pd.Series(games_cat_df.index, index=games_cat_df['name'])

(990, 129)
(990, 990)


In [136]:
#create a reverse mappping of name and index to make searching easier
indices = pd.Series(games_cat_df.index, index=games_cat_df['name']).drop_duplicates()
print(indices.head())

name
Counter-Strike                     0
Team Fortress Classic              1
Day of Defeat                      2
Deathmatch Classic                 3
Half-Life: Opposing Force          4
Ricochet                           5
Half-Life                          6
Counter-Strike: Condition Zero     7
Half-Life: Blue Shift              8
Half-Life 2                        9
Counter-Strike: Source            10
Half-Life: Source                 11
Day of Defeat: Source             12
Half-Life 2: Deathmatch           13
Half-Life 2: Lost Coast           14
dtype: int64


get_recommendation function takes the name of a game and the similarity function as input. It will make a recommendation based on the index of games, get a list of similiarty scores, enumerate and sort the tuples in descending order based on similarity score, then get the list of top 10 and return the list

In [137]:
def get_recommendations(name, cosine_sim=cosine_sim2):
    idx = indices[name]
    similarity_scores = list(enumerate(cosine_sim[idx]))
    similarity_scores = sorted(similarity_scores, key=lambda x:x[1], reverse=True)
    similarity_scores = similarity_scores[1:11]
    game_indices = [ind[0] for ind in similarity_scores]
    games = games_cat_df["name"].iloc[game_indices]
    return games

In [138]:
print(get_recommendations("Counter-Strike", cosine_sim2))

1                       Team Fortress Classic
3                          Deathmatch Classic
5                                    Ricochet
2                               Day of Defeat
15               Half-Life Deathmatch: Source
766                    SCP: Secret Laboratory
46     STAR WARS™ Jedi Knight - Jedi Academy™
6                                   Half-Life
31                     The Ship: Murder Party
13                    Half-Life 2: Deathmatch
Name: name, dtype: object


In [139]:
print(get_recommendations("Left 4 Dead", cosine_sim2))

214                Call of Duty®: Black Ops II
241                      Call of Duty®: Ghosts
388           Dead Rising 3 Apocalypse Edition
448                          Ryse: Son of Rome
24                                 Alien Swarm
167                  Red Faction®: Armageddon™
447                        Zombie Army Trilogy
21                               Left 4 Dead 2
71     Call of Duty®: Modern Warfare® 2 (2009)
73                        Aliens vs. Predator™
Name: name, dtype: object


In [140]:
print(get_recommendations("Half-Life 2", cosine_sim2))

34                                    Psychonauts
17                                         Portal
673                                NieR:Automata™
16                       Half-Life 2: Episode One
18                       Half-Life 2: Episode Two
322    Deus Ex: Human Revolution - Director's Cut
169                         Saints Row: The Third
549                                          DOOM
215                           Hitman: Absolution™
875                                   Ghostrunner
Name: name, dtype: object
