In [None]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score


# Dataset
`SteamGames.csv` contains over 70,000 games.
Only games that have a total review count (the `All Reviews Number` column) are used, which is about 7% of the data.
The data is filtered this way to reduce the amount of data to process and to keep only games that are well reviewed by the Steam community.

In [None]:
# Load only the columns the recommender needs. 'All Reviews Number' is read solely so
# that dropna() below discards games that have no review count.
steam_games = pd.read_csv('SteamGames.csv', usecols=['Title','Game Description','Popular Tags','All Reviews Number'])
steam_games = steam_games.drop_duplicates().dropna()


def _parse_tags(raw_tags):
    """Split a raw 'Popular Tags' cell into a list of clean tag names.

    The first and last characters of the cell are dropped and the rest is split
    on commas (presumably the cell is a stringified list such as "['RPG', 'Adventure']"
    -- verify against the CSV). Each piece is stripped of surrounding whitespace and
    quote characters so that the same tag is spelled identically regardless of its
    position in the list; empty pieces are discarded.
    """
    pieces = (piece.strip().strip("'\"") for piece in raw_tags[1:-1].split(','))
    return [tag for tag in pieces if tag]


steam_games_tags = steam_games['Popular Tags'].apply(_parse_tags)
games_df = pd.DataFrame({
    'Title': steam_games['Title'],
    'Game Description': steam_games['Game Description'],
    'Popular Tags': steam_games_tags
})

# unique() already removes repeats, so the resulting frame has one row per distinct tag
# and a default 0..n-1 index.
all_genres = pd.DataFrame({'Tags': games_df['Popular Tags'].explode().dropna().unique()})

print("Number of games to recommend:\t", len(games_df), "\nNumber of different genres:\t", len(all_genres))

Number of games to recommend:	 5371 
Number of different genres:	 732


# Popular Tags Similarity Matrix


In [None]:
# Build a one-hot game x tag matrix: entry is 1 when the game carries the tag, else 0.

# Resolve every tag to its column position once, instead of scanning all_genres
# for each tag of each game.
tag_names = all_genres['Tags'].tolist()
tag_position = {}
for position, tag in enumerate(tag_names):
    # setdefault keeps the first position should a tag ever appear twice.
    tag_position.setdefault(tag, position)

game_genre_matrix = []
for genres in games_df['Popular Tags']:
    row = [0] * len(tag_names)
    for genre in genres:
        row[tag_position[genre]] = 1
    game_genre_matrix.append(row)

# Columns are labelled with the tag names themselves (not the whole all_genres frame);
# rows are labelled with the game titles.
game_genre_df = pd.DataFrame(game_genre_matrix, columns=tag_names, index=games_df['Title'])

In [None]:
# Pairwise cosine similarity between the rows (games) of the tag matrix.
tag_similarity = cosine_similarity(game_genre_df)

# Wrap the raw array in a frame labelled by game title on both axes for display.
df_tag_similarity = pd.DataFrame(
    data=tag_similarity,
    index=games_df['Title'],
    columns=games_df['Title'],
)
display(df_tag_similarity)

Title,Baldur's Gate 3,Counter-Strike: Global Offensive,Apex Legends™,Forza Horizon 5,Call of Duty®,PUBG: BATTLEGROUNDS,Cyberpunk 2077,Rust,Grand Theft Auto V,Tom Clancy's Rainbow Six® Siege,...,VERGE:Lost chapter,Resident Evil Re:Verse,HAWKEN REBORN,100 hidden cups,O2Jam Online,Call of Duty®: Warzone™,Their Land,Call of Duty®: Modern Warfare® II,SteamVR,Resident Evil 4 Chainsaw Demo
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Baldur's Gate 3,1.000000,0.150000,0.100000,0.200000,0.200000,0.200000,0.200000,0.150000,0.150000,0.150000,...,0.100000,0.150000,0.150000,0.050000,0.074536,0.200000,0.250000,0.200000,0.0,0.091287
Counter-Strike: Global Offensive,0.150000,1.000000,0.400000,0.350000,0.550000,0.600000,0.100000,0.350000,0.300000,0.850000,...,0.150000,0.200000,0.200000,0.000000,0.000000,0.450000,0.200000,0.500000,0.0,0.091287
Apex Legends™,0.100000,0.400000,1.000000,0.250000,0.300000,0.500000,0.250000,0.350000,0.300000,0.450000,...,0.250000,0.300000,0.200000,0.000000,0.000000,0.500000,0.300000,0.350000,0.0,0.182574
Forza Horizon 5,0.200000,0.350000,0.250000,1.000000,0.450000,0.400000,0.350000,0.450000,0.450000,0.350000,...,0.300000,0.400000,0.200000,0.100000,0.074536,0.600000,0.300000,0.450000,0.1,0.182574
Call of Duty®,0.200000,0.550000,0.300000,0.450000,1.000000,0.450000,0.350000,0.300000,0.500000,0.550000,...,0.400000,0.450000,0.250000,0.000000,0.074536,0.650000,0.250000,0.850000,0.0,0.182574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Call of Duty®: Warzone™,0.200000,0.450000,0.500000,0.600000,0.650000,0.500000,0.400000,0.500000,0.450000,0.450000,...,0.350000,0.550000,0.300000,0.050000,0.074536,1.000000,0.350000,0.700000,0.0,0.273861
Their Land,0.250000,0.200000,0.300000,0.300000,0.250000,0.250000,0.300000,0.300000,0.300000,0.250000,...,0.300000,0.350000,0.300000,0.150000,0.074536,0.350000,1.000000,0.250000,0.1,0.273861
Call of Duty®: Modern Warfare® II,0.200000,0.500000,0.350000,0.450000,0.850000,0.500000,0.350000,0.350000,0.450000,0.500000,...,0.350000,0.550000,0.250000,0.000000,0.074536,0.700000,0.250000,1.000000,0.0,0.091287
SteamVR,0.000000,0.000000,0.000000,0.100000,0.000000,0.100000,0.000000,0.100000,0.000000,0.100000,...,0.100000,0.000000,0.100000,0.000000,0.000000,0.000000,0.100000,0.000000,1.0,0.000000


# Game Description Similarity Matrix

In [None]:
# Vectorize the game descriptions with TF-IDF (English stop words removed)
# so they can be compared with cosine similarity.
descriptions = games_df['Game Description'].fillna("")  # missing text becomes an empty string
games_df['Game Description'] = descriptions

tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(descriptions)

In [None]:
# Pairwise cosine similarity between the TF-IDF vectors of the descriptions.
desc_similarity = cosine_similarity(tfidf_matrix)

# Wrap the raw array in a frame labelled by game title on both axes for display.
df_desc_similarity = pd.DataFrame(
    data=desc_similarity,
    index=games_df['Title'],
    columns=games_df['Title'],
)
display(df_desc_similarity)

Title,Baldur's Gate 3,Counter-Strike: Global Offensive,Apex Legends™,Forza Horizon 5,Call of Duty®,PUBG: BATTLEGROUNDS,Cyberpunk 2077,Rust,Grand Theft Auto V,Tom Clancy's Rainbow Six® Siege,...,VERGE:Lost chapter,Resident Evil Re:Verse,HAWKEN REBORN,100 hidden cups,O2Jam Online,Call of Duty®: Warzone™,Their Land,Call of Duty®: Modern Warfare® II,SteamVR,Resident Evil 4 Chainsaw Demo
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Baldur's Gate 3,1.000000,0.011879,0.000000,0.000000,0.000000,0.000000,0.068427,0.00000,0.000000,0.020796,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.016413,0.000000,0.000000,0.019236
Counter-Strike: Global Offensive,0.011879,1.000000,0.031633,0.008580,0.000000,0.021394,0.009158,0.00000,0.003424,0.033407,...,0.000000,0.024503,0.010514,0.000000,0.013480,0.035851,0.060391,0.055060,0.000000,0.006750
Apex Legends™,0.000000,0.031633,1.000000,0.009066,0.000000,0.133719,0.000000,0.00000,0.058295,0.044224,...,0.077578,0.033358,0.041398,0.000000,0.028941,0.059600,0.035139,0.000000,0.020626,0.011877
Forza Horizon 5,0.000000,0.008580,0.009066,1.000000,0.000000,0.006426,0.045037,0.00000,0.030851,0.000000,...,0.000000,0.000000,0.045600,0.014675,0.004436,0.000000,0.028657,0.000000,0.000000,0.015343
Call of Duty®,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.00000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.148785,0.000000,0.363556,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Call of Duty®: Warzone™,0.000000,0.035851,0.059600,0.000000,0.148785,0.030728,0.000000,0.00000,0.000000,0.000000,...,0.000000,0.000000,0.129441,0.000000,0.037969,1.000000,0.016163,0.042028,0.014648,0.000000
Their Land,0.016413,0.060391,0.035139,0.028657,0.000000,0.035303,0.024636,0.03746,0.016621,0.000000,...,0.000000,0.025393,0.015951,0.020041,0.020451,0.016163,1.000000,0.000000,0.010258,0.020954
Call of Duty®: Modern Warfare® II,0.000000,0.055060,0.000000,0.000000,0.363556,0.000000,0.000000,0.00000,0.017162,0.000000,...,0.000000,0.027645,0.037182,0.000000,0.052840,0.042028,0.000000,1.000000,0.000000,0.000000
SteamVR,0.000000,0.000000,0.020626,0.000000,0.000000,0.007310,0.000000,0.00000,0.000000,0.000000,...,0.000000,0.000000,0.010175,0.000000,0.013045,0.014648,0.010258,0.000000,1.000000,0.000000


# Recommend Game

In [None]:
def recommend_games(game_name, similarity_matrix, top_n=5):
    """Print and return the games most similar to ``game_name``.

    Args:
        game_name: Title to look up in ``game_genre_df.index``.
        similarity_matrix: Pairwise similarity scores, indexed by row position;
            row ``i`` is expected to belong to the i-th title of
            ``game_genre_df.index``.
        top_n: Maximum number of recommendations to produce.

    Returns:
        The recommended titles, most similar first. The queried game itself is
        never included.

    Raises:
        ValueError: If ``game_name`` is not in ``game_genre_df.index``.
    """
    titles = list(game_genre_df.index)
    game_idx = titles.index(game_name)  # raises ValueError for unknown titles

    scores = np.asarray(similarity_matrix[game_idx], dtype=float)
    # Rank every game from most to least similar; the stable sort keeps ties in
    # their original order so the result is deterministic.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the queried game explicitly. It is not guaranteed to be the single
    # top-scoring entry (other games may tie with it, or its own score may be 0).
    similar_games_idx = ranked[ranked != game_idx][:max(top_n, 0)]

    recommended = [titles[i] for i in similar_games_idx]
    for position, rec in enumerate(recommended, start=1):
        print(f"Game {position}: {rec}")

    if recommended:
        # Kept for compatibility: re-index games_df by title so that
        # games_df.loc[title, 'Game Description'] works after a recommendation run.
        games_df.index = games_df['Title']
    return recommended

In [None]:
# Ask for a title and print recommendations from both similarity matrices.
# For a non-interactive run, assign a fixed title instead, e.g. "Slay the Spire".
favorite_game = input("What's your favorite game? ")
try:
    for heading, matrix in (
        ("\nSteam recommendation by popular tags", tag_similarity),
        ("\nSteam recommendation by game description", desc_similarity),
    ):
        print(heading)
        recommendations = recommend_games(favorite_game, matrix, 5)
except ValueError:
    # recommend_games raises ValueError when the title is not in the game index.
    print("That game is not in our list!")

What's your favorite game? Forza Horizon 5

Steam recommendation by popular tags
Game 1: Forza Horizon 4
Game 2: Need For Speed: Hot Pursuit
Game 3: The Crew™ 2
Game 4: Need for Speed™ Heat
Game 5: Need for Speed™ Rivals

Steam recommendation by game description
Game 1: Catto Pew Pew!
Game 2: Portal
Game 3: Death Horizon: Reloaded
Game 4: Motor Town: Behind The Wheel
Game 5: Trailmakers


# Dataset Info

## Display all game titles

In [None]:
# Show at most 10 rows, with up to 500 characters per column value.
# (Set display.max_rows to None to list every game instead.)
pd.options.display.max_rows = 10
pd.options.display.max_colwidth = 500
display(games_df['Game Description'])

Unnamed: 0_level_0,Game Description
Title,Unnamed: 1_level_1
Baldur's Gate 3,"Baldur’s Gate 3 is a story-rich, party-based RPG set in the universe of Dungeons & Dragons, where your choices shape a tale of fellowship and betrayal, survival and sacrifice, and the lure of absolute power."
Counter-Strike: Global Offensive,"Counter-Strike: Global Offensive (CS: GO) expands upon the team-based action gameplay that it pioneered when it was launched 19 years ago. CS: GO features new maps, characters, weapons, and game modes, and delivers updated versions of the classic CS content (de_dust2, etc.)."
Apex Legends™,"Apex Legends is the award-winning, free-to-play Hero Shooter from Respawn Entertainment. Master an ever-growing roster of legendary characters with powerful abilities, and experience strategic squad play and innovative gameplay in the next evolution of Hero Shooter and Battle Royale."
Forza Horizon 5,"Your Ultimate Horizon Adventure awaits! Explore the vibrant open world landscapes of Mexico with limitless, fun driving action in the world’s greatest cars. Conquer the rugged Sierra Nueva in the ultimate Horizon Rally experience. Requires Forza Horizon 5 game, expansion sold separately."
Call of Duty®,"Welcome to Call of Duty® HQ, the home of Call of Duty®: Modern Warfare® III, Call of Duty®: Modern Warfare® II and Warzone™."
...,...
Call of Duty®: Warzone™,"Welcome to Warzone™, the massive free-to-play combat arena which now features the brand-new map, Al Mazrah."
Their Land,"Their Land is an action-adventure first-person game with puzzles and an exciting story to experience. You play Jeremy, a 19 year old orphan who explores an untrodden island with his much older and more experienced friends. Will you fall victim to this fabled lands many perils?"
Call of Duty®: Modern Warfare® II,Call of Duty®: Modern Warfare® II drops players into an unprecedented global conflict that features the return of the iconic Operators of Task Force 141.
SteamVR,"Grab SteamVR to access and play VR games using your HTC Vive, Oculus Rift, Windows Mixed Reality headset, or any other supported VR headset and controllers."


## Display all steam tags

In [None]:
# Show at most 10 rows of the tag table.
# (Set display.max_rows to None to list every tag instead.)
pd.options.display.max_rows = 10
display(all_genres)

Unnamed: 0,Tags
0,'RPG'
1,'Choices Matter'
2,'Character Customization'
3,'Story Rich'
4,'Adventure'
...,...
727,'Sokoban'
728,'Philosophical'
729,'Cartoony'
730,'Underground'


# Accuracy

In [None]:
# List of actual scores
# NOTE(review): these relevance labels are hard-coded and not derived from the
# recommended games -- confirm they are meant as placeholder ground truth.
actual_scores = [1, 0, 1, 0, 1]

# Predicted scores
def get_predictions(game_name, similarity_matrix, top_n=5):
    """Return the ``top_n`` highest similarity scores of other games to ``game_name``.

    Args:
        game_name: Title to look up in ``game_genre_df.index``.
        similarity_matrix: Pairwise similarity scores, indexed by row position;
            row ``i`` is expected to belong to the i-th title of
            ``game_genre_df.index``.
        top_n: Maximum number of scores to return.

    Returns:
        A list of scores in descending order. The queried game's similarity to
        itself is excluded, so the scores describe the games that would be
        recommended rather than the query.

    Raises:
        ValueError: If ``game_name`` is not in ``game_genre_df.index``.
    """
    game_idx = list(game_genre_df.index).index(game_name)
    similarity_scores = np.asarray(similarity_matrix[game_idx], dtype=float)
    # Remove the game's own entry before ranking the remaining scores.
    other_scores = np.delete(similarity_scores, game_idx)
    # Slicing from the front of the descending order also handles top_n == 0,
    # which yields an empty list.
    top_similarities = np.sort(other_scores)[::-1][:max(top_n, 0)]
    return list(top_similarities)

In [None]:
# Evaluation of Popular Tags: compare the top tag-similarity scores with actual_scores.
print("\nEvaluation of Popular Tags")

predicted_scores_tags = get_predictions(favorite_game, tag_similarity, top_n=5)
# Use as many reference labels as there are predictions.
true_relevance_tags = actual_scores[:len(predicted_scores_tags)]

# Error of the raw similarity scores against the labels.
mae_tags = mean_absolute_error(true_relevance_tags, predicted_scores_tags)
mse_tags = mean_squared_error(true_relevance_tags, predicted_scores_tags)

# Accuracy after thresholding: scores of 0.5 and above count as relevant (1).
predicted_binary_tags = np.where(np.asarray(predicted_scores_tags) >= 0.5, 1, 0).tolist()
accuracy_tags = accuracy_score(true_relevance_tags, predicted_binary_tags)

print(f"MAE: {mae_tags}")
print(f"MSE: {mse_tags}")
print(f"Accuracy: {accuracy_tags}")


Evaluation of Popular Tags
MAE: 0.43000000000000005
MSE: 0.2985
Accuracy: 0.6


In [None]:
# Evaluation of Game Descriptions: compare the top description-similarity scores with actual_scores.
print("\nEvaluation of Game Descriptions")

predicted_scores_desc = get_predictions(favorite_game, desc_similarity, top_n=5)
# Use as many reference labels as there are predictions.
true_relevance_desc = actual_scores[:len(predicted_scores_desc)]

# Error of the raw similarity scores against the labels.
mae_desc = mean_absolute_error(true_relevance_desc, predicted_scores_desc)
mse_desc = mean_squared_error(true_relevance_desc, predicted_scores_desc)

# Accuracy after thresholding: scores of 0.5 and above count as relevant (1).
predicted_binary_desc = np.where(np.asarray(predicted_scores_desc) >= 0.5, 1, 0).tolist()
accuracy_desc = accuracy_score(true_relevance_desc, predicted_binary_desc)

print(f"MAE: {mae_desc}")
print(f"MSE: {mse_desc}")
print(f"Accuracy: {accuracy_desc}")


Evaluation of Game Descriptions
MAE: 0.4069225153623509
MSE: 0.2842923829200349
Accuracy: 0.6
