In [1]:
import random
from pathlib import Path
from typing import Generator, Callable
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import pandas as pd
import re
import numpy as np
import seaborn as sns
from difflib import get_close_matches
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics.pairwise import linear_kernel
import ast
from sklearn import preprocessing
import matplotlib as mpl
from warnings import simplefilter

simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

In [2]:
data = Path("data")
dataset_path = data / "merged_data.csv"
Predictor = Callable[[str, int], list[tuple[str, float]]]

In [3]:
games_df = pd.read_csv(dataset_path)
games_df

Unnamed: 0,Title,Original Price,Discounted Price,Release Date,Link,Game Description,Recent Reviews Summary,All Reviews Summary,Recent Reviews Number,All Reviews Number,Developer,Publisher,Supported Languages,Popular Tags,Game Features,Minimum Requirements
0,Baldur's Gate 3,$29.99,$29.99,"3 Aug, 2023",https://store.steampowered.com/app/1086940/Bal...,"Baldur’s Gate 3 is a story-rich, party-based R...",Overwhelmingly Positive,Very Positive,"- 96% of the 128,900 user reviews in the last ...","- 94% of the 188,617 user reviews for this gam...",Larian Studios,Larian Studios,"['English', 'French', 'German', 'Spanish - Spa...","['RPG', 'Choices Matter', 'Character Customiza...","['Single-player', 'Online Co-op', 'LAN Co-op',...",Requires a 64-bit processor and operating syst...
1,Counter-Strike: Global Offensive,$14.99,$14.99,"21 Aug, 2012",https://store.steampowered.com/app/730/Counter...,Counter-Strike: Global Offensive (CS: GO) expa...,Very Positive,Very Positive,"- 89% of the 75,284 user reviews in the last 3...","- 88% of the 7,428,921 user reviews for this g...","Valve, Hidden Path Entertainment",Valve,"['English', 'Czech', 'Danish', 'Dutch', 'Finni...","['FPS', 'Shooter', 'Multiplayer', 'Competitive...","['Steam Achievements', 'Full controller suppor...",OS: | Windows® 7/Vista/XP | Processor: | Int...
2,Apex Legends™,Free,Free,"4 Nov, 2020",https://store.steampowered.com/app/1172470/Ape...,"Apex Legends is the award-winning, free-to-pla...",Mixed,Very Positive,"- 65% of the 18,581 user reviews in the last 3...","- 80% of the 701,597 user reviews for this gam...",Respawn Entertainment,Electronic Arts,"['English', 'French', 'Italian', 'German', 'Sp...","['Free to Play', 'Multiplayer', 'Battle Royale...","['Online PvP', 'Online Co-op', 'Steam Achievem...",Requires a 64-bit processor and operating syst...
3,Forza Horizon 5,$34.78,$17.39,"8 Nov, 2021",https://store.steampowered.com/app/1551360/For...,Your Ultimate Horizon Adventure awaits! Explor...,Very Positive,Very Positive,"- 87% of the 4,120 user reviews in the last 30...","- 88% of the 123,162 user reviews for this gam...",Playground Games,Xbox Game Studios,"['English', 'French', 'Italian', 'German', 'Sp...","['Racing', 'Open World', 'Driving', 'Multiplay...","['Single-player', 'Online PvP', 'Online Co-op'...",Requires a 64-bit processor and operating syst...
4,Call of Duty®,Free,Free,"27 Oct, 2022",https://store.steampowered.com/app/1938090/Cal...,"Welcome to Call of Duty® HQ, the home of Call ...",Mixed,Mixed,"- 49% of the 8,257 user reviews in the last 30...","- 57% of the 236,876 user reviews for this gam...","Infinity Ward, Raven Software, Beenox, Treyarc...",Activision,"['English', 'French', 'Italian', 'German', 'Sp...","['FPS', 'Multiplayer', 'Shooter', 'Action', 'S...","['Single-player', 'Online PvP', 'Online Co-op'...",Requires a 64-bit processor and operating syst...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71695,The Front,Free,Free,To be announced,https://store.steampowered.com/app/2285150/The...,The Front is a survival-open-world-crafting (S...,,,,,Samar Studio,Samar Studio,"['English', 'French', 'German', 'Russian', 'Ja...","['Early Access', 'Survival', 'Building', 'Sand...","['Single-player', 'MMO', 'Online PvP', 'LAN Pv...",OS: | Requires a 64-bit processor and operati...
71696,The Bornless,Free,Free,To be announced,https://store.steampowered.com/app/2411280/The...,"Welcome to The Bornless, an action horror FPS....",,,,,Cathedral Studios,Cathedral Studios,"['English', 'French', 'Italian', 'German', 'Sp...","['Early Access', 'Horror', 'FPS', 'Shooter', '...","['Single-player', 'Online PvP', 'Online Co-op'...",OS: | Windows 10 or higher | Processor: | In...
71697,Winter Memories,Free,Free,Coming soon,https://store.steampowered.com/app/2495450/Win...,After the unforgettable summer memories you’ve...,,,,,Dojin Otome,Kagura Games,"['English', 'Japanese', 'Simplified Chinese']","['Casual', 'RPG', 'Simulation', 'Sexual Conten...",['Single-player'],OS: | Windows® 7/8/8.1/10/11 | Processor: | ...
71698,Call of Duty®: Modern Warfare® III,$69.99,$69.99,Coming soon,https://store.steampowered.com/app/2519060/Cal...,In the direct sequel to the record-breaking Ca...,,,,,"Sledgehammer Games, Treyarch, Infinity Ward, B...",Activision,"['English', 'French', 'Italian', 'German', 'Sp...","['Action', 'FPS', 'Shooter', 'First-Person', '...","['Single-player', 'Online PvP', 'Online Co-op'...",Requires a 64-bit processor and operating syst...


In [4]:
def normalize_column(row: pd.Series, max_: float = 1.0) -> pd.Series:
    new_row = row - row.min()
    return new_row / (new_row.max() / max_)


_test_df = pd.DataFrame(np.random.randint(50, 100, size=(100, 4)), columns=list("ABCD"))
row = normalize_column(_test_df["A"], max_=100)
row.mean(), row.min(), row.max()

(53.40816326530613, 0.0, 100.0)

In [5]:
def preprocess_df(df: pd.DataFrame) -> pd.DataFrame:
    # Remove TM and R in Titles
    df["Title"] = df["Title"].str.replace("™", "").str.replace("®", "")

    # Add reviews count
    for idx, row in tqdm(df.iterrows()):
        review_number = row["All Reviews Number"]
        if review_number is np.nan:
            continue

        found = re.search(r"- \d{1,2}% of the ([0-9,]+) user reviews for this game are positive\.", review_number)
        if found is None:
            continue

        found = int(found[1].replace(",", ""))
        df.loc[idx, "Reviews Count"] = found

    # df["Reviews Count"] = df["Reviews Count"].fillna(0)
    df = df.dropna(subset=["Reviews Count"])

    # Delete unrelated fields
    df = df.drop(
        columns=[
            "Original Price",
            "Discounted Price",
            "Link",
            "Recent Reviews Summary",
            "Recent Reviews Number",
            "All Reviews Number",
            "Supported Languages",
            "Minimum Requirements",
        ]
    )

    # Exclude unpublished games
    df = df[~df["Release Date"].isin(("Coming soon", "To be announced"))]

    # Convert timestamp
    df["Release Date"] = pd.to_datetime(df["Release Date"], format="%d %b, %Y", errors="coerce")
    df = df.dropna(subset=["Release Date"])
    df["Year"] = df["Release Date"].dt.year
    df = df.drop(columns=["Release Date"])

    # Convert to lists
    df["Popular Tags"] = df["Popular Tags"].apply(ast.literal_eval)
    df["Game Features"] = df["Game Features"].apply(ast.literal_eval)

    # Reviews to score
    rewies_to_score = {
        "Overwhelmingly Negative": 0,
        "Mostly Negative": 1,
        "Mixed": 2,
        "Positive": 3,
        "Very Positive": 4,
        "Mostly Positive": 5,
        "Overwhelmingly Positive": 6,
        np.nan: 0,
    }
    for idx, row in tqdm(df.iterrows()):
        df.loc[idx, "All Reviews Summary"] = rewies_to_score[row["All Reviews Summary"]]

    # Fill empty Game Description
    df["Game Description"] = df["Game Description"].fillna("")

    df.reset_index(drop=True, inplace=True)

    df["Reviews Count Normalized"] = normalize_column(df["Reviews Count"], 1)
    df["All Reviews Summary Normalized"] = normalize_column(df["All Reviews Summary"], 1)
    for idx, row in tqdm(df.iterrows()):
        df.loc[idx, "Score"] = row["All Reviews Summary Normalized"] * row["Reviews Count Normalized"]

    df.to_csv(data / "df_processed.csv")

    return df


df_processed = preprocess_df(games_df.copy(deep=True))
df_processed

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

Unnamed: 0,Title,Game Description,All Reviews Summary,Developer,Publisher,Popular Tags,Game Features,Reviews Count,Year,Reviews Count Normalized,All Reviews Summary Normalized,Score
0,Baldur's Gate 3,"Baldur’s Gate 3 is a story-rich, party-based R...",4,Larian Studios,Larian Studios,"[RPG, Choices Matter, Character Customization,...","[Single-player, Online Co-op, LAN Co-op, Steam...",188617.0,2023,0.025388,0.666667,0.016925
1,Counter-Strike: Global Offensive,Counter-Strike: Global Offensive (CS: GO) expa...,4,"Valve, Hidden Path Entertainment",Valve,"[FPS, Shooter, Multiplayer, Competitive, Actio...","[Steam Achievements, Full controller support, ...",7428921.0,2012,1.000000,0.666667,0.666667
2,Apex Legends,"Apex Legends is the award-winning, free-to-pla...",4,Respawn Entertainment,Electronic Arts,"[Free to Play, Multiplayer, Battle Royale, Sho...","[Online PvP, Online Co-op, Steam Achievements,...",701597.0,2020,0.094440,0.666667,0.062960
3,Forza Horizon 5,Your Ultimate Horizon Adventure awaits! Explor...,4,Playground Games,Xbox Game Studios,"[Racing, Open World, Driving, Multiplayer, Aut...","[Single-player, Online PvP, Online Co-op, Cros...",123162.0,2021,0.016577,0.666667,0.011051
4,Call of Duty,"Welcome to Call of Duty® HQ, the home of Call ...",2,"Infinity Ward, Raven Software, Beenox, Treyarc...",Activision,"[FPS, Multiplayer, Shooter, Action, Singleplay...","[Single-player, Online PvP, Online Co-op, Cros...",236876.0,2022,0.031884,0.333333,0.010628
...,...,...,...,...,...,...,...,...,...,...,...,...
5326,O2Jam Online,From the iconic songs to customizing your game...,0,VALOFE,VALOFE,"[Casual, Rhythm, Relaxing, Colorful, Score Att...","[Single-player, In-App Purchases]",1805.0,2023,0.000241,0.0,0.000000
5327,Call of Duty: Warzone,"Welcome to Warzone™, the massive free-to-play ...",1,"Infinity Ward, Raven Software, Beenox, Treyarc...",Activision,"[Free to Play, Battle Royale, Multiplayer, FPS...","[Single-player, Online PvP, Online Co-op, Cros...",24385.0,2022,0.003281,0.166667,0.000547
5328,Their Land,Their Land is an action-adventure first-person...,1,Thomas,Thomas,"[Singleplayer, Adventure, Action, Action-Adven...","[Single-player, Steam Achievements, Partial Co...",1220.0,2023,0.000163,0.166667,0.000027
5329,Call of Duty: Modern Warfare II,Call of Duty®: Modern Warfare® II drops player...,0,"Infinity Ward, Raven Software, Beenox, Treyarc...",Activision,"[Action, FPS, Multiplayer, Singleplayer, Shoot...","[Single-player, Online PvP, Online Co-op, Cros...",1516.0,2022,0.000202,0.0,0.000000


In [6]:
# Поиск игры по ближайшему названию
_names = list(df_processed["Title"].unique())


def find_names(name: str, count: int) -> list[str]:
    names = get_close_matches(name, _names, n=count * 3)
    names = (
        df_processed[df_processed["Title"].isin(names)]
        .sort_values(by="Score", ascending=False)["Title"]
        .values.tolist()
    )

    concrete = df_processed[df_processed["Title"] == name]
    if len(concrete) != 0:
        found_name = concrete.iloc[0]["Title"]
        names.remove(found_name)
        names.insert(0, found_name)

    return names[:count]


def find_name(name: str) -> str:
    found_names = find_names(name, count=1)
    if len(found_names) == 0:
        raise KeyError(f"Name not found: {name}")
    return found_names[0]


def describe(title: str) -> str:
    game = df_processed[df_processed["Title"] == title].iloc[0]
    return f"{title}, year: {game['Year']}, reviews_count: {game['Reviews Count']}, review: {game['All Reviews Summary']}, score: {round(game['Score'], 5)}"


find_names("Half Life", count=5), find_name("Left 4 Dead 2"), describe("Half-Life 2")

(['Half-Life 2',
  'Half-Life: Alyx',
  'Wobbly Life',
  'Galaxy Life',
  'Immortal Life'],
 'Left 4 Dead 2',
 'Half-Life 2, year: 2007, reviews_count: 131995.0, review: 6, score: 0.01777')

In [7]:
def best_from(count: int, field: str, name: str) -> list[str]:
    return list(df_processed[df_processed[field] == name].sort_values(by="Score", ascending=False).Title[:count])


best_from(count=10, field="Developer", name="Respawn Entertainment"), best_from(
    count=10, field="Publisher", name="Valve"
)

(['Apex Legends',
  'Titanfall 2',
  'STAR WARS Jedi: Fallen Order Deluxe Edition',
  'STAR WARS Jedi: Fallen Order',
  'Medal of Honor: Above and Beyond'],
 ['Counter-Strike: Global Offensive',
  'Dota 2',
  "Garry's Mod",
  'Team Fortress 2',
  'Left 4 Dead 2',
  'Portal 2',
  'Half-Life 2',
  'Portal',
  'Counter-Strike: Source',
  'Half-Life: Alyx'])

In [8]:
# KNN
def preprocess_df_knn(df: pd.DataFrame) -> pd.DataFrame:
    # OneHotEncoding for Tags and Features
    tags_and_features = set()
    for idx, row in tqdm(df.iterrows()):
        tags = row["Popular Tags"]
        for tag in tags:
            df.loc[idx, f"Tag {tag}"] = 1
            tags_and_features.add(f"Tag {tag}")

        features = row["Game Features"]
        for feature in features:
            df.loc[idx, f"Feature {feature}"] = 1
            tags_and_features.add(f"Feature {feature}")

    for col in tqdm(tags_and_features):
        df[col] = df[col].replace(np.nan, 0)

    df = df.drop(columns=["Popular Tags", "Game Features"])

    # OneHotEncoding for regular fields
    # df = pd.get_dummies(df, columns=["Publisher"])

    df = df.drop(
        columns=[
            "Title",
            "Game Description",
            "Developer",
            "Publisher",
            "Reviews Count Normalized",
            "All Reviews Summary Normalized",
            "Score",
        ]
    )

    df["Year"] = normalize_column(df["Year"], 5)
    df["Reviews Count"] = normalize_column(df["Reviews Count"], 0.5)
    df["All Reviews Summary"] = normalize_column(df["All Reviews Summary"], 0.5)

    return df


df_knn = preprocess_df_knn(df_processed.copy(deep=True))
df_knn

0it [00:00, ?it/s]

  0%|          | 0/471 [00:00<?, ?it/s]

Unnamed: 0,All Reviews Summary,Reviews Count,Year,Tag RPG,Tag Choices Matter,Tag Character Customization,Tag Story Rich,Tag Adventure,Tag Online Co-Op,Tag CRPG,...,Tag Birds,Tag Spelling,Tag Fox,Tag Mahjong,Tag Snooker,Tag Steam Machine,Tag Video Production,Feature Mods,Feature Mods (require HL2),Tag Web Publishing
0,0.333333,0.012694,5.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.333333,0.500000,2.380952,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.333333,0.047220,4.285714,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.333333,0.008289,4.523810,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.166667,0.015942,4.761905,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5326,0.0,0.000121,5.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5327,0.083333,0.001640,4.761905,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5328,0.083333,0.000081,5.000000,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5329,0.0,0.000101,4.761905,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
def get_knn(df: pd.DataFrame) -> NearestNeighbors:
    knn = NearestNeighbors(p=1)
    knn.fit(df.values)

    # dump
    with open(data / "knn.pkl", "wb") as f:
        pickle.dump(knn, f)

    # load
    # with open(data / 'knn.pkl', 'rb') as f:
    #     knn = pickle.load(f)

    return knn


knn = get_knn(df_knn)

In [10]:
def recomend_knn(target_name: str, top: int = 10) -> list[tuple[str, float]]:
    target_idx = df_processed[df_processed["Title"] == target_name].index[0]

    row = df_knn.iloc[target_idx]

    distances, idxs = knn.kneighbors([row], top + 1, return_distance=True)
    distances, idxs = distances[0], idxs[0]
    list_ = []

    for i, idx in enumerate(idxs):
        list_.append((df_processed["Title"].iloc[idx], distances[i]))

    return list_


recomend_knn("It Takes Two", top=5)

[('It Takes Two', 0.0),
 ('Unravel Two', 18.412155588006645),
 ('Sonic Colors: Ultimate', 19.817160597105943),
 ('A Way Out', 21.410222262399884),
 ('Trine 4: The Nightmare Prince', 21.649901447747673),
 ('Fireboy & Watergirl: Elements', 21.650473469830594)]

In [11]:
def get_cosine_sim(df: pd.DataFrame) -> NearestNeighbors:
    cos_sim = cosine_similarity(df)

    # dump
    with open(data / "cos_sim.pkl", "wb") as f:
        pickle.dump(cos_sim, f)

    # load
    # with open(data / 'cos_sim.pkl', 'rb') as f:
    #     cos_sim = pickle.load(f)

    return cos_sim


cosine_sim = get_cosine_sim(df_knn)
cosine_sim.shape

(5331, 5331)

In [12]:
_indices = pd.Series(df_processed.index, index=df_processed["Title"]).drop_duplicates()


def get_recomend_cosim(cosim_matrix: np.ndarray) -> Predictor:
    def recomend(title: str, top: int = 10) -> list[tuple[str, float]]:
        idx = _indices[title]

        sim_scores = list(enumerate(cosim_matrix[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1 : top + 1]
        game_indices = [i[0] for i in sim_scores]

        list_ = df_processed["Title"].iloc[game_indices].tolist()
        list_ = [[list_[idx], score[1]] for idx, score in enumerate(sim_scores)]
        return list_

    return recomend

In [13]:
recomend_cosine_sim = get_recomend_cosim(cosine_sim)
recomend_cosine_sim.__name__ = "recomend_cosine_sim"
recomend_cosine_sim("Half-Life 2", top=3), recomend_cosine_sim("Cyberpunk 2077", top=3)

([['Half-Life 2: Episode Two', 0.8557271480367542],
  ['Half-Life 2: Episode One', 0.8236379639688125],
  ['Black Mesa', 0.7259981585523272]],
 [['Cloudpunk', 0.8255373817861276],
  ['The Elder Scrolls II: Daggerfall', 0.8184796003946935],
  ['Deus Ex: Mankind Divided', 0.8163374216410503]])

In [14]:
def get_cosine_sim_tf_idf(df: pd.DataFrame) -> np.ndarray:
    # Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
    tfidf = TfidfVectorizer(stop_words="english")

    # Construct the required TF-IDF matrix by fitting and transforming the data
    tfidf_matrix = tfidf.fit_transform(df["Game Description"])

    # Output the shape of tfidf_matrix
    print(tfidf_matrix.shape, tfidf.get_feature_names_out()[2000:2010])

    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # dump
    with open(data / "cosine_sim.pkl", "wb") as f:
        pickle.dump(cosine_sim, f)

    # load
    # with open(data / 'cosine_sim_tf_idf.pkl', 'rb') as f:
    #     cosine_sim = pickle.load(f)

    return cosine_sim


cosine_sim_tf_idf = get_cosine_sim_tf_idf(df_processed)
cosine_sim_tf_idf.shape

(5331, 15223) ['bundled' 'bunker' 'bunnies' 'bunny' 'burden' 'burdened' 'burdens'
 'burdensome' 'bureau' 'bureaucracy']


(5331, 5331)

In [15]:
recomend_cosine_sim_tf_idf = get_recomend_cosim(cosine_sim_tf_idf)
recomend_cosine_sim_tf_idf.__name__ = "recomend_cosine_sim_tf_idf"
recomend_cosine_sim_tf_idf("Half-Life 2", top=3), recomend_cosine_sim("Cyberpunk 2077", top=3)

([['Half-Life 2: Episode One', 0.2629285750621357],
  ['The Last of Us Part I', 0.1773906146022805],
  ['Half-Life: Alyx', 0.1711819321882433]],
 [['Cloudpunk', 0.8255373817861276],
  ['The Elder Scrolls II: Daggerfall', 0.8184796003946935],
  ['Deus Ex: Mankind Divided', 0.8163374216410503]])

In [16]:
def get_combined_model(*models: np.ndarray) -> np.ndarray:
    model = np.mean(np.array(models), axis=0)

    # dump
    with open(data / "combined_model.pkl", "wb") as f:
        pickle.dump(model, f)

    # load
    # with open(data / 'combined_model.pkl', 'rb') as f:
    #     model = pickle.load(f)

    return model


combined_cosine_sim = get_combined_model(cosine_sim_tf_idf, cosine_sim / 2)
combined_cosine_sim.shape

(5331, 5331)

In [17]:
recomend_combined_cosine_sim = get_recomend_cosim(combined_cosine_sim)
recomend_combined_cosine_sim.__name__ = "recomend_combined_cosine_sim"
recomend_combined_cosine_sim("Half-Life 2", top=3)

[['Half-Life 2: Episode One', 0.337373778523271],
 ['Half-Life 2: Episode Two', 0.2761986276184936],
 ['Black Mesa', 0.25516562705116086]]

In [18]:
def closest_to(predictor: Predictor, target_name: str, n: int) -> None:
    print(describe(target_name))
    for predict_name, conf in predictor(target_name, 4):
        if predict_name == target_name:
            continue
        print(f"{round(conf, 5)}: {describe(predict_name)}")
    print()


for model in (recomend_knn, recomend_cosine_sim_tf_idf, recomend_cosine_sim, recomend_combined_cosine_sim):
    print(model.__name__.upper())
    closest_to(model, "Counter-Strike: Source", n=5)

RECOMEND_KNN
Counter-Strike: Source, year: 2004, reviews_count: 105942.0, review: 6, score: 0.01426
19.1261: Tom Clancy's Ghost Recon, year: 2008, reviews_count: 1151.0, review: 4, score: 0.0001
19.36233: Call of Duty: Modern Warfare 2 (2009), year: 2009, reviews_count: 28905.0, review: 4, score: 0.00259
19.54895: Insurgency, year: 2014, reviews_count: 86222.0, review: 4, score: 0.00774
20.8389: Red Orchestra 2: Heroes of Stalingrad with Rising Storm, year: 2011, reviews_count: 23292.0, review: 4, score: 0.00209

RECOMEND_COSINE_SIM_TF_IDF
Counter-Strike: Source, year: 2004, reviews_count: 105942.0, review: 6, score: 0.01426
0.406: Counter-Strike Nexon: Studio, year: 2014, reviews_count: 41068.0, review: 2, score: 0.00184
0.22978: Left 4 Dead, year: 2008, reviews_count: 41313.0, review: 6, score: 0.00556
0.21379: Half-Life Deathmatch: Source, year: 2006, reviews_count: 3083.0, review: 5, score: 0.00034
0.17505: Sudden Strike 4, year: 2017, reviews_count: 5364.0, review: 5, score: 0.000

In [21]:
def closest_to_best(predictor: Predictor, n: int, best: int) -> None:
    best_sellers = df_processed.loc[
        df_processed["Publisher"].isin(("Valve", "Activision", "Electronic Arts"))
    ].sort_values(by="Reviews Count", ascending=False)[:best]
    for i in range(n):
        target_name = best_sellers["Title"].iloc[np.random.randint(0, len(best_sellers))]
        closest_to(predictor=predictor, target_name=target_name, n=n)


closest_to_best(recomend_knn, n=4, best=100)

Medal of Honor, year: 2010, reviews_count: 4763.0, review: 4, score: 0.00043
12.71512: Call of Duty 4: Modern Warfare (2007), year: 2007, reviews_count: 17103.0, review: 4, score: 0.00153
12.95239: Call of Duty (2003), year: 2006, reviews_count: 4870.0, review: 4, score: 0.00044
14.4764: Brothers in Arms: Hell's Highway, year: 2008, reviews_count: 1695.0, review: 4, score: 0.00015
14.56008: Far Cry 2, year: 2008, reviews_count: 13029.0, review: 5, score: 0.00146

It Takes Two, year: 2021, reviews_count: 113777.0, review: 6, score: 0.01531
18.41216: Unravel Two, year: 2020, reviews_count: 3923.0, review: 4, score: 0.00035
19.81716: Sonic Colors: Ultimate, year: 2023, reviews_count: 311.0, review: 2, score: 1e-05
21.41022: A Way Out, year: 2020, reviews_count: 32648.0, review: 4, score: 0.00293
21.6499: Trine 4: The Nightmare Prince, year: 2019, reviews_count: 9114.0, review: 4, score: 0.00082

Need for Speed Rivals, year: 2020, reviews_count: 4530.0, review: 5, score: 0.00051
7.08769: N

In [22]:
def closest_to_best_models(n: int, best: int) -> None:
    best_sellers = df_processed.loc[
        df_processed["Publisher"].isin(("Valve", "Activision", "Electronic Arts"))
    ].sort_values(by="Reviews Count", ascending=False)[:best]
    names = []
    for i in range(n):
        names.append(best_sellers["Title"].iloc[np.random.randint(0, len(best_sellers))])

    for model in (recomend_knn, recomend_cosine_sim_tf_idf, recomend_cosine_sim, recomend_combined_cosine_sim):
        print(model.__name__.upper())
        for i in range(n):
            closest_to(predictor=model, target_name=names[i], n=n)


closest_to_best_models(2, 100)

RECOMEND_KNN
Command & Conquer 3: Tiberium Wars, year: 2009, reviews_count: 6201.0, review: 4, score: 0.00056
6.16685: Command & Conquer 3: Kane's Wrath, year: 2009, reviews_count: 3403.0, review: 6, score: 0.00046
10.0002: Command & Conquer: Red Alert 3 - Uprising, year: 2009, reviews_count: 3223.0, review: 4, score: 0.00029
11.00035: Command & Conquer: Red Alert 3, year: 2009, reviews_count: 11362.0, review: 4, score: 0.00102
12.90513: Army Men RTS, year: 2017, reviews_count: 788.0, review: 4, score: 7e-05

Vampire: The Masquerade - Bloodlines, year: 2007, reviews_count: 10920.0, review: 4, score: 0.00098
14.0006: Thief: Deadly Shadows, year: 2007, reviews_count: 1978.0, review: 4, score: 0.00018
15.19111: Thief II: The Metal Age, year: 2012, reviews_count: 1531.0, review: 4, score: 0.00014
16.4292: Marc Eckō's Getting Up: Contents Under Pressure, year: 2013, reviews_count: 1589.0, review: 4, score: 0.00014
16.66734: Black Mirror I, year: 2014, reviews_count: 886.0, review: 4, score: