In [None]:
# install the third-party packages that the cells below import
# (%pip is used so the packages are installed for the running kernel)
%pip install -q ipywidgets
%pip install pandas
%pip install matplotlib
%pip install seaborn
%pip install scikit-learn

# VGM Games – Game Recommending System
A machine learning model that uses historical data to analyze and suggest games with a simple prompt-based GUI

## Import necessary libraries

In [None]:
import pandas as pa
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from ipywidgets import interact, widgets
from IPython.display import display, HTML

# suppress pandas' chained-assignment (SettingWithCopy) warning
pa.options.mode.chained_assignment = None

## Read dataset from file
[Dataset - Video Game Sales with Ratings](https://www.kaggle.com/datasets/rush4ratio/video-game-sales-with-ratings)

In [None]:
# load the raw CSV into a DataFrame and preview its first 10 rows
dataset_path = './Video_Games_Sales_as_at_22_Dec_2016.csv'
videogames_df = pa.read_csv(dataset_path)
videogames_df.head(10)

## Select relevant columns
Use a subset of columns from source

In [None]:
# select columns needed from set
# .copy() makes the subset an independent DataFrame, so the column assignments
# and row drops performed on it later never act on (or warn about) a slice of
# videogames_df
videogames_df_filtered = videogames_df[['Name', 'Platform', 'Global_Sales', 'Genre', 'Critic_Score', 'User_Score']].copy()

## Prepare data
Prepare dataset for training. Remove null or empty values and impute missing values

In [None]:
# count the missing values in each selected column
columns_to_check = ['Name', 'Platform', 'Global_Sales', 'Genre', 'Critic_Score', 'User_Score']
videogames_df_filtered[columns_to_check].isna().sum()

In [None]:
# drop entries with missing values and reset index
# A new frame is built instead of mutating in place: the in-place drop acted on
# a frame derived from videogames_df (the chained-assignment situation pandas
# warns about), and a non-mutating chain gives the same result on every re-run.
videogames_df_filtered = (
    videogames_df_filtered
    .dropna(subset=['Name', 'Platform', 'Global_Sales', 'Genre', 'Critic_Score', 'User_Score'], axis=0)
    .reset_index(drop=True)
)

In [None]:
# re-count the missing values per column now that incomplete rows were dropped
checked_columns = ['Name', 'Platform', 'Global_Sales', 'Genre', 'Critic_Score', 'User_Score']
remaining_missing = videogames_df_filtered[checked_columns].isna()
remaining_missing.sum()

In [None]:

# summary statistics of the User_Score column before it is converted to float
user_score_raw = videogames_df_filtered['User_Score']
user_score_raw.describe()

In [None]:
# scores that are still 'tbd' become NaN; the whole column is then cast to float
user_score = videogames_df_filtered['User_Score']
is_tbd = user_score == 'tbd'
videogames_df_filtered['User_Score'] = np.where(is_tbd, np.nan, user_score).astype(float)

In [None]:
# summary statistics of User_Score after the 'tbd' entries were converted
videogames_df_filtered.loc[:, 'User_Score'].describe()

In [None]:
# fill the remaining NaN user scores with the column mean, then show the new statistics
user_score_with_gaps = videogames_df_filtered['User_Score']
mean_score = user_score_with_gaps.mean()
videogames_df_filtered['User_Score'] = user_score_with_gaps.fillna(mean_score)
videogames_df_filtered['User_Score'].describe()

## Extract categorical columns and create dummy variables
Transform each categorical column into a set of 0/1 indicator (dummy) variables, one per category

In [None]:
# extract categorical columns: every non-numeric column except the game title.
# 'Name' is excluded explicitly rather than by position, so the result does not
# depend on column order, and the numeric-dtype test also recognises text
# columns that pandas stores with a string dtype instead of object.
categorical_columns = [
    col for col in videogames_df_filtered.columns
    if col != 'Name' and not pa.api.types.is_numeric_dtype(videogames_df_filtered[col])
]

# create dummy variables (one indicator column per category value)
videogames_df_dummy = pa.get_dummies(data=videogames_df_filtered, columns=categorical_columns)

## Vectorize names
Computes TF-IDF vectors for the game titles, which weight each word by its importance across the titles in the dataset.
These vectors are used to suggest similarly titled games when the entered game is not found.

In [None]:
# unique game titles, re-indexed from 0 so a matrix row number maps straight to a title
game_names = videogames_df_dummy['Name'].drop_duplicates().reset_index(drop=True)

# learn the TF-IDF vocabulary from the titles and vectorize them in one step
vectorizer = TfidfVectorizer(use_idf=True)
game_title_vectors = vectorizer.fit_transform(game_names)

## Numerical Feature Scaling
Standardizes every feature except the name so that all features are weighted on a comparable scale in the model

In [None]:
# everything except the title is used as a model feature
features = videogames_df_dummy.drop(columns=['Name'])

# standardize the features and keep the result as a DataFrame with the original column names
scale = StandardScaler()
scaled_features = pa.DataFrame(scale.fit_transform(features), columns=features.columns)

## Train model
Fits a Nearest Neighbors model that uses cosine distance as its metric, then queries it for the k nearest neighbors (and their distances) of every game.

In [None]:
# brute-force nearest-neighbour search using cosine distance, 11 neighbours per query
model = NearestNeighbors(n_neighbors=11, metric='cosine', algorithm='brute')
model.fit(scaled_features)

# look up the neighbours (and their distances) of every row in the training data
distances, indices = model.kneighbors(scaled_features)

## Recommender function
Handles 2 scenarios:
- If the entered game name exists, it recommends games based on the model
- If the entered game name does not exist, it recommends games based on the title as alternative

Returns a dictionary containing a dataframe of games and a flag indicating whether the entered game was found

In [None]:
def GameRecommender(entered_videogame):
    """Recommend games for a title entered by the user.

    Parameters
    ----------
    entered_videogame : str
        Game title to look up. It is matched exactly against the 'Name'
        column of ``videogames_df_filtered``.

    Returns
    -------
    dict
        ``'found_game'`` is True when the title exists in the dataset.
        ``'dataframe'`` then holds up to 10 distinct games taken from the
        precomputed nearest neighbours of every row carrying that title,
        ordered by ascending cosine distance, with that distance in the
        'Cosine_Similarity_Distance' column.
        When the title does not exist, ``'found_game'`` is False and
        ``'dataframe'`` has a single 'Name' column with up to 5 titles whose
        TF-IDF vectors are most similar to the entered text; it is empty
        when no title shares a term with the entered text.
    """
    # check if the entered game exists
    videogame_idx = videogames_df_filtered.query("Name == @entered_videogame").index

    if videogame_idx.empty:
        # if the entered game is not found, recommend similar games from title as alternatives
        query_vector = vectorizer.transform([entered_videogame])
        flat_similarity = cosine_similarity(query_vector, game_title_vectors).flatten()

        # positions of the 5 most similar titles, best match first
        top_idx = np.argsort(flat_similarity)[::-1][:5]
        # a similarity of 0 means the title shares no term with the query, so
        # offering it as a "did you mean" suggestion would be arbitrary
        top_idx = top_idx[flat_similarity[top_idx] > 0]

        # create dataframe from found games
        recommended_videogame_list = pa.DataFrame({'Name': game_names.iloc[top_idx].tolist()})
        return {'dataframe': recommended_videogame_list, 'found_game': False}

    # the entered game exists: collect the precomputed neighbours of every row
    # that carries this title (one row per platform release); building all
    # frames first and concatenating once avoids repeated concat in the loop
    neighbor_frames = [
        pa.DataFrame({'Idx': indices[idx], 'Dist': distances[idx]})
        for idx in videogame_idx
    ]
    videogame_dist_idx_df = pa.concat(neighbor_frames, ignore_index=True)
    videogame_dist_idx_df = videogame_dist_idx_df.sort_values(by='Dist', ascending=True, kind='stable')

    # retrieve the neighbouring games and attach each distance to its own row
    # BEFORE any filtering, so a distance always stays paired with its game
    videogame_list = (
        videogames_df_filtered
        .iloc[videogame_dist_idx_df['Idx'].to_numpy()]
        .reset_index(drop=True)
        .assign(Cosine_Similarity_Distance=videogame_dist_idx_df['Dist'].to_numpy())
    )

    # remove the entered game itself (including its releases on other platforms)
    videogame_list = videogame_list[videogame_list['Name'] != entered_videogame]

    # remove duplicate game names, keeping the closest occurrence of each
    videogame_list = videogame_list.drop_duplicates(subset=['Name'], keep='first')

    recommended_videogame_list = videogame_list.head(10).reset_index(drop=True)

    return {'dataframe': recommended_videogame_list, 'found_game': True}

In [None]:
# output area in which the recommendations are rendered
output_widget = widgets.Output()

# free-text field where the user types a game title
game_name_widget = widgets.Text(
    description="Enter Game Name",
    style={'description_width': 'initial'},
    layout=widgets.Layout(display='flex', flex_flow='column', align_items='center'),
)

# button that triggers the recommendation lookup
submit_button = widgets.Button(
    description="Submit",
    button_style='success',
    layout=widgets.Layout(display='flex', justify_content='center'),
)

# function to handle submission, displays recommendations
def on_submit_button(submit_button):
    """Click handler: look up the entered title and render the result.

    Reads the current text of ``game_name_widget``, asks ``GameRecommender``
    for recommendations and replaces the contents of ``output_widget`` with a
    header label followed by the result table. If the recommender returns an
    empty table, only a "nothing found" label is shown.

    Parameters
    ----------
    submit_button : the button instance passed in by ``on_click`` (unused).
    """
    entered_game = game_name_widget.value

    # run the recommender once and reuse the result for both table and header
    result = GameRecommender(entered_game)
    recommended_games = result['dataframe']
    has_results = not recommended_games.empty

    if not has_results:
        header_description = "No matching games were found."
    elif result['found_game']:
        header_description = f"Here are some recommendations for {entered_game}:"
    else:
        header_description = "That game was not found, did you mean one of these:"

    # result header
    list_header = widgets.Label(
        value = header_description, 
        layout=widgets.Layout(display='flex', flex_flow='column', align_items='center'),
        style=dict(
        description_width='initial',
        gap='10px'))

    # clear previous outputs, then display the header and (if any) the recommendations
    with output_widget:
        output_widget.clear_output()
        if has_results:
            display(list_header, recommended_games)
        else:
            display(list_header)

# run on_submit_button whenever the submit button is clicked
submit_button.on_click(on_submit_button)

# stack the text input, the button and the output area vertically and render them
app_layout = widgets.Layout(align_items='center', width="auto", height='auto')
app_container = widgets.VBox([game_name_widget, submit_button, output_widget], layout=app_layout)
display(app_container)

# inject the CSS rules that style the widget classes used above
custom_css = """<style> 
             .widget-vbox { border: grey solid 1px; border-radius: 10px; box-shadow: rgba(0, 0, 0, 0.35) 0px 5px 15px;max-width: 60vw; margin: 2rem auto; padding: 2rem} 
             .widget-text { gap: 5px;} 
             .widget-button { border-radius: 20px; margin: 10px; font-size: 1rem; description-width: initial} 
             .widget-label { font-size: 1rem }
             </style>"""
display(HTML(custom_css))