In [15]:
import tkinter as tk
from tkinter import ttk
from tkinter import messagebox
from tkinter import scrolledtext
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import polars as pl
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

In [2]:
# Define a class named GameGenreDataset that inherits from Dataset
class GameGenreDataset(Dataset):
    # Constructor method that initializes the dataset with genres and ratings
    def __init__(self, genres, ratings):
        self.genres = genres  # Assign genres to a member variable
        self.ratings = ratings  # Assign ratings to a member variable

    # Method to get an item from the dataset given an index
    def __getitem__(self, idx):
        # Return a dictionary containing genres and ratings as tensors
        return {
            "genres": torch.tensor(self.genres[idx], dtype=torch.float), # Convert genres to a tensor because they are floats
            "rating": torch.tensor(self.ratings[idx], dtype=torch.float) # Convert ratings to a tensor because they are floats
        }

    # Method to get the length of the dataset
    def __len__(self):
        return len(self.genres)  # Return the length of the genres

# Define a class named GenreBasedModel that inherits from nn.Module
class GenreBasedModel(nn.Module):
    # Constructor method that initializes the model with the number of genres
    def __init__(self, num_genres):
        super(GenreBasedModel, self).__init__()
        #This layer takes the number of genres as input and outputs a vector of size 32 for each genre
        self.genre_embed = nn.Linear(num_genres, 32)  # Linear layer for genre embedding
        self.out = nn.Linear(32, 1)  # Linear layer for final output 

    # Method that defines the forward pass of the model
    def forward(self, genres):
        genre_embeds = self.genre_embed(genres)  # Get genre embeddings
        output = self.out(genre_embeds)  # Pass embeddings through final output layer
        return output.view(-1)  # Flatten the output tensor before returning
    


# You would then update your data preparation, training routine, and GUI accordingly.

In [7]:
#Reading the dataset
df = pd.read_csv("games.csv")

#transforming each element in the 'Genres' column from a string 
#representation of a Python object into a list of unique elements
df['Genres'] = df['Genres'].apply(lambda x: list(set(eval(x))))

#filling missing values in the 'Rating' column with the median value
df['Rating'] = df['Rating'].fillna(df['Rating'].median())

#Setting whether to use the GPU or CPU for training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Creating a MultiLabelBinarizer object and encoding the 'Genres' column because it contains multiple labels
mlb_genres = MultiLabelBinarizer()

genres_encoded = mlb_genres.fit_transform(df['Genres']) #Encoding the 'Genres' column
ratings = df['Rating'].values #Extracting all possible ratings from the 'Rating' column

X_train, X_test, y_train, y_test = train_test_split(genres_encoded, ratings, test_size=0.1, random_state=42)
train_dataset = GameGenreDataset(X_train, y_train) #Creating a training dataset
valid_dataset = GameGenreDataset(X_test, y_test) #Creating a validation dataset
model = GenreBasedModel(num_genres=genres_encoded.shape[1]).to(device) #Creating an instance of the GenreBasedModel class
optimizer = optim.Adam(model.parameters(), lr=0.001) #Creating an Adam optimizer and passing the model parameters and learning rate


In [8]:
#Training the model
# Training the model
def train_model(model, train_loader, valid_loader, optimizer, num_epochs=10):
    # Using Mean-Squared Error as the loss function
    criterion = nn.MSELoss()
    
    # Loop over the number of epochs
    for epoch in range(num_epochs):
        # Set the model to training mode
        model.train()
        
        # Initialize total loss for this epoch
        total_loss = 0
        
        # Loop over the training data loader
        for data in train_loader:
            # Get the input data and move it to the device (GPU/CPU)
            genres = data['genres'].to(device)
            ratings = data['rating'].to(device)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass: compute the model's outputs
            outputs = model(genres)
            
            # Compute the loss between the outputs and the actual ratings
            loss = criterion(outputs, ratings)
            
            # Backward pass: compute the gradients
            loss.backward()
            
            # Update the model parameters
            optimizer.step()
            
            # Accumulate the loss
            total_loss += loss.item()
        
        # Print the average training loss for the epoch
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {total_loss / len(train_loader)}')

        # Set the model to evaluation mode
        model.eval()
        
        # Initialize total validation loss for this epoch
        total_val_loss = 0
        
        # Disable gradient calculation for validation as we are not updating the model
        with torch.no_grad():
            # Loop over the validation data loader
            for data in valid_loader:
                # Get the input data and move it to the device (GPU/CPU)
                genres = data['genres'].to(device)
                ratings = data['rating'].to(device)
                
                # Forward pass: compute the model's outputs
                outputs = model(genres)
                
                # Compute the loss between the outputs and the actual ratings
                loss = criterion(outputs, ratings)
                
                # Accumulate the validation loss
                total_val_loss += loss.item()
        
        # Print the average validation loss for the epoch
        print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {total_val_loss / len(valid_loader)}')


In [9]:
# Impute missing ratings with the median rating
df['Rating'] = df['Rating'].fillna(df['Rating'].median())

In [10]:


# Initialize Tkinter
root = tk.Tk()
root.title("GAME RECOMMENDER SYSTEM")

# Function to handle button click event
def show_recommendations():
    try:
        input_genres = [genre.strip().title() for genre in genres_input.get().split(',')]
        num_games = int(num_games_input.get())  # Get the number of games input by the user
        # Filter games based on input genres
        matching_games = df[df['Genres'].apply(lambda x: all(genre in x for genre in input_genres))]
        if matching_games.empty:
            messagebox.showinfo("No games found", "No games found for the specified genres.")
        else:
            # Sort games by predicted rating
            matching_games['Predicted Rating'] = matching_games.apply(lambda row: model(torch.tensor(mlb_genres.transform([row['Genres']]), dtype=torch.float).to(device)).item(), axis=1)
            matching_games = matching_games.sort_values(by='Predicted Rating', ascending=False).head(num_games)
            recommendations_text.delete(1.0, tk.END)
            for _, row in matching_games.iterrows():
                game_info = f"Game: {row['Title']}, Predicted Rating: {row['Predicted Rating']:.2f}\n"
                recommendations_text.insert(tk.END, game_info)
    except Exception as e:
        messagebox.showerror("Error", str(e))

# Genres input label and entry
genres_label = tk.Label(root, text="Enter Genres:")
genres_label.pack()
genres_input = tk.Entry(root)
genres_input.pack()

# Number of games label and entry
num_games_label = tk.Label(root, text="Number of Games:")
num_games_label.pack()
num_games_input = tk.Entry(root)
num_games_input.pack()

# Button to show recommendations
show_button = tk.Button(root, text="Show Recommendations", command=show_recommendations)
show_button.pack()

# Text area to display recommendations
recommendations_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=50, height=20)
recommendations_text.pack()

# Run the Tkinter event loop
root.mainloop()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching_games['Predicted Rating'] = matching_games.apply(lambda row: model(torch.tensor(mlb_genres.transform([row['Genres']]), dtype=torch.float).to(device)).item(), axis=1)


<h1>Attempt with NCF

In [2]:

# Define a custom dataset class that inherits from torch.utils.data.Dataset
class GameGenreDataset(Dataset):
    def __init__(self, game_ids, genres, ratings):
        # Store the game IDs
        self.game_ids = game_ids
        # Store the genres
        self.genres = genres
        # Store the ratings
        self.ratings = ratings

    # Method to get a single item from the dataset at a given index
    def __getitem__(self, idx):
        # Return a dictionary containing the game ID, genres, and rating as tensors
        return {
            "game_id": torch.tensor(self.game_ids[idx], dtype=torch.long), # Convert game ID to a float tensor
            "genres": torch.tensor(self.genres[idx], dtype=torch.float), # Convert genres to a float tensor
            "rating": torch.tensor(self.ratings[idx], dtype=torch.float) # Convert rating to a float tensor
        }
    
    #Return the length of the dataset
    def __len__(self):
        return len(self.game_ids)


In [3]:
# Define the neural collaborative filtering model class that inherits from nn.Module

class NCFModel(nn.Module):
    # Constructor method to initialize the model
    def __init__(self, num_genres, num_games, embedding_dim=10):
        # Call the constructor of the parent class (nn.Module)
        super(NCFModel, self).__init__()
        
        # Define an embedding layer for games with the given number of games and embedding dimension
        self.game_embedding = nn.Embedding(num_games, embedding_dim)
        
        # Define an embedding layer for genres with the given number of genres and embedding dimension
        self.genre_embedding = nn.Embedding(num_genres, embedding_dim)
        
        # Define fully connected layers for the neural network component
        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 50),  # Linear layer with input size embedding_dim * 2 and output size 50
            nn.ReLU(),  # ReLU activation function
            nn.Linear(50, 1)  # Linear layer with input size 50 and output size 1
        )
        
        # Define embedding layers for the matrix factorization component
        self.mf_user_embedding = nn.Embedding(num_games, embedding_dim)  # Embedding layer for games
        self.mf_item_embedding = nn.Embedding(num_genres, embedding_dim)  # Embedding layer for genres

    # Method to define the forward pass of the model
    def forward(self, game_ids, genres):
        # Get the embeddings for the given game IDs
        game_embeds = self.game_embedding(game_ids)
        
        # Get the embeddings for the given genres (taking the argmax across the dimension)
        genre_embeds = self.genre_embedding(genres.argmax(dim=1))
        
        # Get the embeddings for the matrix factorization path (game IDs)
        mf_game_embeds = self.mf_user_embedding(game_ids)
        
        # Get the embeddings for the matrix factorization path (genres, taking the argmax across the dimension)
        mf_genre_embeds = self.mf_item_embedding(genres.argmax(dim=1))

        # Concatenate the game and genre embeddings for the neural network path
        x = torch.cat((game_embeds, genre_embeds), dim=1)
        
        # Pass the concatenated embeddings through the fully connected layers
        neural_output = self.fc_layers(x)
        
        # Calculate the dot product of the embeddings for the matrix factorization path
        mf_output = (mf_game_embeds * mf_genre_embeds).sum(1, keepdim=True)

        # Combine the outputs of the neural network path and the matrix factorization path
        final_output = neural_output + mf_output
        
        # Flatten the final output tensor before returning
        return final_output.view(-1)


In [4]:
# Load and prepare data
df = pd.read_csv("games.csv") # Load the dataset
df['Genres'] = df['Genres'].apply(lambda x: list(set(eval(x)))) # Convert genres to a list
df['Rating'] = df['Rating'].fillna(df['Rating'].median()) # Fill missing ratings with median

mlb_genres = MultiLabelBinarizer() # Create a MultiLabelBinarizer object, as we have multiple labels
genres_encoded = mlb_genres.fit_transform(df['Genres'])  # Encode the 'Genres' column
ratings = df['Rating'].values # Extract ratings from the 'Rating' column
game_ids = df.index.values # Use the index as the game ID

In [12]:
df['Genres'].head(20)

0                                 [RPG, Adventure]
1                 [Indie, RPG, Brawler, Adventure]
2                                 [RPG, Adventure]
3     [Turn Based Strategy, Indie, RPG, Adventure]
4                     [Indie, Platform, Adventure]
5                           [Simulator, Adventure]
6     [Turn Based Strategy, Indie, RPG, Adventure]
7                            [Platform, Adventure]
8                                [Strategy, Indie]
9                                   [RPG, Brawler]
10           [Turn Based Strategy, RPG, Adventure]
11                              [Indie, Adventure]
12                       [RPG, Brawler, Adventure]
13          [Puzzle, Platform, Shooter, Adventure]
14                                [RPG, Adventure]
15                    [Indie, Platform, Adventure]
16            [RPG, Brawler, Simulator, Adventure]
17                       [RPG, Shooter, Adventure]
18                     [Puzzle, Platform, Shooter]
19                           [P

In [13]:
df['Genres'].tail(20)

1492                        [Platform, Arcade, Adventure]
1493                                              [Music]
1494                                [Fighting, Adventure]
1495               [Platform, Arcade, Shooter, Adventure]
1496                                   [Sport, Simulator]
1497                                           [Fighting]
1498                                            [Shooter]
1499                                    [Fighting, Sport]
1500                              [Indie, RPG, Adventure]
1501                         [Racing, Shooter, Adventure]
1502                                           [Platform]
1503                                           [Fighting]
1504                      [Strategy, Tactical, Simulator]
1505                                           [Fighting]
1506                        [Puzzle, Platform, Adventure]
1507                         [Point-and-Click, Adventure]
1508                                     [Arcade, Racing]
1509          

In [5]:
train_indices, test_indices, X_train, X_test, y_train, y_test = train_test_split(
    range(len(df)),
    genres_encoded, 
    ratings, 
    test_size=0.1, 
    random_state=42
)

train_game_ids = game_ids[train_indices]
test_game_ids = game_ids[test_indices]

train_dataset = GameGenreDataset(train_game_ids, X_train, y_train)
valid_dataset = GameGenreDataset(test_game_ids, X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

# Initialize Model and Optimizer
num_games = len(df)
num_genres = genres_encoded.shape[1]
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = NCFModel(num_genres, num_games, embedding_dim=10).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Lowered learning rate for potentially better convergence


In [6]:
# Training Function
def train_model(model, train_loader, valid_loader, optimizer, num_epochs=50):  # Increased epochs for better training
    # Define the loss function as Mean-Squared Error
    criterion = nn.MSELoss()
    
    # Loop over the number of epochs
    for epoch in range(num_epochs):
        # Set the model to training mode
        model.train()
        
        # Initialize total loss for this epoch
        total_loss = 0
        
        # Loop over the training data loader
        for data in train_loader:
            # Get the game IDs and move them to the device (GPU/CPU)
            game_ids = data['game_id'].to(device)
            # Get the genres and move them to the device (GPU/CPU)
            genres = data['genres'].to(device)
            # Get the ratings and move them to the device (GPU/CPU)
            ratings = data['rating'].to(device)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass: compute the model's outputs
            outputs = model(game_ids, genres)
            
            # Compute the loss between the outputs and the actual ratings
            loss = criterion(outputs, ratings)
            
            # Backward pass: compute the gradients
            loss.backward()
            
            # Update the model parameters
            optimizer.step()
            
            # Accumulate the loss
            total_loss += loss.item()
        
        # Print the average training loss for the epoch
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {total_loss / len(train_loader)}')

        # Set the model to evaluation mode
        model.eval()
        
        # Initialize total validation loss for this epoch
        total_val_loss = 0
        
        # Disable gradient calculation for validation
        with torch.no_grad():
            # Loop over the validation data loader
            for data in valid_loader:
                # Get the game IDs and move them to the device (GPU/CPU)
                game_ids = data['game_id'].to(device)
                # Get the genres and move them to the device (GPU/CPU)
                genres = data['genres'].to(device)
                # Get the ratings and move them to the device (GPU/CPU)
                ratings = data['rating'].to(device)
                
                # Forward pass: compute the model's outputs
                outputs = model(game_ids, genres)
                
                # Compute the loss between the outputs and the actual ratings
                loss = criterion(outputs, ratings)
                
                # Accumulate the validation loss
                total_val_loss += loss.item()
        
        # Print the average validation loss for the epoch
        print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {total_val_loss / len(valid_loader)}')

# Start training
train_model(model, train_loader, valid_loader, optimizer, num_epochs=100)  # Run the training function with 100 epochs


Epoch 1/100, Training Loss: 18.635967077210893
Epoch 1/100, Validation Loss: 16.014801216125488
Epoch 2/100, Training Loss: 11.39994675614113
Epoch 2/100, Validation Loss: 8.685265159606933
Epoch 3/100, Training Loss: 8.211082486219185
Epoch 3/100, Validation Loss: 7.574363422393799
Epoch 4/100, Training Loss: 7.1735158853752665
Epoch 4/100, Validation Loss: 7.072156715393066
Epoch 5/100, Training Loss: 6.4661305172498835
Epoch 5/100, Validation Loss: 6.578840541839599
Epoch 6/100, Training Loss: 5.79941436856292
Epoch 6/100, Validation Loss: 6.311034393310547
Epoch 7/100, Training Loss: 5.326683249584464
Epoch 7/100, Validation Loss: 5.966193628311157
Epoch 8/100, Training Loss: 4.876446003137633
Epoch 8/100, Validation Loss: 5.713441610336304
Epoch 9/100, Training Loss: 4.519052555394727
Epoch 9/100, Validation Loss: 5.525183916091919
Epoch 10/100, Training Loss: 4.166302059972009
Epoch 10/100, Validation Loss: 5.332479858398438
Epoch 11/100, Training Loss: 3.869305910066117
Epoch 11

In [18]:
genre_labels = mlb_genres.classes_
genre_labels

array(['Adventure', 'Arcade', 'Brawler', 'Card & Board Game', 'Fighting',
       'Indie', 'MOBA', 'Music', 'Pinball', 'Platform', 'Point-and-Click',
       'Puzzle', 'Quiz/Trivia', 'RPG', 'Racing', 'Real Time Strategy',
       'Shooter', 'Simulator', 'Sport', 'Strategy', 'Tactical',
       'Turn Based Strategy', 'Visual Novel'], dtype=object)

In [23]:
root = tk.Tk()
root.title("GAME RECOMMENDER SYSTEM")

# Function to handle dropdown menu selection
def on_genre_select(event):
    selected_genre = genres_menu.get()
    if selected_genre:
        current_text = genres_input.get()
        if current_text:
            current_text += ', '
        current_text += selected_genre
        genres_input.delete(0, tk.END)
        genres_input.insert(0, current_text)

# Function to handle button click event
def show_recommendations():
    try:
        input_genres = [genre.strip().title() for genre in genres_input.get().split(',')]
        num_games = int(num_games_input.get())  # Get the number of games input by the user
        # Filter games based on input genres
        matching_games = df[df['Genres'].apply(lambda x: all(genre in x for genre in input_genres))]
        if matching_games.empty:
            messagebox.showinfo("No games found", "No games found for the specified genres.")
        else:
            # Sort games by predicted rating
            input_genres_encoded = mlb_genres.transform([input_genres])
            input_genres_tensor = torch.tensor(input_genres_encoded, dtype=torch.float).to(device)
            matching_games['Predicted Rating'] = matching_games.apply(lambda row: model(torch.tensor([row.name], dtype=torch.long).to(device), input_genres_tensor).item(), axis=1)
            matching_games = matching_games.sort_values(by='Predicted Rating', ascending=False).head(num_games)
            recommendations_text.delete(1.0, tk.END)
            for _, row in matching_games.iterrows():
                game_info = f"Game: {row['Title']}, Predicted Rating: {row['Predicted Rating']:.2f}\n"
                recommendations_text.insert(tk.END, game_info)
    except Exception as e:
        messagebox.showerror("Error", str(e))

# Genres input label and entry
genre_labels = ['Adventure', 'Arcade', 'Brawler', 'Card & Board Game', 'Fighting',
       'Indie', 'MOBA', 'Music', 'Pinball', 'Platform', 'Point-and-Click',
       'Puzzle', 'Quiz/Trivia', 'RPG', 'Racing', 'Real Time Strategy',
       'Shooter', 'Simulator', 'Sport', 'Strategy', 'Tactical',
       'Turn Based Strategy', 'Visual Novel']

#Creating a drop-down list for genres
# genres_label = tk.Label(root, text="Enter Genres:")

# genres_label.pack()
# genres_input = tk.Entry(root)
# genres_input.pack()

# genres_label.pack()
# genres_menu = ttk.Combobox(root, values=genre_labels, state="readonly")
# genres_menu.pack()

genres_label = tk.Label(root, text="Enter Genres:")
genres_label.pack()

genres_input = tk.Entry(root)
genres_input.pack()

genres_menu = ttk.Combobox(root, values=genre_labels, state="readonly")
genres_menu.bind('<<ComboboxSelected>>', on_genre_select)
genres_menu.pack()

# Number of games label and entry
num_games_label = tk.Label(root, text="Number of Games:")
num_games_label.pack()
num_games_input = tk.Entry(root)
num_games_input.pack()

# Button to show recommendations
show_button = tk.Button(root, text="Show Recommendations", command=show_recommendations)
show_button.pack()

# Text area to display recommendations
recommendations_text = scrolledtext.ScrolledText(root, wrap=tk.WORD, width=100, height=20)
recommendations_text.pack()

# Run the Tkinter event loop
root.mainloop()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching_games['Predicted Rating'] = matching_games.apply(lambda row: model(torch.tensor([row.name], dtype=torch.long).to(device), input_genres_tensor).item(), axis=1)
