In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/pivot-table2-csv/pivot_table.csv


In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

# Load and prepare data
df_pivot = pd.read_csv('/kaggle/input/pivot-table2-csv/pivot_table.csv', index_col=0)
df_melt = df_pivot.stack().reset_index().rename(columns={'level_1': 'Movie_Id', 0: 'Rating'})

# Convert to user-movie matrix
user_movie_matrix = df_pivot.values
n_users, n_movies = user_movie_matrix.shape

# Flatten the matrix for splitting
def flatten_matrix(matrix):
    return [(i, j, matrix[i, j]) for i in range(matrix.shape[0]) for j in range(matrix.shape[1]) if matrix[i, j] > 0]

ratings_flat = flatten_matrix(user_movie_matrix)
train_data, test_data = train_test_split(ratings_flat, test_size=0.2, random_state=42)

# Convert to tensors
def to_tensor(data, n_users, n_movies):
    matrix = torch.zeros(n_users, n_movies)
    for i, j, r in data:
        matrix[int(i), int(j)] = r
    return matrix

train_tensor = to_tensor(train_data, n_users, n_movies)
test_tensor = to_tensor(test_data, n_users, n_movies)

# Define the RBM model with dropout and Xavier initialization
class RBM(nn.Module):
    def __init__(self, n_visible, n_hidden):
        super(RBM, self).__init__()
        self.W = nn.Parameter(torch.randn(n_visible, n_hidden) * 0.1)
        self.b = nn.Parameter(torch.zeros(n_visible))
        self.c = nn.Parameter(torch.zeros(n_hidden))
        self.dropout = nn.Dropout(p=0.2)
    
    def forward(self, v):
        h_prob = torch.sigmoid(torch.matmul(v, self.W) + self.c)
        h_prob = self.dropout(h_prob)
        v_recon = torch.sigmoid(torch.matmul(h_prob, self.W.t()) + self.b)
        return v_recon

# Hyperparameter grid
learning_rates = [0.001, 0.005, 0.01]
hidden_layers = [100, 200, 300]
weight_decays = [0.0001, 0.001]
results = []

# Training and evaluation
for lr in learning_rates:
    for n_hidden in hidden_layers:
        for wd in weight_decays:
            print(f'Training RBM with LR={lr}, Hidden Layers={n_hidden}, Weight Decay={wd}')
            
            rbm = RBM(n_visible=n_movies, n_hidden=n_hidden)
            optimizer = optim.Adam(rbm.parameters(), lr=lr, weight_decay=wd)
            
            # Training the RBM with more epochs
            def train_rbm(model, train_data, optimizer, epochs=50):
                for epoch in range(epochs):
                    model.train()
                    optimizer.zero_grad()
                    recon = model(train_data)
                    loss = nn.MSELoss()(recon, train_data)
                    loss.backward()
                    optimizer.step()
                    if epoch % 10 == 0:
                        print(f'Epoch {epoch}: Loss={loss.item()}')
            
            train_rbm(rbm, train_tensor, optimizer)
            
            # Predict and evaluate
            def evaluate_rbm(model, test_data):
                model.eval()
                with torch.no_grad():
                    pred = model(test_tensor)
                    pred = pred.numpy()
                    test_matrix = test_tensor.numpy()

                    # Flatten the predictions and test data for comparison
                    test_flat = [(i, j, test_matrix[i, j]) for i in range(test_matrix.shape[0]) for j in range(test_matrix.shape[1]) if test_matrix[i, j] > 0]
                    pred_flat = [(i, j, pred[i, j]) for i in range(pred.shape[0]) for j in range(pred.shape[1]) if test_matrix[i, j] > 0]

                    # Calculate RMSE and MAE
                    test_ratings = np.array([r for _, _, r in test_flat])
                    pred_ratings = np.array([r for _, _, r in pred_flat])

                    rmse = np.sqrt(mean_squared_error(test_ratings, pred_ratings))
                    mae = mean_absolute_error(test_ratings, pred_ratings)
                    return rmse, mae

            rmse, mae = evaluate_rbm(rbm, test_tensor)
            results.append((lr, n_hidden, wd, rmse, mae))
            print(f'LR={lr}, Hidden Layers={n_hidden}, Weight Decay={wd} => RMSE: {rmse}, MAE: {mae}')

# Display results
results_df = pd.DataFrame(results, columns=['Learning Rate', 'Hidden Layers', 'Weight Decay', 'RMSE', 'MAE'])
print(results_df)



Training RBM with LR=0.001, Hidden Layers=100, Weight Decay=0.0001
Epoch 0: Loss=0.9858817458152771
Epoch 10: Loss=0.9079105854034424
Epoch 20: Loss=0.8491498231887817
Epoch 30: Loss=0.8211618661880493
Epoch 40: Loss=0.8111557960510254
LR=0.001, Hidden Layers=100, Weight Decay=0.0001 => RMSE: 3.1255578994750977, MAE: 2.9461913108825684
Training RBM with LR=0.001, Hidden Layers=100, Weight Decay=0.001
Epoch 0: Loss=0.9852038025856018
Epoch 10: Loss=0.934359610080719
Epoch 20: Loss=0.8876748085021973
Epoch 30: Loss=0.854855477809906
Epoch 40: Loss=0.840477705001831
LR=0.001, Hidden Layers=100, Weight Decay=0.001 => RMSE: 3.1217167377471924, MAE: 2.9424707889556885
Training RBM with LR=0.001, Hidden Layers=200, Weight Decay=0.0001
Epoch 0: Loss=0.9717287421226501
Epoch 10: Loss=0.8472955822944641
Epoch 20: Loss=0.7988429665565491
Epoch 30: Loss=0.7878122329711914
Epoch 40: Loss=0.783592164516449
LR=0.001, Hidden Layers=200, Weight Decay=0.0001 => RMSE: 3.083775281906128, MAE: 2.9096097946