In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle



# Load the pivot table from CSV, using the first CSV column as the row index.
# presumably rows are users and columns are movies (the shape is unpacked
# below as n_users, n_movies) — TODO confirm against the CSV.
df_pivot = pd.read_csv('pivot_table (1).csv', index_col=0)
# Long-format copy of the pivot: the second index level is renamed to
# 'Movie_Id' and the stacked values to 'Rating'. It is not referenced again
# in the code visible here.
df_melt = df_pivot.stack().reset_index().rename(columns={'level_1': 'Movie_Id', 0: 'Rating'})  

# Raw values as a NumPy array; its two dimensions are taken as (users, movies).
user_movie_matrix = df_pivot.values
n_users, n_movies = user_movie_matrix.shape


# Standardize every column (zero mean, unit variance). The scaler is fit on
# the full matrix, i.e. before the train/test split further down.
# NOTE(review): if 0 marks "not rated" in this matrix (as it does for the new
# test data built later), those zeros take part in each column's mean/std,
# and the later `> 0` filter keeps only above-column-mean entries rather than
# "rated" entries — confirm this is intended.
scaler = StandardScaler()
user_movie_matrix_normalized = scaler.fit_transform(user_movie_matrix)


# Function that converts a matrix into a list of (user, movie, rating) triples,
# keeping only ratings greater than 0
def flatten_matrix(matrix):
    """Return the strictly positive cells of a 2-D matrix as index/value triples.

    Args:
        matrix: 2-D array-like supporting ``matrix.shape`` and ``matrix[i, j]``.

    Returns:
        A list of ``(row, col, value)`` tuples in row-major order, one for
        every cell whose value is ``> 0``. ``row`` and ``col`` are plain
        Python ints; ``value`` is whatever ``matrix[row, col]`` yields.
    """
    triples = []
    for row in range(matrix.shape[0]):
        for col in range(matrix.shape[1]):
            # Zero and negative cells are skipped.
            if matrix[row, col] > 0:
                triples.append((row, col, matrix[row, col]))
    return triples


# Flatten the standardized matrix into (row, column, value) triples; only
# entries whose standardized value is > 0 are kept by flatten_matrix.
ratings_flat = flatten_matrix(user_movie_matrix_normalized)


# Random 80/20 split of the individual triples (not of whole users), with a
# fixed random_state so the split is repeatable.
train_data, test_data = train_test_split(ratings_flat, test_size=0.2, random_state=42)



def to_tensor(data, n_users, n_movies):
    """Scatter (row, col, value) triples into a dense ``(n_users, n_movies)`` tensor.

    Args:
        data: Iterable of ``(i, j, r)`` triples; ``i`` and ``j`` are converted
            with ``int()`` before indexing.
        n_users: Number of rows of the result.
        n_movies: Number of columns of the result.

    Returns:
        A ``torch`` tensor created with ``torch.zeros(n_users, n_movies)``
        where each in-range triple has been written to ``[i, j]``. Cells with
        no triple stay 0. If the same cell appears more than once, the last
        triple wins.

    Triples whose position falls outside the tensor are skipped. This now
    includes negative indices, which previously passed the upper-bound-only
    check and wrapped around to overwrite a cell at the other end of the
    tensor.
    """
    matrix = torch.zeros(n_users, n_movies)
    for i, j, r in data:
        # Check both bounds so a negative index cannot wrap around.
        if 0 <= i < n_users and 0 <= j < n_movies:
            matrix[int(i), int(j)] = r
    return matrix

# Convert the train and test triples into dense (n_users, n_movies) tensors;
# cells that have no triple stay 0.
# NOTE(review): in the code visible here the module-level test_tensor is never
# passed to evaluate_rbm (the evaluation call uses new_test_tensor) — confirm
# whether the held-out split is meant to be evaluated as well.
train_tensor = to_tensor(train_data, n_users, n_movies)
test_tensor = to_tensor(test_data, n_users, n_movies)


# Definition of the RBM (Restricted Boltzmann Machine) model
class RBM(nn.Module):
    """Tied-weight reconstruction network exposed under the name ``RBM``.

    Learnable parameters, registered in this order:
        W: ``(n_visible, n_hidden)`` weights, initialised as ``randn * 0.1``.
        b: ``(n_visible,)`` visible bias, initialised to zeros.
        c: ``(n_hidden,)`` hidden bias, initialised to zeros.

    ``forward`` is a single encode/decode pass. The same ``W`` is used in both
    directions (transposed for decoding). No sampling step is performed.
    """

    def __init__(self, n_visible, n_hidden):
        super().__init__()

        self.W = nn.Parameter(torch.randn(n_visible, n_hidden) * 0.1)
        self.b = nn.Parameter(torch.zeros(n_visible))
        self.c = nn.Parameter(torch.zeros(n_hidden))
        # Dropout with p=0.2 applied to the hidden activations.
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, v):
        """Reconstruct the visible vector(s) ``v``.

        Args:
            v: Tensor whose last dimension is ``n_visible``.

        Returns:
            Tensor of the same shape as ``v``: ``sigmoid(h @ W.T + b)`` where
            ``h = dropout(relu(v @ W + c))``. Values lie in (0, 1).
        """
        hidden = self.dropout(torch.relu(v @ self.W + self.c))
        visible_logits = hidden @ self.W.t() + self.b
        return torch.sigmoid(visible_logits)



# Training hyperparameters.
lr = 0.005       # Adam learning rate
n_hidden = 200   # number of hidden units in the RBM
wd = 0.0001      # Adam weight_decay

print(f'Training RBM with LR={lr}, Hidden Layers={n_hidden}, Weight Decay={wd}')

# Seed PyTorch's global RNG before the model is created so that the random
# weight initialisation and the dropout masks are the same on every
# Restart & Run All (42 matches the random_state used for the data split).
torch.manual_seed(42)

# Initialize the RBM model and the Adam optimizer
rbm = RBM(n_visible=n_movies, n_hidden=n_hidden)
optimizer = optim.Adam(rbm.parameters(), lr=lr, weight_decay=wd)


# Function for training the RBM model
def train_rbm(model, train_data, optimizer, epochs=100):
    """Train ``model`` to reconstruct ``train_data`` with full-batch updates.

    Each epoch runs one forward pass over the whole ``train_data`` tensor,
    computes the mean squared error between the reconstruction and
    ``train_data`` (over every cell, including zeros), backpropagates and
    takes one optimizer step. The loss is printed every 10th epoch.

    Args:
        model: ``nn.Module`` mapping a tensor to a tensor of the same shape.
        train_data: Input tensor. It is also the reconstruction target.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full-batch updates to perform.

    Returns:
        None. ``model`` is updated in place.

    The body previously ignored ``train_data`` and read the module-level
    ``train_tensor`` instead, so passing any other tensor had no effect.
    """
    criterion = nn.MSELoss()  # built once instead of once per epoch
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()  # reset gradients
        recon = model(train_data)  # reconstruct the ratings
        loss = criterion(recon, train_data)  # reconstruction loss (MSE)
        loss.backward()  # backpropagation
        optimizer.step()  # update the model parameters
        if epoch % 10 == 0:
            print(f'Epoch {epoch}: Loss={loss.item()}')  # report the loss every 10th epoch

# Train the RBM model (default of 100 epochs, whole training tensor per step)
train_rbm(rbm, train_tensor, optimizer)

# Save the trained model with pickle
# NOTE(review): this pickles the whole module object, so loading it back
# needs the RBM class to be defined under the same name at load time.
model_path = 'rbm_model.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(rbm, f)

print(f"Model saved to {model_path}")


# Load the trained model back from the pickle file
# pickle.load can execute arbitrary code from the file; only load files that
# were produced by this notebook.
with open(model_path, 'rb') as f:
    loaded_model = pickle.load(f)

print("Model loaded successfully!")


# Load new test data (new users and their movie ratings)
df_new_test = pd.read_csv('test_data.csv')  

# Build a new user-movie matrix from the new data (rows: Cust_Id, columns:
# Movie_Id, values: Rating); missing combinations are filled with 0
new_user_movie_matrix = df_new_test.pivot(index='Cust_Id', columns='Movie_Id', values='Rating').fillna(0).values
# Flatten the new matrix into (row, column, value) triples and place them in
# an (n_users, n_movies) tensor; triples beyond that shape are skipped by
# to_tensor.
# NOTE(review): the triples carry positional indices of the new pivot, not
# Cust_Id/Movie_Id labels, so columns line up with the training matrix only if
# both pivots contain the same movies in the same order — TODO confirm.
# NOTE(review): these ratings are not passed through `scaler`, unlike the
# matrix the model was trained on.
new_test_tensor = to_tensor(flatten_matrix(new_user_movie_matrix), n_users, n_movies)


# Function for evaluating the RBM model on test data
def evaluate_rbm(model, test_tensor):
    """Compute RMSE and MAE of ``model``'s reconstruction on the positive cells.

    The model is switched to eval mode and run once on ``test_tensor`` without
    gradient tracking. Only cells where ``test_tensor`` is ``> 0`` are
    compared. All other cells are ignored.

    Args:
        model: ``nn.Module`` returning a tensor of the same shape as its input.
        test_tensor: 2-D tensor used both as model input and as ground truth.

    Returns:
        ``(rmse, mae)`` computed over the cells with ``test_tensor > 0``.

    Raises:
        ValueError: If ``test_tensor`` has no cell greater than 0 (the metric
            functions would otherwise fail on empty input with a less
            specific message).
    """
    model.eval()  # switch the model to eval mode
    with torch.no_grad():
        pred = model(test_tensor).numpy()  # predicted ratings
    test_matrix = test_tensor.numpy()

    # One boolean mask selects the same cells, in the same row-major order,
    # as the former pair of nested Python loops.
    observed = test_matrix > 0
    if not observed.any():
        raise ValueError(
            "test_tensor contains no entries greater than 0; nothing to evaluate"
        )

    # Actual and predicted ratings at the observed positions
    test_ratings = test_matrix[observed]
    pred_ratings = pred[observed]

    rmse = np.sqrt(mean_squared_error(test_ratings, pred_ratings))
    mae = mean_absolute_error(test_ratings, pred_ratings)
    return rmse, mae

# Evaluate the loaded model on the new test data
# NOTE(review): new_test_tensor holds ratings that were not passed through
# `scaler`, while the model ends in a sigmoid (outputs in (0, 1)) and was
# trained on the standardized matrix, so these metrics compare values on
# different scales — confirm before interpreting them.
rmse, mae = evaluate_rbm(loaded_model, new_test_tensor)
print(f'RMSE on new test data: {rmse}')
print(f'MAE on new test data: {mae}')



Training RBM with LR=0.005, Hidden Layers=200, Weight Decay=0.0001
Epoch 0: Loss=0.263776570558548
Epoch 10: Loss=0.12476474046707153
Epoch 20: Loss=0.10092094540596008
Epoch 30: Loss=0.08951449394226074
Epoch 40: Loss=0.07909257709980011
Epoch 50: Loss=0.07131930440664291
Epoch 60: Loss=0.06680222600698471
Epoch 70: Loss=0.06486932188272476
Epoch 80: Loss=0.06384913623332977
Epoch 90: Loss=0.06303225457668304
Model saved to rbm_model.pkl
Model loaded successfully!
RMSE on new test data: 3.47190523147583
MAE on new test data: 3.2912557125091553


In [None]:
# Results on the training set:
# LR=0.005, Hidden Layers=100, Weight Decay=0.001 => RMSE: 0.6288995742797852, MAE: 0.4929995834827423
# Results on the test set: RMSE on new test data: 3.47190523147583, MAE on new test data: 3.2912557125091553