# Máquinas de Boltzmann Restringidas

## Importando librerías

In [None]:
import numpy as np
import pandas as pd
import torch

## Creando datasets

In [None]:
# Toy catalogue: five movies and three users, each identified by an integer ID.
movies = pd.DataFrame({'movie_id': [1, 2, 3, 4, 5]})
users = pd.DataFrame({'user_id': [1, 2, 3]})

# Every user rates every movie; the scores cycle through 0..5 down the rows.
ratings = pd.DataFrame(
    {
        'user_id': [user for user in (1, 2, 3) for _ in range(5)],
        'movie_id': [1, 2, 3, 4, 5] * 3,
        'raiting': [row % 6 for row in range(15)],
    }
)

# Show the ratings table as the cell output (movies and users are not shown).
ratings

## Preparar el conjunto de entrenamiento y el conjunto de testing

In [None]:
# The first ten rating rows are used for training and the remaining rows for
# testing; both splits become integer arrays with columns
# (user_id, movie_id, rating).
training_set = np.array(ratings.iloc[:10], dtype="int")
test_set = np.array(ratings.iloc[10:], dtype="int")

# Show both splits as the cell output.
training_set, test_set

## Obtener el número de usuarios y de películas

In [None]:
# The largest ID found in either split is taken as the number of users/movies
# (column 0 holds user IDs, column 1 holds movie IDs).
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))

# Show both counts as the cell output.
nb_users, nb_movies

# PRE-PROCESAMIENTO DE DATOS

## Convertir los datos en una matriz bidimensional X[u,i].
### Usuarios u en fila y películas i en columnas

In [None]:
def convert(data, n_users=None, n_movies=None):
    """Pivot (user_id, movie_id, rating) rows into a dense user-by-movie table.

    Args:
        data: 2-D integer NumPy array whose columns are, in order, the user
            ID, the movie ID and the rating. IDs are treated as 1-based.
        n_users: number of rows to produce (user IDs 1..n_users). Defaults to
            the module-level ``nb_users``.
        n_movies: number of columns to produce (movie IDs 1..n_movies).
            Defaults to the module-level ``nb_movies``.

    Returns:
        A list with one entry per user; each entry is a list of ``n_movies``
        floats holding that user's ratings, with 0 wherever ``data`` has no
        row for the (user, movie) pair.
    """
    # Fall back to the notebook-wide counts so existing calls keep working.
    if n_users is None:
        n_users = nb_users
    if n_movies is None:
        n_movies = nb_movies

    new_data = []
    for id_user in range(1, n_users + 1):
        # Compute the row mask once and reuse it for both columns.
        user_rows = data[:, 0] == id_user
        id_movies = data[user_rows, 1]
        id_ratings = data[user_rows, 2]
        user_ratings = np.zeros(n_movies)
        # Movie IDs are 1-based while array positions are 0-based.
        user_ratings[id_movies - 1] = id_ratings
        new_data.append(list(user_ratings))
    return new_data

# Pivot both splits into user-by-movie lists of lists (training split first).
training_set, test_set = convert(training_set), convert(test_set)

# Show both tables as the cell output.
training_set, test_set

## Convertir los datos a tensores de Torch

In [None]:
# Turn the nested rating lists into 32-bit float tensors.
training_set = torch.tensor(training_set, dtype=torch.float32)
test_set = torch.tensor(test_set, dtype=torch.float32)

# Show both tensors as the cell output.
training_set, test_set

## Convertir las valoraciones a valores binarios
- 1 (Me gusta)
- 0 (No me gusta)

In [None]:
# Recode the ratings in place: 0 -> -1, 1 and 2 -> 0 (dislike), 3 and above -> 1 (like).
# The 0 -> -1 step must run first; otherwise the zeros produced by collapsing
# ratings 1 and 2 would be turned into -1 as well.
training_set.masked_fill_(training_set == 0, -1)
training_set.masked_fill_((training_set == 1) | (training_set == 2), 0)
training_set.masked_fill_(training_set >= 3, 1)

# Same recoding, same order, for the test tensor.
test_set.masked_fill_(test_set == 0, -1)
test_set.masked_fill_((test_set == 1) | (test_set == 2), 0)
test_set.masked_fill_(test_set >= 3, 1)

# Show both recoded tensors as the cell output.
training_set, test_set

# RBM
## Crear la arquitectura de la Red Neuronal
#### (Modelo Probabilístico Gráfico)

In [None]:
class RBM:
    """Restricted Boltzmann Machine with Bernoulli-sampled hidden and visible units.

    Attributes:
        W: weight tensor of shape (nh, nv).
        a: hidden-unit bias of shape (1, nh).
        b: visible-unit bias of shape (1, nv).
    """

    def __init__(self, nv, nh):
        """Create the parameters for ``nv`` visible and ``nh`` hidden units.

        All three tensors are drawn from a standard normal distribution.
        """
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)

    def sample_h(self, x):
        """Sample the hidden units given visible states.

        Args:
            x: tensor of shape (mini_batch_size, nv).

        Returns:
            A pair ``(p_h_given_v, sample)``, both of shape
            (mini_batch_size, nh): the activation probabilities and a
            Bernoulli draw from them.
        """
        # The (1, nh) bias broadcasts over the batch dimension.
        pre_activation = torch.mm(x, self.W.t()) + self.a
        p_h_given_v = torch.sigmoid(pre_activation)
        hidden_sample = torch.bernoulli(p_h_given_v)
        return p_h_given_v, hidden_sample

    def sample_v(self, y):
        """Sample the visible units given hidden states.

        Args:
            y: tensor of shape (mini_batch_size, nh).

        Returns:
            A pair ``(p_v_given_h, sample)``, both of shape
            (mini_batch_size, nv): the activation probabilities and a
            Bernoulli draw from them.
        """
        # The (1, nv) bias broadcasts over the batch dimension.
        pre_activation = torch.mm(y, self.W) + self.b
        p_v_given_h = torch.sigmoid(pre_activation)
        visible_sample = torch.bernoulli(p_v_given_h)
        return p_v_given_h, visible_sample

    def train(self, v0, vk, ph0, phk):
        """Update W, a and b in place from one contrastive-divergence pass.

        Args:
            v0: initial visible states, shape (mini_batch_size, nv).
            vk: visible states after the sampling chain, same shape as v0.
            ph0: hidden probabilities given v0, shape (mini_batch_size, nh).
            phk: hidden probabilities given vk, same shape as ph0.

        The differences are summed over the batch dimension and added
        directly; no learning rate is applied.
        """
        positive_phase = torch.mm(v0.t(), ph0)   # (nv, nh)
        negative_phase = torch.mm(vk.t(), phk)   # (nv, nh)
        # Transpose to match W's (nh, nv) layout.
        self.W += (positive_phase - negative_phase).t()
        self.b += (v0 - vk).sum(0)
        self.a += (ph0 - phk).sum(0)


## Entrenar la RBM

In [None]:
# Number of visible units: one per movie column of the training matrix.
nv = len(training_set[0])

# For a quick smoke run use nh = 1, nb_epoch = 1, cd_k = 1 instead.
nh = 50        # hidden units
nb_epoch = 10  # passes over the training users
cd_k = 10      # sampling steps per contrastive-divergence update

batch_size = 1

rbm = RBM(nv, nh)

usuario = 0
for epoch in range(1, nb_epoch+1):
    training_loss = 0
    s = 0.
    # Visit every user, including the final (possibly partial) batch;
    # slicing past the end of the tensor simply yields a shorter batch.
    for id_user in range(0, nb_users, batch_size):
        v0 = training_set[id_user:id_user+batch_size]
        vk = v0
        ph0, _ = rbm.sample_h(v0)
        for k in range(cd_k):
            _, hk = rbm.sample_h(vk)
            _, vk = rbm.sample_v(hk)
            # Keep unrated cells (-1) fixed so they are never "reconstructed".
            vk[v0 < 0] = v0[v0 < 0]
        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        # Score only rated cells; a batch with no rated cell would give a NaN
        # mean, so it is left out of the loss average.
        observed = v0 >= 0
        if observed.any():
            training_loss += torch.mean(torch.abs(v0[observed] - vk[observed]))
            s += 1.
        usuario = id_user
    # Report NaN rather than dividing by zero when no batch had a rated cell.
    print("Epoch: "+str(epoch)+", Loss: "+str(training_loss/s if s else float("nan")))

# Show the last user index and its original/reconstructed visible states.
usuario, v0, vk
# Swap in `rbm.W, rbm.a, rbm.b` above to inspect the learned parameters.

## Testear la RBM

In [None]:
# Running evaluation: for each user with at least one rated test cell, feed the
# training row through one hidden/visible sampling step and accumulate the mean
# absolute error on the rated test cells.
s = 0.
testing_loss = 0
for id_user in range(nb_users):
    v = training_set[id_user:id_user + 1]
    vt = test_set[id_user:id_user + 1]
    # Users without any rated test cell (all values below 0) are skipped.
    if not (vt >= 0).any():
        continue
    _, h = rbm.sample_h(v)
    _, v = rbm.sample_v(h)
    testing_loss += (vt[vt >= 0] - v[vt >= 0]).abs().mean()
    s += 1.
    # Prints the average loss over the users scored so far.
    print(f"Usuario: {id_user+1}, Testing Loss: "+str(testing_loss/s))

# Show the last test row and the last visible tensor as the cell output.
vt, v