<a href="https://colab.research.google.com/github/cccg8105/notebook_AD/blob/deep_learning/deep_learning/maquina_boltzmann/MBR_Template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Máquina de Boltzmann restringida

## Instalar dependencias

In [1]:
pip install torch===1.6.0 torchvision===0.7.0 -f https://download.pytorch.org/whl/torch_stable.html

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch===1.6.0
[?25l  Downloading https://files.pythonhosted.org/packages/38/53/914885a93a44b96c0dd1c36f36ff10afe341f091230aad68f7228d61db1e/torch-1.6.0-cp36-cp36m-manylinux1_x86_64.whl (748.8MB)
[K     |████████████████████████████████| 748.8MB 14kB/s 
[?25hCollecting torchvision===0.7.0
[?25l  Downloading https://files.pythonhosted.org/packages/8e/dc/4a939cfbd38398f4765f712576df21425241020bfccc200af76d19088533/torchvision-0.7.0-cp36-cp36m-manylinux1_x86_64.whl (5.9MB)
[K     |████████████████████████████████| 5.9MB 12.6MB/s 
Installing collected packages: torch, torchvision
  Found existing installation: torch 1.7.0+cu101
    Uninstalling torch-1.7.0+cu101:
      Successfully uninstalled torch-1.7.0+cu101
  Found existing installation: torchvision 0.8.1+cu101
    Uninstalling torchvision-0.8.1+cu101:
      Successfully uninstalled torchvision-0.8.1+cu101
Successfully installed torch-1.6.0 torchvision-

## Importar conjunto de datos

In [13]:
# Importar las librerías
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

movies = pd.read_csv("https://raw.githubusercontent.com/cccg8105/deeplearning-az/master/datasets/Part%205%20-%20Boltzmann%20Machines%20(BM)/ml-1m/movies.dat", sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users  = pd.read_csv("https://raw.githubusercontent.com/cccg8105/deeplearning-az/master/datasets/Part%205%20-%20Boltzmann%20Machines%20(BM)/ml-1m/users.dat", sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings  = pd.read_csv("https://raw.githubusercontent.com/cccg8105/deeplearning-az/master/datasets/Part%205%20-%20Boltzmann%20Machines%20(BM)/ml-1m/ratings.dat", sep = '::', header = None, engine = 'python', encoding = 'latin-1')

training_set = pd.read_csv("https://raw.githubusercontent.com/cccg8105/deeplearning-az/master/datasets/Part%205%20-%20Boltzmann%20Machines%20(BM)/ml-100k/u1.base", sep = "\t", header = None)
# se convierte la estructura para el manejo en pytorch
training_set = np.array(training_set, dtype = "int")
test_set = pd.read_csv("https://raw.githubusercontent.com/cccg8105/deeplearning-az/master/datasets/Part%205%20-%20Boltzmann%20Machines%20(BM)/ml-100k/u1.test", sep = "\t", header = None)
# se convierte la estructura para el manejo en pytorch
test_set = np.array(test_set, dtype = "int")


nb_users = int(max(max(training_set[:, 0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))

In [12]:
def convert(data):
    new_data = []
    for id_user in range(1, nb_users+1):
        # se obtienen peliculas valoradas por usuario
        id_movies = data[:, 1][data[:, 0] == id_user]
        # se obtienen valoraciones por usuario
        id_ratings = data[:, 2][data[:, 0] == id_user]
        # Se crea una matriz con los registros anteriores 
        ratings = np.zeros(nb_movies)
        ratings[id_movies-1] = id_ratings
        new_data.append(list(ratings))
    return new_data

In [14]:
training_set = convert(training_set)
test_set = convert(test_set)

# se convierten los datos a tipo de variable pytorch
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

### Conversión de valoraciones

In [15]:
# Convertir las valoraciones a valores binarios 1 (Me gusta) o 0 (No me gusta)
training_set[training_set == 0] = -1
training_set[training_set == 1] = 0
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1

test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1

## Contrucción de MBR

In [16]:
class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)
    def sample_h(self, x):           #x = mini_batch_size x nv
        wx = torch.mm(x, self.W.t()) #mini_batch_size x nh
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self, y):           #y = mini_batch_size x nh
        wy = torch.mm(y, self.W) #mini_batch_size x nv
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)   
    def train(self, v0, vk, ph0, phk):
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)

In [17]:
nv = len(training_set[0])
nh = 100
batch_size = 100

rbm = RBM(nv, nh)

## Entrenamiento

In [18]:
nb_epoch = 10
for epoch in range(1, nb_epoch+1):
    training_loss = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user+batch_size]
        v0 = training_set[id_user:id_user+batch_size]
        ph0,_ = rbm.sample_h(v0)
        for k in range(10):
            _,hk = rbm.sample_h(vk)
            _,vk = rbm.sample_v(hk)
            vk[v0 < 0] = v0[v0 < 0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        training_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
        s += 1.
    print("Epoch: "+str(epoch)+", Loss: "+str(training_loss/s))

Epoch: 1, Loss: tensor(0.3702)
Epoch: 2, Loss: tensor(0.2528)
Epoch: 3, Loss: tensor(0.2490)
Epoch: 4, Loss: tensor(0.2513)
Epoch: 5, Loss: tensor(0.2466)
Epoch: 6, Loss: tensor(0.2471)
Epoch: 7, Loss: tensor(0.2472)
Epoch: 8, Loss: tensor(0.2385)
Epoch: 9, Loss: tensor(0.2484)
Epoch: 10, Loss: tensor(0.2490)


## Evaluación del modelo

In [19]:
testing_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        testing_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
        print("Testing Loss: "+str(testing_loss/s))

Testing Loss: tensor(0.2263)
Testing Loss: tensor(0.2040)
Testing Loss: tensor(0.3155)
Testing Loss: tensor(0.2866)
Testing Loss: tensor(0.3007)
Testing Loss: tensor(0.2869)
Testing Loss: tensor(0.2707)
Testing Loss: tensor(0.2541)
Testing Loss: tensor(0.2370)
Testing Loss: tensor(0.2289)
Testing Loss: tensor(0.2344)
Testing Loss: tensor(0.2309)
Testing Loss: tensor(0.2380)
Testing Loss: tensor(0.2348)
Testing Loss: tensor(0.2479)
Testing Loss: tensor(0.2412)
Testing Loss: tensor(0.2466)
Testing Loss: tensor(0.2414)
Testing Loss: tensor(0.2445)
Testing Loss: tensor(0.2482)
Testing Loss: tensor(0.2522)
Testing Loss: tensor(0.2525)
Testing Loss: tensor(0.2505)
Testing Loss: tensor(0.2432)
Testing Loss: tensor(0.2345)
Testing Loss: tensor(0.2367)
Testing Loss: tensor(0.2379)
Testing Loss: tensor(0.2356)
Testing Loss: tensor(0.2356)
Testing Loss: tensor(0.2407)
Testing Loss: tensor(0.2404)
Testing Loss: tensor(0.2395)
Testing Loss: tensor(0.2443)
Testing Loss: tensor(0.2460)
Testing Loss: 