In [1]:
# Importar las librerías
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
# Importar el dataset
movies = pd.read_csv("ml-1m/movies.dat", sep = '::', header = None, engine = 'python', encoding = 'latin-1')
users  = pd.read_csv("ml-1m/users.dat", sep = '::', header = None, engine = 'python', encoding = 'latin-1')
ratings  = pd.read_csv("ml-1m/ratings.dat", sep = '::', header = None, engine = 'python', encoding = 'latin-1')
print(movies)
print(users)
print(ratings)

         0                                   1                             2
0        1                    Toy Story (1995)   Animation|Children's|Comedy
1        2                      Jumanji (1995)  Adventure|Children's|Fantasy
2        3             Grumpier Old Men (1995)                Comedy|Romance
3        4            Waiting to Exhale (1995)                  Comedy|Drama
4        5  Father of the Bride Part II (1995)                        Comedy
...    ...                                 ...                           ...
3878  3948             Meet the Parents (2000)                        Comedy
3879  3949          Requiem for a Dream (2000)                         Drama
3880  3950                    Tigerland (2000)                         Drama
3881  3951             Two Family House (2000)                         Drama
3882  3952               Contender, The (2000)                Drama|Thriller

[3883 rows x 3 columns]
         0  1   2   3      4
0        1  F   1  10 

In [8]:
# Preparar el conjunto de entrenamiento y elconjunto de testing
training_set = pd.read_csv("ml-100k/u1.base", sep = "\t", header = None) #base significa entrenamiento
training_set = np.array(training_set, dtype = "int") #para pytorch pasamos el dataframe de pandas a array
test_set = pd.read_csv("ml-100k/u1.test", sep = "\t", header = None)
test_set = np.array(test_set, dtype = "int")

In [9]:
# Obtener el número de usuarios y de películas
nb_users = int(max(max(training_set[:, 0]), max(test_set[:,0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))
print(nb_users)
print(nb_movies)

943
1682


In [10]:
# Convertir los datos en un array X[u,i] con usuarios 'u' en fila y películas 'i' en columna
def convert(data):
    new_data = []
    for id_user in range(1, nb_users+1):
        id_movies = data[:, 1][data[:, 0] == id_user]
        id_ratings = data[:, 2][data[:, 0] == id_user]
        ratings = np.zeros(nb_movies)
        ratings[id_movies-1] = id_ratings
        new_data.append(list(ratings))
    return new_data

training_set = convert(training_set)
test_set = convert(test_set)

In [11]:
# Convertir los datos a tensores de Torch
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)

In [8]:
# Convertir las valoraciones(0-5) a valores binarios 1 (Me gusta) o 0 (No me gusta)
training_set[training_set == 0] = -1 #0 es una valoración desconocida, le asignamos el -1
training_set[training_set == 1] = 0 #del 1 al 2 le asignamos un NO me gusta
training_set[training_set == 2] = 0
training_set[training_set >= 3] = 1 #mayores de 2 le damos Me Gusta

test_set[test_set == 0] = -1
test_set[test_set == 1] = 0
test_set[test_set == 2] = 0
test_set[test_set >= 3] = 1


In [12]:
# Crear la arquitectura de la Red Neuronal (Modelo Probabilistico Gráfico)

class RBM():
    def __init__(self, nv, nh): #constructor de la clase RBM. nv=nodos visibles. nh=nodos ocultos-hidden
        # Definimos los parámetros del objeto RBM usando self.
        
        #matriz para los pesos con valores aleatorios(distribucion normal-gauss) que uniran los nv con nh
        self.W = torch.randn(nh, nv)
        
        #inicializar los sesgos(probabilidad de que se active el nodo oculto conociendo 
        #el visible que se ha activado y viceversa). Creamos vectores bidimensionales añandiendo(1,...)
        self.a = torch.randn(1, nh) #probabilidad de activacion para el nodo oculto nh
        self.b = torch.randn(1, nv) #probabilidad de activacion para el nodo visible nv
    
    #La siguiente funcion nos muestreara los nodos de la capa oculta segun las 
    #probabilidades p_h_given_v. Es decir, las probabilidades de activacion de 
    #un nodo oculto conociendo los valores de los visibles
    def sample_h(self, x):    #self para acceder al constructor y x = mini_batch_size valores de la capa visible
        wx = torch.mm(x, self.W.t()) #mini_batch_size multiplicado por nh (ojo,operacion con pytorch)
        activation = wx + self.a.expand_as(wx) #funcion de activacion(expandiendo a la dimesion de wx)
        p_h_given_v = torch.sigmoid(activation) #aplicacoms una sigmoide como funcion de activacion
        return p_h_given_v, torch.bernoulli(p_h_given_v) #usamos bernoulli para saber si activamos o no la neurona oculta
    
    #La siguiente funcion nos muestreara los nodos de la capa visible segun las 
    #probabilidades p_v_given_h. Es decir, las probabilidades de activacion de 
    #un nodo visible conociendo los valores de los ocultos
    def sample_v(self, y):           #y = mini_batch_size x nh
        wy = torch.mm(y, self.W) #mini_batch_size x nv
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)  
    
    #funcion de entrenamiento usando la deivergencia contrastante
    def train(self, v0, vk, ph0, phk): 
        #v0=valor original de las peliculas.
        #vk=nodos visibles despues de k pasos
        #ph0=probabilidades de iteracion de los ocultos dados los visibles
        #phk=probabilidades de los nodos ocultos despues de k iteraciones
        
        #k-pasos para la divergencia contrastante
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)
        
nv = len(training_set[0]) #nodos visibles que tenemos como entrada
nh = 100 #nodos ocultos que queremos tener(caracteristicas que definen a cada pelicula)
batch_size = 100 #tamaño del bloque de aprendizaje(nº elementos que se entrenana a la vez antes de actualizarse los pesos)

rbm = RBM(nv, nh)

In [13]:
# Entrenar la RBM

nb_epoch = 10 #nº de epocas-iteraciones

for epoch in range(1, nb_epoch+1):
    training_loss = 0 #perdidas para medir la cantidad de errores en el entrenamiento(usando mean)
    s = 0. #Cuantos usuarios calificaron al menos una pelicula, en decimal
    
    #bucle para obteener los lotes de usuarios de 100 en 100
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user+batch_size]
        v0 = training_set[id_user:id_user+batch_size]
        ph0,_ = rbm.sample_h(v0) #para llamar al primer parametro del return del metodo samble_h()
        for k in range(10): #divergencia contrastante en k pasos
            _,hk = rbm.sample_h(vk) #para llamar al segundo parametro del return del metodo sample_h() de antes
            _,vk = rbm.sample_v(hk)
            vk[v0 < 0] = v0[v0 < 0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        training_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
        s += 1.
    print("Epoch: "+str(epoch)+", Loss: "+str(training_loss/s))


Epoch: 1, Loss: tensor(0.3295)
Epoch: 2, Loss: tensor(0.2884)
Epoch: 3, Loss: tensor(0.2837)
Epoch: 4, Loss: tensor(0.2877)
Epoch: 5, Loss: tensor(0.2892)
Epoch: 6, Loss: tensor(0.2874)
Epoch: 7, Loss: tensor(0.2860)
Epoch: 8, Loss: tensor(0.2843)
Epoch: 9, Loss: tensor(0.2843)
Epoch: 10, Loss: tensor(0.2832)


In [14]:
# Testear la RBM
testing_loss = 0
s = 0.
for id_user in range(nb_users):
    v = training_set[id_user:id_user+1]
    vt = test_set[id_user:id_user+1]
    if len(vt[vt>=0]) > 0:
        _,h = rbm.sample_h(v)
        _,v = rbm.sample_v(h)
        testing_loss += torch.mean(torch.abs(vt[vt>=0] - v[vt>=0]))
        s += 1.
        print("Testing Loss: "+str(testing_loss/s))

Testing Loss: tensor(0.3829)
Testing Loss: tensor(0.2684)
Testing Loss: tensor(0.2590)
Testing Loss: tensor(0.2301)
Testing Loss: tensor(0.2389)
Testing Loss: tensor(0.2708)
Testing Loss: tensor(0.3188)
Testing Loss: tensor(0.3231)
Testing Loss: tensor(0.3037)
Testing Loss: tensor(0.3183)
Testing Loss: tensor(0.3250)
Testing Loss: tensor(0.3266)
Testing Loss: tensor(0.3499)
Testing Loss: tensor(0.3523)
Testing Loss: tensor(0.3401)
Testing Loss: tensor(0.3374)
Testing Loss: tensor(0.3252)
Testing Loss: tensor(0.3331)
Testing Loss: tensor(0.3232)
Testing Loss: tensor(0.3166)
Testing Loss: tensor(0.3124)
Testing Loss: tensor(0.3095)
Testing Loss: tensor(0.3124)
Testing Loss: tensor(0.3136)
Testing Loss: tensor(0.3152)
Testing Loss: tensor(0.3093)
Testing Loss: tensor(0.3031)
Testing Loss: tensor(0.3047)
Testing Loss: tensor(0.2993)
Testing Loss: tensor(0.2943)
Testing Loss: tensor(0.2927)
Testing Loss: tensor(0.2880)
Testing Loss: tensor(0.2836)
Testing Loss: tensor(0.2794)
Testing Loss: 

Testing Loss: tensor(0.2609)
Testing Loss: tensor(0.2605)
Testing Loss: tensor(0.2613)
Testing Loss: tensor(0.2611)
Testing Loss: tensor(0.2608)
Testing Loss: tensor(0.2605)
Testing Loss: tensor(0.2610)
Testing Loss: tensor(0.2618)
Testing Loss: tensor(0.2619)
Testing Loss: tensor(0.2623)
Testing Loss: tensor(0.2621)
Testing Loss: tensor(0.2624)
Testing Loss: tensor(0.2624)
Testing Loss: tensor(0.2624)
Testing Loss: tensor(0.2624)
Testing Loss: tensor(0.2631)
Testing Loss: tensor(0.2626)
Testing Loss: tensor(0.2629)
Testing Loss: tensor(0.2628)
Testing Loss: tensor(0.2638)
Testing Loss: tensor(0.2634)
Testing Loss: tensor(0.2638)
Testing Loss: tensor(0.2635)
Testing Loss: tensor(0.2635)
Testing Loss: tensor(0.2644)
Testing Loss: tensor(0.2640)
Testing Loss: tensor(0.2637)
Testing Loss: tensor(0.2644)
Testing Loss: tensor(0.2650)
Testing Loss: tensor(0.2652)
Testing Loss: tensor(0.2659)
Testing Loss: tensor(0.2662)
Testing Loss: tensor(0.2659)
Testing Loss: tensor(0.2655)
Testing Loss: 

Testing Loss: tensor(0.2385)
Testing Loss: tensor(0.2385)
Testing Loss: tensor(0.2385)
Testing Loss: tensor(0.2383)
Testing Loss: tensor(0.2383)
Testing Loss: tensor(0.2381)
Testing Loss: tensor(0.2380)
Testing Loss: tensor(0.2378)
Testing Loss: tensor(0.2377)
Testing Loss: tensor(0.2375)
Testing Loss: tensor(0.2376)
Testing Loss: tensor(0.2375)
Testing Loss: tensor(0.2374)
Testing Loss: tensor(0.2375)
Testing Loss: tensor(0.2373)
Testing Loss: tensor(0.2374)
Testing Loss: tensor(0.2372)
Testing Loss: tensor(0.2373)
Testing Loss: tensor(0.2374)
Testing Loss: tensor(0.2374)
Testing Loss: tensor(0.2372)
Testing Loss: tensor(0.2373)
Testing Loss: tensor(0.2374)
Testing Loss: tensor(0.2373)
Testing Loss: tensor(0.2371)
Testing Loss: tensor(0.2372)
Testing Loss: tensor(0.2371)
Testing Loss: tensor(0.2369)
Testing Loss: tensor(0.2368)
Testing Loss: tensor(0.2366)
Testing Loss: tensor(0.2366)
Testing Loss: tensor(0.2364)
Testing Loss: tensor(0.2363)
Testing Loss: tensor(0.2364)
Testing Loss: 