In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [2]:
# Movie catalogue from the ml-1m folder: '::'-separated fields, no header row.
# A multi-character separator requires the (slower) python parsing engine.
movies_df = pd.read_csv(
    './ml-1m/movies.dat',
    sep="::",
    header=None,
    engine='python',
    encoding='latin-1',
)

In [3]:
# Preview the first rows of the movie table (columns are unnamed: 0, 1, 2).
movies_df.head()

Unnamed: 0,0,1,2
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# User table from the ml-1m folder: '::'-separated fields, no header row.
# A multi-character separator requires the (slower) python parsing engine.
users_df = pd.read_csv(
    './ml-1m/users.dat',
    sep="::",
    header=None,
    engine='python',
    encoding='latin-1',
)

In [5]:
# Preview the first rows of the user table (columns are unnamed: 0..4).
users_df.head()

Unnamed: 0,0,1,2,3,4
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [6]:
# Ratings table from the ml-1m folder: '::'-separated fields, no header row.
# A multi-character separator requires the (slower) python parsing engine.
ratings_df = pd.read_csv(
    './ml-1m/ratings.dat',
    sep="::",
    header=None,
    engine='python',
    encoding='latin-1',
)

In [7]:
# Preview the first rows of the ratings table (columns are unnamed: 0..3).
ratings_df.head()

Unnamed: 0,0,1,2,3
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


### Preparing training and test set

#### Using only one train/test split

In [8]:
# Training split from the ml-100k folder: tab-separated, no header row.
training_set = pd.read_csv(
    './ml-100k/u1.base',
    delimiter="\t",
    header=None,
    engine='python',
    encoding='latin-1',
)

In [9]:
# Preview the first rows of the training split (columns are unnamed: 0..3).
training_set.head()

Unnamed: 0,0,1,2,3
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712


In [10]:
# (rows, columns) of the training split.
training_set.shape

(80000, 4)

#### This is an 80-20 split

In [11]:
# Rebind the name: from here on `training_set` is an integer NumPy array,
# no longer a DataFrame.
training_set = np.array(training_set, dtype='int')

In [12]:
# Test split from the ml-100k folder: tab-separated, no header row.
test_set = pd.read_csv(
    './ml-100k/u1.test',
    delimiter="\t",
    header=None,
    engine='python',
    encoding='latin-1',
)

In [13]:
# Preview the first rows of the test split (columns are unnamed: 0..3).
test_set.head()

Unnamed: 0,0,1,2,3
0,1,6,5,887431973
1,1,10,3,875693118
2,1,12,5,878542960
3,1,14,5,874965706
4,1,17,3,875073198


In [14]:
# Rebind the name: from here on `test_set` is an integer NumPy array,
# no longer a DataFrame.
test_set = np.array(test_set, dtype='int')

### Getting the number of users and movies

- Use max of training and test sets to ensure it works on all possible splits

In [15]:
# Column 0 holds the ids used as user ids below; take the largest id seen
# in either split.
num_of_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))

In [16]:
# Column 1 holds the ids used as movie ids below; take the largest id seen
# in either split.
num_of_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))

In [17]:
def convert(data, n_users=None, n_movies=None):
    """Turn a long-format ratings array into one dense rating vector per user.

    :param data: 2-D integer array whose column 0 is a 1-based user id,
        column 1 a 1-based movie id and column 2 the rating.
    :param n_users: number of users to emit rows for (ids 1..n_users).
        Defaults to the notebook-level ``num_of_users``.
    :param n_movies: length of each rating vector. Defaults to the
        notebook-level ``num_of_movies``.
    :return: list of ``n_users`` NumPy arrays of length ``n_movies``; entry
        ``j`` of a user's array is the rating for movie ``j + 1``, or 0 when
        that user has no rating for it in ``data``.
    """
    if n_users is None:
        n_users = num_of_users
    if n_movies is None:
        n_movies = num_of_movies

    new_data = []
    for id_user in range(1, n_users + 1):
        # Compute the row mask once and reuse it for both columns.
        user_rows = data[:, 0] == id_user
        id_movies = data[user_rows, 1]
        id_ratings = data[user_rows, 2]
        ratings = np.zeros(n_movies)
        # Movie ids are 1-based; shift to 0-based positions.
        ratings[id_movies - 1] = id_ratings
        new_data.append(ratings)
    return new_data

In [18]:
# Reshape both splits into per-user rating vectors (one list entry per user).
training_set, test_set = convert(training_set), convert(test_set)

### Converting data into pytorch tensors

In [19]:
# Stack the per-user vectors into a single float32 array before handing them
# to PyTorch: building a tensor directly from a Python list of ndarrays is
# very slow (and warns in current PyTorch). The result is still a float32
# (FloatTensor) tensor with one row per user.
training_set = torch.from_numpy(np.array(training_set, dtype=np.float32))

In [20]:
# Display the training tensor (PyTorch prints a truncated view plus its size).
training_set


    5     3     4  ...      0     0     0
    4     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
       ...          ⋱          ...       
    5     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
    0     5     0  ...      0     0     0
[torch.FloatTensor of size 943x1682]

In [21]:
# Stack the per-user vectors into a single float32 array before handing them
# to PyTorch: building a tensor directly from a Python list of ndarrays is
# very slow (and warns in current PyTorch). The result is still a float32
# (FloatTensor) tensor with one row per user.
test_set = torch.from_numpy(np.array(test_set, dtype=np.float32))

In [22]:
# Display the test tensor (PyTorch prints a truncated view plus its size).
test_set


    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
       ...          ⋱          ...       
    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
    0     0     0  ...      0     0     0
[torch.FloatTensor of size 943x1682]

### Converting the ratings to binary

- Because the ratings are about to be mapped to 0 and 1, the movies that a user did not rate (currently stored as 0) need a different value. We assign them -1.

In [23]:
# Mark "no rating" entries (stored as 0) with -1, in place. This must run
# before the 1/2 -> 0 mapping below, otherwise disliked movies would be
# indistinguishable from unrated ones.
training_set[training_set == 0] = -1

- To make the ratings binary, movies rated 1 or 2 are mapped to 0, and movies rated 3 or more are mapped to 1.

In [24]:
# Ratings of 1 or 2 become 0, in place, using a single combined mask.
training_set[(training_set == 1) | (training_set == 2)] = 0

In [25]:
# Ratings of 3 or more become 1, in place. Entries already set to 0 or -1
# are below the threshold and stay untouched.
training_set[training_set >= 3] = 1

- Applying the same replacements to the test set

In [26]:
# Same in-place encoding as for the training set:
#   0 (no rating) -> -1, ratings 1 or 2 -> 0, ratings >= 3 -> 1.
# The -1 step runs first so unrated entries are not confused with the 0 class.
test_set[test_set == 0] = -1
test_set[(test_set == 1) | (test_set == 2)] = 0
test_set[test_set >= 3] = 1

### Creating the architecture of the RBM

- An RBM is a probabilistic graphical model

In [29]:
class RBM():
    """Restricted Boltzmann machine with sigmoid units and Bernoulli sampling.

    Parameters are plain tensors updated manually in ``train``:

    - ``W``: weights, size (nh, nv)
    - ``a``: bias of the hidden nodes, size (1, nh)
    - ``b``: bias of the visible nodes, size (1, nv)
    """

    def __init__(self, nv, nh):
        """
        :param nv: number of visible nodes
        :param nh: number of hidden nodes
        """
        # Weights and biases are drawn from a standard normal distribution.
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)  # Bias for the hidden nodes
        self.b = torch.randn(1, nv)  # Bias for the visible nodes

    def sample_h(self, X):
        """Sample the hidden nodes given visible values.

        :param X: visible values, size (batch, nv)
        :return: tuple ``(p_h_given_v, h)``, both of size (batch, nh):
            the activation probabilities and a Bernoulli sample drawn from them
        """
        wx = torch.mm(X, self.W.t())
        # Apply the hidden bias to every row of the minibatch.
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)

    def sample_v(self, H):
        """Sample the visible nodes given hidden values.

        :param H: hidden values, size (batch, nh)
        :return: tuple ``(p_v_given_h, v)``, both of size (batch, nv):
            the activation probabilities and a Bernoulli sample drawn from them
        """
        wh = torch.mm(H, self.W)
        # Apply the visible bias to every row of the minibatch.
        activation = wh + self.b.expand_as(wh)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)

    def train(self, v0, vk, ph0, phk):
        """Apply one contrastive-divergence update to W, b and a, in place.

        :param v0: input vectors, size (batch, nv)
        :param vk: visible nodes after k sampling steps, size (batch, nv)
        :param ph0: hidden probabilities given ``v0``, size (batch, nh)
        :param phk: hidden probabilities given ``vk``, size (batch, nh)
        """
        # (nh, batch) @ (batch, nv) -> (nh, nv), matching the layout of W.
        self.W += torch.mm(ph0.t(), v0) - torch.mm(phk.t(), vk)
        # Sum over the batch dimension so the updates match the bias sizes.
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)
        

In [30]:
# RBM requires both layer sizes; calling RBM() with no arguments raises
# TypeError.
nv = len(training_set[0])  # one visible node per entry of a user's rating vector
nh = 100  # number of hidden nodes; a tunable hyperparameter
model = RBM(nv, nh)