In [1]:
import torch
from torch.utils import data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv('./ml-latest/ratings_hashed.csv', encoding = "ISO-8859-1")

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
class Dataset(data.Dataset):
    def __init__(self, list_IDs):
        self.list_IDs = list_IDs
    
    def __len__(self):
        return len(self.list_IDs)
    
    def __getitem__(self, index):
        ID = self.list_IDs[index]
        X = torch.load('./data_user/' + str(ID) + '.pt')
        return X

In [6]:
vector = torch.load('./data_user/0.pt')
INPUT_SIZE = len(vector)
print(INPUT_SIZE)

53889


In [7]:
def train_test_split(df, alpha=0.90):
    n_users = df.user.unique().size
    indexes = [i for i in range(n_users)]
    np.random.shuffle(indexes)
    limit = int(n_users * alpha)
    train_indexes = indexes[:limit]
    test_indexes = indexes[limit:]
    return train_indexes, test_indexes

In [8]:
train_indexes, test_indexes = train_test_split(df)

In [9]:
len(train_indexes)

254905

In [10]:
partition = {}
partition['train'] = train_indexes
partition['test'] = test_indexes

### Set up PyTorch

In [11]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

In [12]:
# Parameters
params = {'batch_size': 8,
         'shuffle': True,
         'num_workers': 4}

In [13]:
# Max epochs
max_epochs = 10

In [14]:
# Generators
training_set = Dataset(partition['train'])
training_generator = data.DataLoader(training_set, **params)

testing_set = Dataset(partition['test'])
testing_generator = data.DataLoader(testing_set, **params)

In [20]:
class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(INPUT_SIZE, 256)
        self.fc21 = nn.Linear(256, 32)
        self.fc22 = nn.Linear(256, 32)
        self.fc3 = nn.Linear(32, 256)
        self.fc4 = nn.Linear(256, INPUT_SIZE)
        
    def encode(self, x):
        x = x.float()
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)
    
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)
    
    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return F.relu(self.fc4(h3))
    
    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

In [21]:
model = VAE().to(device)

In [22]:
optimizer = optim.Adam(model.parameters(), lr = 1e-3)

In [23]:
# we need to compute the loss only on the non-zero values
def loss_function(x_hat, x, mu, logvar):
    BCE = F.binary_cross_entropy(x_hat, x, reduction='sum')
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD

In [26]:
# Loop over epochs
for epoch in range(max_epochs):
    running_loss = 0.0
    # Training
    for i, x_batch in enumerate(training_generator):
        # Transfert to GPU
        x_batch = x_batch.to(device).float()
        optimizer.zero_grad()
        x_hat_batch, mu, logvar = model(x_batch)
        loss = loss_function(x_hat_batch, x_batch, mu, logvar)
        loss.backward()
        optimizer.step()
        running_loss +=loss.item()
        if i % 100 == 99: 
            print('[%d, %9d] loss: %.3f' % (epoch + 1,
                                           i  + 1, 
                                           running_loss / 10000))
            running_loss = 0.0
print('Finished training')
        

RuntimeError: Assertion `x >= 0. && x <= 1.' failed. input value should be between 0~1, but got 1.022448 at /opt/conda/conda-bld/pytorch_1533672544752/work/aten/src/THNN/generic/BCECriterion.c:62