In [12]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data

In [2]:
# Ratings table; train_test_split() below reads its `movie` column.
df = pd.read_csv(filepath_or_buffer='./ml-latest/ratings_hashed.csv')

In [3]:
class Dataset(data.Dataset):
    """Map-style dataset that loads one serialized sample per ID on demand.

    Each item is read from ``<data_dir>/<ID>.pt`` with ``torch.load`` at
    access time; the constructor only stores the IDs and the directory.
    """

    def __init__(self, list_IDs, data_dir='./data/'):
        """Remember which samples belong to this dataset.

        Args:
            list_IDs: Indexable sequence of sample identifiers (ints or
                strings). Each one names a ``<ID>.pt`` file in ``data_dir``.
            data_dir: Directory that holds the ``.pt`` files. Defaults to
                ``'./data/'``.
        """
        self.list_IDs = list_IDs
        self.data_dir = data_dir

    def __len__(self):
        """Return the number of sample IDs in this dataset."""
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load and return the sample for ``list_IDs[index]``.

        Args:
            index: Position into ``list_IDs``.

        Returns:
            Whatever object ``torch.load`` deserializes from the sample file.
        """
        ID = self.list_IDs[index]
        # str() is required: IDs may be ints (e.g. the indexes produced by
        # train_test_split), and concatenating str + int raises TypeError.
        sample_path = os.path.join(self.data_dir, str(ID) + '.pt')
        return torch.load(sample_path)

In [13]:
# Load one stored sample and use its length as the model's input width:
# the VAE class sizes its first and last Linear layers from INPUT_SIZE.
vector = torch.load('./data/0.pt')
INPUT_SIZE = len(vector)
print(INPUT_SIZE)

283228


In [4]:
def train_test_split(df, alpha=0.90, seed=None):
    """Randomly partition movie indexes into a train list and a test list.

    The indexes are the positions ``0 .. n_movies - 1`` (where ``n_movies`` is
    the number of distinct values in ``df.movie``), not the movie values
    themselves.

    Args:
        df: DataFrame with a ``movie`` column.
        alpha: Fraction of the indexes assigned to the train list; must lie
            in ``[0, 1]``. The train list gets ``int(n_movies * alpha)`` items.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            shuffle draws from NumPy's global random state.

    Returns:
        Tuple ``(train_indexes, test_indexes)`` of two disjoint lists of ints
        that together cover ``range(n_movies)``.

    Raises:
        ValueError: If ``alpha`` is outside ``[0, 1]``.
    """
    if not 0.0 <= alpha <= 1.0:
        raise ValueError('alpha must be in [0, 1], got {!r}'.format(alpha))

    n_movies = df.movie.unique().size
    indexes = list(range(n_movies))

    # A dedicated RandomState makes a seeded split repeatable without touching
    # the global RNG; without a seed, use the global generator.
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(indexes)

    limit = int(n_movies * alpha)
    return indexes[:limit], indexes[limit:]

In [5]:
# Shuffle the movie indexes and split them with the function's default ratio.
train_indexes, test_indexes = train_test_split(df=df)

In [6]:
# Group the two index lists under their split name.
partition = {'train': train_indexes, 'test': test_indexes}

### Set up PyTorch

In [7]:
# Use the first CUDA device when PyTorch reports one, otherwise the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [8]:
# Keyword arguments unpacked into each DataLoader constructed below.
params = dict(batch_size=64, shuffle=True, num_workers=4)

In [9]:
# Max epochs -- presumably the number of passes made by the training loop,
# which is not shown in this part of the notebook (TODO confirm where used).
max_epochs = 10

In [10]:
# Generators: wrap each partition in a Dataset, then batch it with a
# DataLoader configured from the shared `params` dict.
training_set = Dataset(list_IDs=partition['train'])
testing_set = Dataset(list_IDs=partition['test'])

training_generator = data.DataLoader(dataset=training_set, **params)
testing_generator = data.DataLoader(dataset=testing_set, **params)

In [14]:
class VAE(nn.Module):
    """Fully connected variational autoencoder with a 32-dimensional latent.

    Encoder: ``input_size -> 1000 -> 256`` with ReLU, followed by two parallel
    linear heads (``fc31``, ``fc32``) that each map to 32 units.
    Decoder: ``32 -> 256 -> 1000 -> input_size`` with ReLU after every layer,
    so reconstructions are non-negative.
    """

    def __init__(self, input_size=None):
        """Build the encoder and decoder layers.

        Args:
            input_size: Width of the input/output vectors. When ``None``
                (the default) the module-level ``INPUT_SIZE`` is used.
        """
        # nn.Module.__init__ takes no positional arguments; passing `self`
        # again raises TypeError on instantiation.
        super(VAE, self).__init__()
        if input_size is None:
            input_size = INPUT_SIZE
        self.input_size = input_size

        # Encoder
        self.fc1 = nn.Linear(input_size, 1000)
        self.fc2 = nn.Linear(1000, 256)
        self.fc31 = nn.Linear(256, 32)  # head used as `mu` in forward()
        self.fc32 = nn.Linear(256, 32)  # head used as `logvar` in forward()

        # Decoder
        self.fc4 = nn.Linear(32, 256)
        self.fc5 = nn.Linear(256, 1000)
        self.fc6 = nn.Linear(1000, input_size)

    def encode(self, x):
        """Map ``x`` to the two 32-unit encoder head outputs.

        Returns:
            Tuple ``(fc31(h), fc32(h))``, consumed by ``forward`` as
            ``(mu, logvar)``.
        """
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        return self.fc31(h2), self.fc32(h2)

    def reparameterize(self, mu, logvar):
        """Sample ``mu + eps * exp(0.5 * logvar)`` with ``eps ~ N(0, 1)``.

        The noise is drawn on every call; there is no separate
        deterministic path.
        """
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        # add_ is in place, but only on the fresh tensor returned by mul().
        return eps.mul(std).add_(mu)

    def decode(self, z):
        """Map a latent vector ``z`` back to an ``input_size``-wide output."""
        h4 = F.relu(self.fc4(z))
        h5 = F.relu(self.fc5(h4))
        return F.relu(self.fc6(h5))

    def forward(self, x):
        """Encode ``x``, sample a latent vector, and decode it.

        Returns:
            Tuple ``(reconstruction, mu, logvar)``.
        """
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar