In [137]:
from utils import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pickle

In [151]:
DataFolder = 'Data/Small_Data'
movies, users, moviesVecs, y = LoadData(DataFolder)

numMoviesFeatures = movies.shape[1] - 1
numUsersFeatures = users.shape[1] - 3

movies = movies.iloc[:, 1:]
users = users.iloc[:, 3:]


In [152]:
scalerMovies = StandardScaler()
scalerMovies.fit(movies)
moviesScaled = scalerMovies.transform(movies)

scalerUsers = StandardScaler()
scalerUsers.fit(users)
usersScaled = scalerUsers.transform(users)

scalerY = MinMaxScaler()
scalerY.fit(y)
yScaled = scalerY.transform(y)

moviesTrain, moviesTest, usersTrain, usersTest, yTrain, yTest = train_test_split(moviesScaled, usersScaled, yScaled, test_size=0.2, random_state=42)
moviesTrain, moviesVal, usersTrain, usersVal, yTrain, yVal = train_test_split(moviesTrain, usersTrain, yTrain, test_size=0.2, random_state=42)


In [150]:
batchSize = 32

class MovieUserDataset(Dataset):
    def __init__(self, movies, users, y):
        self.movies = torch.tensor(movies, dtype=torch.float32)
        self.users = torch.tensor(users, dtype=torch.float32)
        self.targets = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        return self.movies[idx], self.users[idx], self.targets[idx]
    
trainDataset = MovieUserDataset(moviesTrain, usersTrain, yTrain)
valDataset = MovieUserDataset(moviesVal, usersVal, yVal)
testDataset = MovieUserDataset(moviesTest, usersTest, yTest)

trainLoader = DataLoader(trainDataset, batch_size=batchSize, shuffle=True)
valLoader = DataLoader(valDataset, batch_size=batchSize, shuffle=False)
testLoader = DataLoader(testDataset, batch_size=batchSize, shuffle=False)

In [101]:
numOutputs = 32

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.userNN = nn.Sequential(
            nn.Linear(numUsersFeatures, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, numOutputs),
        )
        self.movieNN = nn.Sequential(
            nn.Linear(numMoviesFeatures, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, numOutputs),
        )
        self.l2Norm = F.normalize
    def forward(self, movies, users):
        movieOut = self.movieNN(movies)
        userOut = self.userNN(users)
        moviesNorm = self.l2Norm(movieOut,p=2,dim=1)
        usersNorm = self.l2Norm(userOut,p=2,dim=1)
        similiarity = torch.sum(moviesNorm * usersNorm, dim=1)
        similiarity = similiarity.view(-1, 1)
        
        return similiarity    


In [102]:
Model = Model()

In [103]:
optimizer = optim.Adam(Model.parameters(), lr=0.001)

In [104]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

Model.to(device)

Model(
  (userNN): Sequential(
    (0): Linear(in_features=14, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=32, bias=True)
  )
  (movieNN): Sequential(
    (0): Linear(in_features=16, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=32, bias=True)
  )
)

In [80]:
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs, device="cpu"):
    for epoch in range(1, epochs+1):
        trainLoss = 0
        valLoss = 0
        model.train()
        for batch in train_loader:
            movies, users, targets = batch
            movies = movies.to(device)
            users = users.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            output = model(movies, users)
            loss = loss_fn(output, targets)
            loss.backward()
            optimizer.step()
            trainLoss += loss.item() * targets.size(0)
        trainLoss /= len(train_loader.dataset)
        
        model.eval()
        for batch in val_loader:
            movies, users, targets = batch
            movies = movies.to(device)
            users = users.to(device)
            targets = targets.to(device)
            output = model(movies, users)
            loss = loss_fn(output, targets)
            valLoss += loss.item() * targets.size(0)
        valLoss /= len(val_loader.dataset)
        
        print(f"Epoch: {epoch}, Training Loss: {trainLoss}, Validation Loss: {valLoss}")
        

In [111]:
loss_fn = nn.MSELoss()
epochs = 20

In [112]:
train(Model, optimizer, loss_fn, trainLoader, valLoader, epochs, device)

Epoch: 1, Training Loss: 0.021717797718052597, Validation Loss: 0.023131016182942665
Epoch: 2, Training Loss: 0.021255921179829322, Validation Loss: 0.022628078095796096
Epoch: 3, Training Loss: 0.020802522793608058, Validation Loss: 0.023369876681125827
Epoch: 4, Training Loss: 0.020401215637321224, Validation Loss: 0.022309250073167475
Epoch: 5, Training Loss: 0.0199973158288494, Validation Loss: 0.02217084397151206
Epoch: 6, Training Loss: 0.01956749842577108, Validation Loss: 0.021700463265279416
Epoch: 7, Training Loss: 0.019308085440207415, Validation Loss: 0.02158643286725544
Epoch: 8, Training Loss: 0.018971356692426607, Validation Loss: 0.0215726550737109
Epoch: 9, Training Loss: 0.018626524396528996, Validation Loss: 0.021289496319526334
Epoch: 10, Training Loss: 0.018392526028576463, Validation Loss: 0.020966414160874573
Epoch: 11, Training Loss: 0.01813304516697432, Validation Loss: 0.02155991469985055
Epoch: 12, Training Loss: 0.017823153707712334, Validation Loss: 0.02069

In [158]:
torch.save(Model.state_dict(), 'Model.pth')

In [133]:
lossTest = 0
for batch in testLoader:
    movies, users, targets = batch
    movies = movies.to(device)
    users = users.to(device)
    targets = targets.to(device)
    output = Model(movies, users)
    loss = loss_fn(output, targets)
    lossTest += loss.item() * targets.size(0)
    
print(f"Test Loss: {lossTest/len(testLoader.dataset)}")
    

Test Loss: 0.021130094071323983


In [153]:
testUser = [
    123,  # user_id
    3,    # rating_count
    0.0,  # rating_average
    0.0,  # action
    5.0,  # adventure
    10,  # animation
    0.0,  # children's
    0.0,  # comedy
    0.0,  # crime
    0.0,  # documentary
    0.0,  # drama
    5.0,  # fantasy
    0.0,  # horror
    0.0,  # mystery
    0.0,  # romance
    0.0,  # sci-fi
    0.0   # thriller
]

userVecs = np.tile(testUser[3:], (moviesVecs.shape[0], 1))
userVecsScaled = scalerUsers.transform(userVecs)

moviesVecsScaled = scalerMovies.transform(moviesVecs.iloc[:, 1:])

ratePredicted = Model(torch.tensor(moviesVecsScaled, dtype=torch.float32).to(device), torch.tensor(userVecsScaled, dtype=torch.float32).to(device))
ratePredictedUnscaled = scalerY.inverse_transform(ratePredicted.cpu().detach().numpy().flatten().reshape(-1, 1))

sortedIndexes = np.argsort(ratePredicted.cpu().detach().numpy().flatten())[::-1]
sortedRates = ratePredictedUnscaled[sortedIndexes]
sortedMovies = moviesVecs.iloc[sortedIndexes]

print(sortedMovies.head(10))

     MovieId  Year    Rating  Action  Adventure  Animation  Children  Comedy  \
160     6283  2001  3.900000       1          0          1         0       0   
171     6377  2003  3.960993       0          1          1         1       1   
53      4886  2001  3.871212       0          1          1         1       1   
575    68954  2009  4.004762       0          1          1         1       0   
289     8961  2004  3.836000       1          1          1         1       1   
108     5444  2002  3.810345       0          1          1         1       0   
615    78499  2010  4.109091       0          1          1         1       1   
122     5618  2001  4.155172       0          1          1         0       0   
558    65261  2008  4.000000       0          1          1         1       0   
606    76093  2010  3.943396       0          1          1         1       0   

     Crime  Documentary  Drama  Fantasy  Film-Noir  Horror  IMAX  Musical  \
160      0            0      0        0   

