In [2]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import scipy
import math

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Here, I used preprocessed KMRD data.

class TrainDataset(Dataset):
  def __init__(self):
    data_path = "/content/gdrive/MyDrive/colab notebook/data/"
    Xtr = pd.read_csv(os.path.join(data_path, 'movie_train_X.csv'), encoding='utf-8')
    ytr = pd.read_csv(os.path.join(data_path, 'movie_train_y.csv'), encoding='utf-8')
    Xtr = Xtr.to_numpy()
    ytr = ytr.to_numpy()
    Xtr = Xtr[:, 1:]
    ytr = ytr[:, 1:]
    Xtr = torch.from_numpy(Xtr.astype(np.float32))
    ytr = torch.from_numpy(ytr.astype(np.float32))
    self.X = Xtr
    self.y = ytr

  def __len__(self):
      return self.X.size(0)

  def __getitem__(self, index):
      return self.X[index], self.y[index]


In [5]:
class FM(nn.Module):
  def __init__(self, n, D):
    super().__init__()
    self.n = n
    self.D = D
    # n * D
    self.V = nn.Embedding(self.n, self.D)
    self.w_0 = nn.Parameter(torch.nn.Parameter(torch.randn(1))) 
    self.w = nn.Parameter(torch.nn.Parameter(torch.randn(self.n)))
  def forward(self, x):
    # print(x, 'a')
    # print(x.shape, 'b')
    # print(self.w_0.shape, 'c')
    # print((x@self.w).shape, 'd')
    # print((x @ self.V.weight).square().sum(dim=1).shape, 'e')
    # print(((x.square())@((self.V.weight).square()).sum(dim=1)).shape, 'f')
    return self.w_0 + (x@self.w) + (x @ self.V.weight).square().sum(dim=1) - ((x.square())@((self.V.weight).square())).sum(dim=1)
        


In [8]:
from torch import optim
dataset = TrainDataset()
dataloader = DataLoader(dataset=dataset, batch_size = 8, shuffle = True, num_workers = 2)
print(dataset[0][0].shape[0])
device = torch.device('cuda')
num_of_epoch = 20
lr = 0.0001
model = FM(n=dataset[0][0].shape[0], D = 10).to(device)
model.parameters()

optimizer = optim.SGD(model.parameters(), lr=lr)


for epoch in range(num_of_epoch):
  losses = 0
  for batch_idx, (inputs, labels) in enumerate(dataloader):
    inputs = inputs.to(device)
    labels=labels.squeeze(dim=1)
    labels = labels.to(device)
    pred = model(inputs)
    loss_func=nn.MSELoss()
    loss = loss_func(pred, labels)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    losses+=loss
  print(f"epoch {epoch+1}, loss = {losses/dataset.__len__()}")

1217
epoch 1, loss = 49.3720703125
epoch 2, loss = 33.869163513183594
epoch 3, loss = 27.910171508789062
epoch 4, loss = 24.252126693725586
epoch 5, loss = 21.602449417114258
epoch 6, loss = 19.57708740234375
epoch 7, loss = 17.94602394104004
epoch 8, loss = 16.616914749145508
epoch 9, loss = 15.494355201721191
epoch 10, loss = 14.546618461608887
epoch 11, loss = 13.733144760131836
epoch 12, loss = 13.020082473754883
epoch 13, loss = 12.386199951171875
epoch 14, loss = 11.817056655883789
epoch 15, loss = 11.317814826965332
epoch 16, loss = 10.865198135375977
epoch 17, loss = 10.448053359985352
epoch 18, loss = 10.070577621459961
epoch 19, loss = 9.719149589538574
epoch 20, loss = 9.394039154052734
