In [51]:
from torch.utils.data import DataLoader, Subset, random_split, Dataset
from torchvision import transforms, datasets
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
import torch.optim as optim

import numpy as np
import torch
import torch.nn as nn


In [5]:
data = pd.read_csv("data/user/user1_0.csv")

In [65]:
class MyDataset(Dataset):
    def __init__(self, data, label, **hyperparameters):
        super(MyDataset, self).__init__()
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        return self.data[i], self.label[i]

In [41]:
class EmbeddingNet(nn.Module):
  def __init__(self, n_users, n_movies, n_factors=50, embedding_dropout=0.02, hidden=10, dropouts=0.2):
    super().__init__()
            
    self.u = nn.Embedding(n_users, n_factors)
    self.m = nn.Embedding(n_movies, n_factors)
    self.drop = nn.Dropout(embedding_dropout)
    self.hidden = nn.Sequential(
        nn.Linear(n_factors*2, 64),
        nn.ReLU(),
        nn.Dropout(dropouts),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Dropout(dropouts),
        nn.Linear(32, 16),
        nn.ReLU(),
        nn.Dropout(dropouts),
    )
    self.fc = nn.Linear(16, 1)


  def forward(self, users, movies, minmax=None):
    features = torch.cat([self.u(users), self.m(movies)], dim=1)
    x = self.drop(features)
    x = self.hidden(x)
    out = self.fc(x)
    # if minmax is not None:
    #     min_rating, max_rating = minmax
    #     out = out*(max_rating - min_rating + 1) + min_rating - 0.5
    return out

In [42]:
n_users, n_movies = 943, 1664
model = EmbeddingNet(n_users=n_users, n_movies=n_movies, n_factors=150, hidden=[500, 500, 500], embedding_dropout=0.05, dropouts=0.05)

In [54]:
lr = 1e-3
wd = 1e-5
batch_size = 1024
minmax = (1.0,5.0)

In [80]:
X_train = pd.read_csv("data/user/user1_0.csv")[["user","movie"]].to_numpy()

In [81]:
y_train = pd.read_csv("data/user/rating1_0.csv")[["rating"]].to_numpy()


In [88]:
t = torch.from_numpy(y_train).squeeze()
d = torch.from_numpy(X_train)

In [89]:
d.shape

torch.Size([4527, 2])

In [132]:
X_train = pd.read_csv("data/user/user1_0.csv")[["user","movie"]].to_numpy()
y_train = pd.read_csv("data/user/rating1_0.csv")[["rating"]].to_numpy()

X_train = torch.as_tensor(X_train)
y_train = torch.as_tensor(y_train,dtype=torch.float).squeeze()


X_test = pd.read_csv("data/test/test_u.csv")[["user","movie"]].to_numpy()
y_test = pd.read_csv("data/test/test_r.csv")[["rating"]].to_numpy()

X_test = torch.as_tensor(X_test)
y_test = torch.as_tensor(y_test,dtype=torch.float).squeeze()

max_rating=5.0
min_rating=1.0
y_train = (y_train - min_rating)/(max_rating - min_rating)
y_test = (y_test - min_rating)/(max_rating - min_rating)

train_dataset = MyDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = MyDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [105]:
y_test = pd.read_csv("data/test/test_r.csv")[["rating"]].to_numpy()
y_test = torch.from_numpy(y_test).squeeze()


In [121]:
y_train = pd.read_csv("data/user/rating1_0.csv")[["rating"]].to_numpy()

In [128]:
y_train[7]

tensor(1.)

In [47]:
criterion = nn.MSELoss()
# criterion = nn.CrossEntropyLoss() 
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
train_loss = []
val_loss = []

model.train()

EmbeddingNet(
  (u): Embedding(943, 150)
  (m): Embedding(1664, 150)
  (drop): Dropout(p=0.05, inplace=False)
  (hidden): Sequential(
    (0): Linear(in_features=300, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.05, inplace=False)
    (3): Linear(in_features=64, out_features=32, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.05, inplace=False)
    (6): Linear(in_features=32, out_features=16, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.05, inplace=False)
  )
  (fc): Linear(in_features=16, out_features=1, bias=True)
)

In [136]:
def test(data_loader, model, criterion):
  model.eval()
  acc = 0
  total_loss = 0.0
  with torch.no_grad():
      for data, label in data_loader:
        # label = label.type(torch.LongTensor)
        outputs = model(data[:, 0], data[:, 1], minmax=minmax)
        loss = criterion(outputs, label)
        total_loss += loss.item()
        _, pred_labels = torch.max(outputs, dim=1)
        for pred_label, gt_label in zip(pred_labels.view(-1), label.view(-1)):
            if pred_label == gt_label:
                acc += 1
  model.train()
  return acc, total_loss

In [137]:
for epoch in range(1, 100):
  running_loss = 0.0
  acc = 0
  for i, (data, label) in enumerate(train_loader):
    with torch.autograd.set_detect_anomaly(True):
      # label = label.type(torch.LongTensor)
      optimizer.zero_grad()
      outputs = model(data[:, 0], data[:, 1], minmax=minmax)
      _, pred_labels = torch.max(outputs, dim=1)
      for pred_label, gt_label in zip(pred_labels.view(-1), label.view(-1)):
        if pred_label + 1 == gt_label:
          acc += 1
      loss = criterion(outputs, label)
      loss.backward()
      optimizer.step()
      running_loss += loss.item()
  test_acc, test_loss = test(test_loader, model, criterion)
  train_loss.append(running_loss * batch_size / len(X_train))
  val_loss.append(test_loss * batch_size / len(X_test))
  print(running_loss * batch_size / len(X_train), test_loss * batch_size / len(X_test))



  return F.mse_loss(input, target, reduction=self.reduction)


0.175188479582574 0.14884249764674884
0.13049255735120527 0.11464090911650969
0.12469006659101381 0.10810966989102072
0.11439814819308973 0.11131360083668976
0.1106763080911234 0.11126433423194387
0.1108109457539701 0.10622811427446402
0.10908836599004065 0.10107422410664611
0.10831618261621676 0.0996232364696629
0.10683219543955882 0.10020241158656634
0.10762548393989363 0.1008480726297545
0.10757200570988133 0.10053315234399487
0.10653879828397238 0.09942870833570046
0.10566704347482811 0.09855196923166962
0.1068372277726868 0.09868695824411712
0.10578379326633515 0.09905991779048082
0.10713041426462924 0.09867305621699081
0.10708194982934424 0.09836833881160081
0.1058070892947437 0.09851895775216274
0.10531510204991837 0.09880288601399903
0.10697233058085946 0.09765694483352401
0.10575333467403047 0.09785822463729078
0.10561057382107097 0.0979107825184538
0.10660414614634244 0.09722078451541147
0.10535805046834876 0.0979021873971524
0.10570918631548383 0.09809208662364045
0.10669740

In [138]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.10.0-cp39-cp39-macosx_10_14_x86_64.whl (241.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting h5py>=2.9.0
  Downloading h5py-3.7.0-cp39-cp39-macosx_10_9_x86_64.whl (3.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting astunparse>=1.6.0
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting tensorflow-estimator<2.11,>=2.10.0
  Downloading tensorflow_estimator-2.10.0-py2.py3-none-any.whl (438 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m438.7/438.7 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting tensorflow-io-gcs-filesystem>=0.23.1
  Downloading tensorflow_io_gcs_filesystem-0.27.0-cp39-cp39-macosx_10_14_x86_64.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 2.11.0
    Uninstalling tensorboard-2.11.0:
      Successfully uninstalled tensorboard-2.11.0
Successfully installed astunparse-1.6.3 flatbuffers-22.10.26 gast-0.4.0 google-pasta-0.2.0 h5py-3.7.0 keras-2.10.0 keras-preprocessing-1.1.2 libclang-14.0.6 opt-einsum-3.3.0 tensorboard-2.10.1 tensorflow-2.10.0 tensorflow-estimator-2.10.0 tensorflow-io-gcs-filesystem-0.27.0
Note: you may need to restart the kernel to use updated packages.


In [147]:
%tensorboard --logdir training_logs/127.0.0.1:56428/experiment_0

In [140]:
%load_ext tensorboard