#### **Library imports**

In [1]:
# Library imports
import pyforest
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from tqdm import tqdm
from pprint import pprint
from time import sleep

from turtle import forward
import torch.nn as nn
import torch.nn.functional as F
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from torch.utils.data import DataLoader, TensorDataset

#### **Hyperparams and loading data**

In [2]:
train_edges = np.load('train_edges.npy')
adj = np.load('adj.npy')
train_edges.shape
users = torch.LongTensor(train_edges[:, 0])
items = torch.LongTensor(train_edges[:, 1])
ratings = torch.FloatTensor(train_edges[:, 2])

# some hyperparams
epochs = 100
lr = 0.01
n_users = 943 
n_items = 1682

#### **Defining collaborative filtering**

In [3]:
class CollaborativeFiltering(Module):
    def __init__(self, n_users, n_items, n_factors):
        super(CollaborativeFiltering, self).__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.item_emb = nn.Embedding(n_items, n_factors)

    def forward(self, user, item):
        u = self.user_emb(user)
        i = self.item_emb(item)
        dot = (u * i).sum(1)
        return torch.sigmoid(dot)

# This architecture is giving me the best performance yet, 
# using BCE loss, n_factors = 64 and epochs = 200 with a final loss of 0.088

In [4]:
def get_accuracy(y_hat, y):
    y = y.clone().int()
    y_hat = (y_hat.clone() > 0.5).int()
    accuracy = (y == y_hat).sum() / len(y)
    return accuracy.item()

#### **Testing inner collaborative filtering model**

In [7]:
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

losses = []
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')
epochs = 200
n_samples = len(ratings)

for _ in tqdm(range(epochs)):
    y_hat = model(users, items)
    loss = loss_fn(y_hat, ratings)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
sleep(1)

print('Training BCE loss after {} epochs: {}'.format(epochs, round(losses[-1], 3)))
print('Training accuracy after {} epochs: {}'.format(epochs, round(get_accuracy(y_hat, ratings), 3)))

100%|██████████| 200/200 [00:16<00:00, 12.44it/s]


Training BCE loss after 200 epochs: 0.094
Training accuracy after 200 epochs: 0.984


#### **Storing model parameters**

In [8]:
model.user_emb

Embedding(943, 64)

In [9]:
# self.weights = []
# user_emb = 

#### **Collaborative filtering with manual gradient updates**

An explanation of what `retain_graph = True` does:
- https://stackoverflow.com/questions/46774641/what-does-the-parameter-retain-graph-mean-in-the-variables-backward-method
- https://blog.paperspace.com/pytorch-101-understanding-graphs-and-automatic-differentiation/ (Paperspace blog on Autograd)
- https://pytorch.org/blog/computational-graphs-constructed-in-pytorch/ (Official blog on computational graphs)

An explanation of what `create_graph = True` does:
- If `True`, graph of the derivative will be constructed, allowing to compute higher order derivative products. 
- Source - https://pytorch.org/docs/stable/generated/torch.autograd.grad.html#torch-autograd-grad

In [None]:
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
losses = []
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')
epochs = 200
n_samples = len(ratings)

for _ in tqdm(range(epochs)):
    y_hat = model(users, items)
    loss = loss_fn(y_hat, ratings)
    

##### **Start executing from here**

In [28]:
users = torch.LongTensor(train_edges[:, 0])
items = torch.LongTensor(train_edges[:, 1])
ratings = torch.FloatTensor(train_edges[:, 2])
n_samples = len(ratings)
ratings.requires_grad_() # set requires_grad = True for ratings
print(ratings)

tensor([1., 1., 0.,  ..., 1., 1., 0.], requires_grad=True)


In [29]:
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
optimizer = torch.optim.Adam(model.parameters(), lr = lr) 
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')

In [30]:
y_hat = model(users, items)
loss = loss_fn(y_hat, ratings)
print(loss)
print(loss.item())

tensor(4.0821, grad_fn=<BinaryCrossEntropyBackward0>)
4.082079887390137


In [31]:
# optimizer.zero_grad()
# loss.backward()
# optimizer.step()
# torch.autograd.grad(loss, ratings) # this line doesn't work with above code
# # next time try updating weights manually without using optimizer

In [32]:
p1, p2 = model.parameters()
p1

Parameter containing:
tensor([[-1.6400,  1.2311, -0.2783,  ..., -0.0792,  0.6679,  0.5190],
        [-1.2116, -1.2453,  0.3350,  ..., -0.8086, -1.0625, -0.7433],
        [-0.0861,  0.5862,  0.0486,  ..., -1.5140,  0.0466,  0.2254],
        ...,
        [-1.5609,  0.9318, -1.1871,  ...,  1.8340,  0.5131,  0.7619],
        [ 0.5751, -0.0915, -1.4706,  ..., -0.6700,  0.9471,  1.3083],
        [ 1.2015, -0.1723, -1.0820,  ..., -1.0306,  0.7489, -0.3371]],
       requires_grad=True)

In [33]:
p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
p1_grad

(tensor([[-2.1710e-04,  5.9523e-05,  3.1794e-06,  ..., -5.7551e-05,
          -3.5378e-05,  1.0161e-04],
         [ 1.8902e-05, -6.0580e-05,  3.5197e-05,  ..., -6.2680e-05,
           2.3283e-05, -1.0836e-05],
         [-1.0293e-05,  3.6901e-06, -1.9097e-05,  ...,  3.0192e-05,
           1.4289e-05,  4.8925e-05],
         ...,
         [-3.7497e-06,  2.2213e-05, -2.4738e-05,  ..., -3.2561e-06,
          -9.1096e-07, -6.7995e-06],
         [ 1.6786e-05,  3.4801e-05, -1.2662e-04,  ..., -2.4084e-05,
           3.1077e-05,  9.3164e-05],
         [ 1.3467e-04, -2.3466e-05, -7.7969e-05,  ..., -5.8620e-05,
           7.4789e-06,  6.4515e-06]], grad_fn=<EmbeddingDenseBackwardBackward0>),)

In [34]:
p1_grad[0]

tensor([[-2.1710e-04,  5.9523e-05,  3.1794e-06,  ..., -5.7551e-05,
         -3.5378e-05,  1.0161e-04],
        [ 1.8902e-05, -6.0580e-05,  3.5197e-05,  ..., -6.2680e-05,
          2.3283e-05, -1.0836e-05],
        [-1.0293e-05,  3.6901e-06, -1.9097e-05,  ...,  3.0192e-05,
          1.4289e-05,  4.8925e-05],
        ...,
        [-3.7497e-06,  2.2213e-05, -2.4738e-05,  ..., -3.2561e-06,
         -9.1096e-07, -6.7995e-06],
        [ 1.6786e-05,  3.4801e-05, -1.2662e-04,  ..., -2.4084e-05,
          3.1077e-05,  9.3164e-05],
        [ 1.3467e-04, -2.3466e-05, -7.7969e-05,  ..., -5.8620e-05,
          7.4789e-06,  6.4515e-06]], grad_fn=<EmbeddingDenseBackwardBackward0>)

In [35]:
p1 = p1 - lr * p1_grad[0]
p1
# This way it's working

tensor([[-1.6400,  1.2311, -0.2783,  ..., -0.0792,  0.6679,  0.5190],
        [-1.2116, -1.2453,  0.3350,  ..., -0.8086, -1.0625, -0.7433],
        [-0.0861,  0.5862,  0.0486,  ..., -1.5140,  0.0466,  0.2254],
        ...,
        [-1.5609,  0.9318, -1.1871,  ...,  1.8340,  0.5131,  0.7619],
        [ 0.5751, -0.0915, -1.4706,  ..., -0.6700,  0.9471,  1.3083],
        [ 1.2015, -0.1723, -1.0820,  ..., -1.0306,  0.7489, -0.3371]],
       grad_fn=<SubBackward0>)

In [36]:
lr

0.01

In [37]:
# Gradients w.r.t ratings works when create_graph = True is set for previous gradient computation
torch.autograd.grad(loss, ratings)

(tensor([ 1.0089e-05,        -inf, -9.0750e-06,  ...,  6.2042e-05,
          1.3535e-06,  7.5071e-05]),)

In [48]:
inner_grad = torch.autograd.grad(loss, )

tensor(4.0670, grad_fn=<BinaryCrossEntropyBackward0>)

In [None]:
# next try to put all these things together and get a workable code
# it need not even be right, just should be workable

#### **Putting it together: code for meta attack (one iteration)**

#### **Seems to work!!**

In [113]:
# basic data preparation
users = torch.LongTensor(train_edges[:, 0])
items = torch.LongTensor(train_edges[:, 1])
ratings = torch.FloatTensor(train_edges[:, 2])
n_samples = len(ratings)
ratings.requires_grad_() # set requires_grad = True for ratings
print('ratings: ', ratings)
lr = 10

# define model and loss function
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
# optimizer = torch.optim.Adam(model.parameters(), lr = lr) 
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')

# one iteration of forward pass 
y_hat = model(users, items)
loss = loss_fn(y_hat, ratings)
print('loss: ', loss)
print('first pass: loss.item(): ', loss.item())

# one iteration of inner gradient updation for two parameters
p1, p2 = model.parameters()
p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
p2_grad = torch.autograd.grad(loss, p2, create_graph=True)
print('p1 before updation: ', p1)
p1_new = p1 - lr * p1_grad[0]
p2_new = p2 - lr * p2_grad[0]
with torch.no_grad():
    p1.copy_(p1_new)
    p2.copy_(p2_new)
print('p1 after updation: ', p1)
print('inner gradients updated for p1 and p2')

# second iteration of forward pass 
y_hat = model(users, items)
loss = loss_fn(y_hat, ratings)
print('loss: ', loss)
print('second pass: loss.item(): ', loss.item())

# second iteration of inner gradient updation for two parameters
p1, p2 = model.parameters() # try to comment this out and see the change
p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
p2_grad = torch.autograd.grad(loss, p2, create_graph=True)
print('p1 before updation: ', p1)
p1_new = p1 - lr * p1_grad[0]
p2_new = p2 - lr * p2_grad[0]
with torch.no_grad():
    p1.copy_(p1_new)
    p2.copy_(p2_new)
print('p1 after updation: ', p1)
print('inner gradients updated for p1 and p2')

y_hat = model(users, items)
loss = loss_fn(y_hat, ratings)
print('loss: ', loss)
print('third pass: loss.item(): ', loss.item())

# it seems model weights are not updating when updating p1 and p2 manually
# need to investigate

# compute and print meta gradients w.r.t ratings
meta_grad = torch.autograd.grad(loss, ratings)
print('meta gradients: ', meta_grad)

ratings:  tensor([1., 1., 0.,  ..., 1., 1., 0.], requires_grad=True)
loss:  tensor(4.0275, grad_fn=<BinaryCrossEntropyBackward0>)
first pass: loss.item():  4.027473449707031
p1 before updation:  Parameter containing:
tensor([[-0.2015,  0.0094,  0.9397,  ..., -1.3159,  0.2729, -1.6187],
        [-1.2597,  1.5202, -0.2255,  ..., -1.3526,  0.3874, -2.7321],
        [ 0.6932, -1.4194,  0.1402,  ...,  0.2244, -0.5958,  2.5095],
        ...,
        [ 0.4889, -2.0555, -0.5391,  ..., -1.6226, -0.1316,  0.0669],
        [-0.4230,  0.9798, -0.4390,  ...,  0.7863,  1.2866, -0.1795],
        [-0.7211, -0.4675,  1.3677,  ...,  0.9185,  0.1461, -0.7254]],
       requires_grad=True)
p1 after updation:  Parameter containing:
tensor([[-0.2010,  0.0094,  0.9382,  ..., -1.3143,  0.2730, -1.6166],
        [-1.2591,  1.5200, -0.2253,  ..., -1.3520,  0.3870, -2.7318],
        [ 0.6939, -1.4189,  0.1402,  ...,  0.2243, -0.5958,  2.5092],
        ...,
        [ 0.4890, -2.0549, -0.5390,  ..., -1.6224, -0.131

#### **Putting it together: code for meta attack (inner loop)**

In [125]:
# basic data preparation
users = torch.LongTensor(train_edges[:, 0])
items = torch.LongTensor(train_edges[:, 1])
ratings = torch.FloatTensor(train_edges[:, 2])
n_samples = len(ratings)
ratings.requires_grad_() # set requires_grad = True for ratings
print('ratings: ', ratings)

# setting hyperparams
lr = 10
T = 10

# define model and loss function
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
p1, p2 = model.parameters()
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')

# for i in tqdm(range(T)):
for i in range(T):
    y_hat = model(users, items)
    loss = loss_fn(y_hat, ratings)
    print('inner loss at iter {}: {}'.format(i, loss.item()))
    
    p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
    p2_grad = torch.autograd.grad(loss, p2, create_graph=True)

    p1_new = p1 - lr * p1_grad[0]
    p2_new = p2 - lr * p2_grad[0]

    with torch.no_grad():
        p1.copy_(p1_new)
        p2.copy_(p2_new)

meta_grad = torch.autograd.grad(loss, ratings)
print('meta gradients: ', meta_grad)

ratings:  tensor([1., 1., 0.,  ..., 1., 1., 0.], requires_grad=True)
inner loss at iter 0: 4.081691265106201
inner loss at iter 1: 4.070385932922363
inner loss at iter 2: 4.059072017669678
inner loss at iter 3: 4.049466133117676
inner loss at iter 4: 4.039066791534424
inner loss at iter 5: 4.027796268463135
inner loss at iter 6: 4.0173845291137695
inner loss at iter 7: 4.006999969482422
inner loss at iter 8: 3.9974775314331055
inner loss at iter 9: 3.986273765563965
meta gradients:  (tensor([-3.2998e-05, -4.8649e-05,        -inf,  ...,  5.9168e-06,
         2.3956e-05,  2.2765e-05]),)


#### **Inner loop experiments**

In [153]:
# set seed to make results reproducible
torch.manual_seed(0)

# basic data preparation
users = torch.LongTensor(train_edges[:, 0])
items = torch.LongTensor(train_edges[:, 1])
ratings = torch.FloatTensor(train_edges[:, 2])
n_samples = len(ratings)
ratings.requires_grad_() # set requires_grad = True for ratings
print('ratings: ', ratings)

# setting hyperparams
lr = 10
T = 20

# define model and loss function
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
p1, p2 = model.parameters()
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')

# for i in tqdm(range(T)):
for i in range(T):
    y_hat = model(users, items)
    loss = loss_fn(y_hat, ratings)
    print('inner loss at iter {}: {}'.format(i, loss.item()))
    
    p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
    p2_grad = torch.autograd.grad(loss, p2, create_graph=True)

    p1_new = p1 - lr * p1_grad[0]
    p2_new = p2 - lr * p2_grad[0]

    with torch.no_grad():
        p1.copy_(p1_new)
        p2.copy_(p2_new)

meta_grad = torch.autograd.grad(loss, ratings)[0]
print('meta gradients: ', meta_grad)

ratings:  tensor([1., 1., 0.,  ..., 1., 1., 0.], requires_grad=True)
inner loss at iter 0: 4.066965103149414
inner loss at iter 1: 4.05279541015625
inner loss at iter 2: 4.040768146514893
inner loss at iter 3: 4.0320844650268555
inner loss at iter 4: 4.020899295806885
inner loss at iter 5: 4.009313583374023
inner loss at iter 6: 3.9977314472198486
inner loss at iter 7: 3.9878790378570557
inner loss at iter 8: 3.976311206817627
inner loss at iter 9: 3.9639394283294678
inner loss at iter 10: 3.9541139602661133
inner loss at iter 11: 3.9426400661468506
inner loss at iter 12: 3.931095600128174
inner loss at iter 13: 3.917526960372925
inner loss at iter 14: 3.909027099609375
inner loss at iter 15: 3.899671792984009
inner loss at iter 16: 3.8882553577423096
inner loss at iter 17: 3.8784937858581543
inner loss at iter 18: 3.865010976791382
inner loss at iter 19: 3.85528302192688
meta gradients:  tensor([ 5.1595e-06,  2.0629e-05, -9.9811e-06,  ..., -1.5046e-05,
        -2.3528e-05,  1.8430e-05

In [128]:
ratings

tensor([1., 1., 0.,  ..., 1., 1., 0.], requires_grad=True)

In [146]:
import math 

max_meta_grad = -math.inf 
edge_to_add = -1
for i in range(n_samples):
    if ratings[i] == 0:
        if meta_grad[i] > max_meta_grad:
            max_meta_grad = meta_grad[i]
            edge_to_add = i 
print(max_meta_grad)
print(edge_to_add)
print(users[i])
print(items[i])
print(ratings[i])

tensor(0.0002)
152311
tensor(13)
tensor(1628)
tensor(0., grad_fn=<SelectBackward0>)


#### **Experiment with modified ratings**

In [148]:
ratings_mod = train_edges[:, 2]
print(ratings_mod[152311])
ratings_mod[152311] = 1
print(ratings_mod[152311])
ratings_mod = torch.FloatTensor(ratings_mod)
ratings_mod.requires_grad_()

# set seed to make results reproducible
torch.manual_seed(0)

# define model and loss function
model = CollaborativeFiltering(n_users, n_items, n_factors = 64)
p1, p2 = model.parameters()
model.train()
loss_fn = nn.BCELoss(reduction = 'mean')

# for i in tqdm(range(T)):
T = 50
for i in range(T):
    y_hat = model(users, items)
    loss = loss_fn(y_hat, ratings_mod)
    print('inner loss at iter {}: {}'.format(i, loss.item()))
    
    p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
    p2_grad = torch.autograd.grad(loss, p2, create_graph=True)

    p1_new = p1 - lr * p1_grad[0]
    p2_new = p2 - lr * p2_grad[0]

    with torch.no_grad():
        p1.copy_(p1_new)
        p2.copy_(p2_new)

meta_grad = torch.autograd.grad(loss, ratings_mod)[0]
print('meta gradients: ', meta_grad)

1
1
inner loss at iter 0: 4.066965103149414
inner loss at iter 1: 4.05279541015625
inner loss at iter 2: 4.040768146514893
inner loss at iter 3: 4.0320844650268555
inner loss at iter 4: 4.020899295806885
inner loss at iter 5: 4.009313583374023
inner loss at iter 6: 3.9977314472198486
inner loss at iter 7: 3.9878790378570557
inner loss at iter 8: 3.976311206817627
inner loss at iter 9: 3.9639394283294678
inner loss at iter 10: 3.9541139602661133
inner loss at iter 11: 3.9426400661468506
inner loss at iter 12: 3.931095600128174
inner loss at iter 13: 3.917526960372925
inner loss at iter 14: 3.909027099609375
inner loss at iter 15: 3.899671792984009
inner loss at iter 16: 3.8882553577423096
inner loss at iter 17: 3.8784937858581543
inner loss at iter 18: 3.865010976791382
inner loss at iter 19: 3.85528302192688
inner loss at iter 20: 3.843494176864624
inner loss at iter 21: 3.8321027755737305
inner loss at iter 22: 3.822427272796631
inner loss at iter 23: 3.8114821910858154
inner loss at 

In [134]:
meta_grad[0]

tensor([ 5.6357e-06,  2.1075e-05, -1.0639e-05,  ..., -1.5299e-05,
        -2.3461e-05,  1.8523e-05])

#### **Random Experiments**

In [112]:
p1, p2 = model.parameters()
print(p1)
print(p2)

Parameter containing:
tensor([[-0.7120, -0.0302, -1.2044,  ...,  0.9276,  0.6895, -0.4508],
        [-1.1461, -0.6966, -0.2378,  ..., -0.2327,  0.6188,  0.9167],
        [ 0.3843, -2.0148, -1.0853,  ...,  1.7147, -0.3024,  0.6696],
        ...,
        [ 1.2213,  0.2022, -0.0885,  ...,  1.5958,  1.3737, -1.0703],
        [-0.9831,  0.7369, -1.5582,  ...,  1.3333,  0.0863,  0.3997],
        [ 1.4446,  0.7950, -0.0190,  ...,  0.0838,  0.8825, -1.3919]],
       requires_grad=True)
Parameter containing:
tensor([[-1.4700e+00,  6.6813e-02,  3.5546e-01,  ..., -8.3447e-01,
         -6.8905e-01, -5.9982e-01],
        [-7.7575e-01, -3.0664e-02,  1.6253e+00,  ...,  7.7636e-01,
         -1.0212e+00, -1.8676e+00],
        [ 5.7605e-01,  7.0709e-01, -8.5691e-01,  ...,  8.8496e-01,
         -2.7092e-02, -1.1606e+00],
        ...,
        [-7.8321e-02,  5.4087e-01, -2.2929e-02,  ...,  2.2237e-01,
          7.0521e-01,  7.6299e-01],
        [-1.4083e+00, -9.4570e-01, -1.2581e+00,  ...,  3.3569e-01,
   

In [42]:
state_dict = model.state_dict()

for name, param in state_dict.items():
    print(name)
    print(param)


user_emb.weight
tensor([[-0.6898,  1.2371,  0.4595,  ..., -0.5926, -1.2892, -0.5708],
        [-0.3267, -0.3558,  0.7712,  ..., -1.2938, -2.1441, -1.3277],
        [ 0.6864, -0.2246, -0.6704,  ..., -0.4446,  0.0593,  0.5291],
        ...,
        [-0.7997, -0.0640, -0.7172,  ..., -0.4175, -0.5013, -0.1714],
        [ 0.8728,  1.6324, -0.5116,  ..., -0.2887,  1.2637, -0.2122],
        [ 1.2795, -1.6052,  0.0071,  ...,  0.5694,  1.2050, -0.8763]])
item_emb.weight
tensor([[-0.0990,  0.4868, -1.5878,  ..., -0.7078,  0.6567,  1.1695],
        [-1.0863, -1.0977,  0.9486,  ...,  0.5633, -0.2189, -0.2361],
        [-1.0151,  1.5876, -0.3008,  ..., -2.5757,  0.2672,  1.7589],
        ...,
        [ 0.2436, -0.7643,  0.5386,  ...,  0.5982, -0.2018,  2.3530],
        [ 1.2479, -0.1559,  1.0687,  ...,  1.8135,  0.7614,  1.3881],
        [-0.3744, -0.6366,  0.1969,  ..., -0.1726,  1.0843,  0.7719]])


In [44]:
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.6898,  1.2371,  0.4595,  ..., -0.5926, -1.2892, -0.5708],
        [-0.3267, -0.3558,  0.7712,  ..., -1.2938, -2.1441, -1.3277],
        [ 0.6864, -0.2246, -0.6704,  ..., -0.4446,  0.0593,  0.5291],
        ...,
        [-0.7997, -0.0640, -0.7172,  ..., -0.4175, -0.5013, -0.1714],
        [ 0.8728,  1.6324, -0.5116,  ..., -0.2887,  1.2637, -0.2122],
        [ 1.2795, -1.6052,  0.0071,  ...,  0.5694,  1.2050, -0.8763]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0990,  0.4868, -1.5878,  ..., -0.7078,  0.6567,  1.1695],
        [-1.0863, -1.0977,  0.9486,  ...,  0.5633, -0.2189, -0.2361],
        [-1.0151,  1.5876, -0.3008,  ..., -2.5757,  0.2672,  1.7589],
        ...,
        [ 0.2436, -0.7643,  0.5386,  ...,  0.5982, -0.2018,  2.3530],
        [ 1.2479, -0.1559,  1.0687,  ...,  1.8135,  0.7614,  1.3881],
        [-0.3744, -0.6366,  0.1969,  ..., -0.1726,  1.0843,  0.7719]],
       requires_grad=True)


#### **Explore nn.Embedding()**

In [46]:
nn.Embedding(n_users, n_items)

Embedding(943, 1682)

In [None]:
# def forward(self, user, item):
#         u = self.user_emb(user)
#         i = self.item_emb(item)
#         dot = (u * i).sum(1)
#         return torch.sigmoid(dot)

In [54]:
n_factors = 32
user_emb = nn.Embedding(n_users, n_factors)
item_emb = nn.Embedding(n_items, n_factors)

In [55]:
user_emb.weight

Parameter containing:
tensor([[ 0.4439, -1.9483, -0.9516,  ..., -1.4186, -1.3594,  0.2810],
        [-1.1488, -0.0379, -0.6078,  ...,  0.9696,  0.4705,  0.2289],
        [-0.4593,  1.6118, -1.6503,  ..., -1.1451,  0.0371, -1.4867],
        ...,
        [ 0.4872,  0.7919, -0.3043,  ...,  0.2604, -0.1841,  1.1218],
        [-0.4748,  0.5623, -0.5283,  ..., -1.3008, -0.1314, -0.4233],
        [-0.8145, -0.2831, -1.4781,  ..., -0.9306,  1.4456,  0.6241]],
       requires_grad=True)

In [57]:
u = user_emb(users)
u

tensor([[-1.4081,  0.6106, -0.6611,  ..., -1.2111, -0.4488, -1.0079],
        [ 0.5214,  1.6385,  2.1922,  ...,  0.8036,  0.1464, -1.1281],
        [-0.2598, -1.0658, -1.0823,  ...,  2.8500,  0.2176, -1.6161],
        ...,
        [ 1.8614,  1.4431,  0.1386,  ..., -2.3231, -0.1649,  0.1128],
        [-0.1436, -1.0374,  0.8331,  ..., -0.5254,  1.5097, -0.2469],
        [-2.5575,  0.8269, -0.8540,  ..., -0.7730, -0.4882, -0.7975]],
       grad_fn=<EmbeddingBackward0>)

In [94]:
ratings = torch.FloatTensor(train_edges[:, 2])
ratings.requires_grad_() # set requires_grad = True for ratings
print('ratings: ', ratings)
loss_fn = nn.BCELoss(reduction = 'mean')

n_factors = 32
user_emb = nn.Embedding(n_users, n_factors)
item_emb = nn.Embedding(n_items, n_factors)

# first forward pass 
u = user_emb(users)
i = item_emb(items)
dot = (u * i).sum(1)
# dot = torch.tensor(1)
y_hat = torch.sigmoid(dot)
# y_hat = 0

# compute loss
loss = loss_fn(y_hat, ratings)
print('loss: ', loss)
print('loss.item(): ', loss.item())

print(user_emb.weight)
user_grad = torch.autograd.grad(loss, user_emb.weight, create_graph=True)
item_grad = torch.autograd.grad(loss, item_emb.weight, create_graph=True)
user_emb.weight = nn.Parameter(user_emb.weight - 1000 * user_grad[0])
# item_emb.weight = item_emb.weight - lr * item_grad[0]
print(user_emb.weight)
print(user_grad)



ratings:  tensor([1., 1., 0.,  ..., 1., 1., 0.], requires_grad=True)
loss:  tensor(2.4477, grad_fn=<BinaryCrossEntropyBackward0>)
loss.item():  2.447725772857666
Parameter containing:
tensor([[ 0.5933, -1.1616, -1.1742,  ...,  0.5817, -0.7269,  0.8990],
        [-0.1532,  1.6284,  0.4429,  ...,  0.6275,  1.0835,  0.0712],
        [ 0.9543,  1.2078,  1.2772,  ..., -0.6062, -0.6404, -2.1918],
        ...,
        [ 0.2970, -1.1326, -0.1476,  ...,  2.5913,  1.8091, -0.8726],
        [ 1.2338, -0.2983, -0.7845,  ...,  0.1493, -1.1972,  0.6469],
        [ 1.3923, -0.2281,  0.8573,  ...,  0.4372, -0.9985, -0.3536]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6581, -0.9044, -1.0907,  ...,  0.4492, -0.5738,  0.8677],
        [-0.0961,  1.4929,  0.4090,  ...,  0.5991,  1.0047,  0.0726],
        [ 0.8593,  1.1519,  1.2632,  ..., -0.5731, -0.6128, -2.0757],
        ...,
        [ 0.3062, -1.1411, -0.1266,  ...,  2.5627,  1.7878, -0.8593],
        [ 1.2029, -0.3668, -0.7381,  ...

In [66]:
user_emb.weight

Parameter containing:
tensor([[ 0.4439, -1.9483, -0.9516,  ..., -1.4186, -1.3594,  0.2810],
        [-1.1488, -0.0379, -0.6078,  ...,  0.9696,  0.4705,  0.2289],
        [-0.4593,  1.6118, -1.6503,  ..., -1.1451,  0.0371, -1.4867],
        ...,
        [ 0.4872,  0.7919, -0.3043,  ...,  0.2604, -0.1841,  1.1218],
        [-0.4748,  0.5623, -0.5283,  ..., -1.3008, -0.1314, -0.4233],
        [-0.8145, -0.2831, -1.4781,  ..., -0.9306,  1.4456,  0.6241]],
       requires_grad=True)

In [72]:
p1, p2 = model.parameters()
print(p1)
p1 = p1 - lr * p1_grad[0]
print(p1)

Parameter containing:
tensor([[ 1.5088, -1.1876, -0.2879,  ..., -0.7009,  2.4997,  0.3683],
        [-0.2103, -0.2408, -2.2474,  ...,  0.4838, -0.4001, -0.2369],
        [-1.6945,  0.5134, -0.2179,  ..., -0.8171, -0.9805,  1.2394],
        ...,
        [-0.5910,  1.0138,  0.1980,  ...,  0.5972,  0.1417, -0.3724],
        [ 0.5018,  0.2675,  0.1218,  ..., -0.7895, -1.6456,  0.3717],
        [ 0.7160,  0.0197,  0.8263,  ...,  0.0859, -0.5210,  0.2499]],
       requires_grad=True)
tensor([[ 1.5088, -1.1876, -0.2879,  ..., -0.7009,  2.4997,  0.3683],
        [-0.2103, -0.2408, -2.2474,  ...,  0.4838, -0.4001, -0.2369],
        [-1.6945,  0.5134, -0.2179,  ..., -0.8171, -0.9805,  1.2394],
        ...,
        [-0.5910,  1.0138,  0.1980,  ...,  0.5972,  0.1417, -0.3724],
        [ 0.5018,  0.2675,  0.1218,  ..., -0.7895, -1.6456,  0.3717],
        [ 0.7160,  0.0197,  0.8263,  ...,  0.0859, -0.5210,  0.2499]],
       grad_fn=<SubBackward0>)
