#### **Library imports**

In [1]:
# Library imports
import pyforest
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt
from tqdm import tqdm
from pprint import pprint
from time import sleep
import time

from turtle import forward
import torch.nn as nn
import torch.nn.functional as F
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from torch.utils.data import DataLoader, TensorDataset

#### **Hyperparams and loading data**

In [3]:
train_edges = np.load('data/train_edges.npy')
user_list = train_edges[:, 0]
item_list = train_edges[:, 1]
rating_list = train_edges[:, 2].astype('float32')

n_users = 943 
n_items = 1682
n_samples = len(rating_list)

#### **Defining collaborative filtering**

In [30]:
class CollaborativeFiltering(Module):
    def __init__(self, n_users, n_items, n_factors):
        super(CollaborativeFiltering, self).__init__()
        self.user_emb = nn.Embedding(n_users, n_factors)
        self.item_emb = nn.Embedding(n_items, n_factors)

    def forward(self, user, item):
        u = self.user_emb(user)
        i = self.item_emb(item)
        dot = (u * i).sum(1)
        return torch.sigmoid(dot)

#### **Code for meta attack**

In [23]:
# start execution
start_time = time.time()

# some hyperparams
lr = 1
T = 10
Delta = 10
n_factors = 64

# store loss results in this list and later convert to dataframe 
results = []

# list of perturbations
perturbations = dict()
perturbations['edges'] = []
perturbations['metagrad'] = []

# print hyperparam config
print('-> Learning rate: ', lr)
print('-> T: ', T)
print('-> Delta: ', Delta)
print('-> Embedding size: ', n_factors)

for delta in range(Delta):
    # reload the users, items and ratings tensors
    users = torch.LongTensor(train_edges[:, 0])
    items = torch.LongTensor(train_edges[:, 1])
    ratings = torch.FloatTensor(train_edges[:, 2])

    # add those perturbations to "ratings"
    for index in perturbations['edges']:
        ratings[index] = 1

    # set requires_grad for ratings, to compute meta gradients
    ratings.requires_grad_()

    # makes code reproducible
    torch.manual_seed(0)

    # define model and loss
    model = CollaborativeFiltering(n_users, n_items, n_factors)
    p1, p2 = model.parameters()
    loss_fn = nn.BCELoss(reduction = 'mean')
    model.train()

    # inner loop training process
    for i in range(T):
        y_hat = model(users, items)
        loss = loss_fn(y_hat, ratings)
        results.append([delta, i, loss.item()])
        
        p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
        p2_grad = torch.autograd.grad(loss, p2, create_graph=True)

        # compute inner parameter gradients
        with torch.no_grad():
            p1_new = p1 - lr * p1_grad[0]
            p2_new = p2 - lr * p2_grad[0]
            p1.copy_(p1_new)
            p2.copy_(p2_new)
    
    # compute meta gradient
    meta_grad = torch.autograd.grad(loss, ratings)[0]

    # select best edge to perturb
    max_meta_grad = -math.inf
    edge_to_add = -1
    for i in range(n_samples):
        if ratings[i] == 0: # search over only negative edges
            if meta_grad[i] > max_meta_grad:
                max_meta_grad = meta_grad[i]
                edge_to_add = i 

    perturbations['edges'].append(edge_to_add)
    perturbations['metagrad'].append(max_meta_grad.item())

# compute execution time
exec_time = int(time.time() - start_time)
exec_time = time.strftime("%Hh %Mm %Ss", time.gmtime(exec_time))
print('-> Execution time: {}'.format(exec_time))

# store results in CSV files
results = pd.DataFrame(results, columns = ['perturbs', 'iters', 'loss'])
results.to_csv('results/losses_Delta={}_T={}_LR={}_Factors={}'.format(Delta, T, lr, n_factors))

perturbations = pd.DataFrame(perturbations)
perturbations.to_csv('results/perturbations_Delta={}_T={}_LR={}_Factors={}'.format(Delta, T, lr, n_factors))

-> Learning rate:  1
-> T:  10
-> Delta:  10
-> Embedding size:  64
-> Execution time: 00h 00m 20s


#### **Checking stored results**

In [28]:
perturbations.head()

Unnamed: 0,edges,metagrad
0,141472,0.000193
1,152311,0.000175
2,173390,0.000174
3,5665,0.000164
4,112886,0.000164


In [29]:
results.head()

Unnamed: 0,perturbs,iters,loss
0,0,0,4.066596
1,0,1,4.065507
2,0,2,4.064427
3,0,3,4.062918
4,0,4,4.061419


In [1]:
torch.cuda.is_available()

<IPython.core.display.Javascript object>

True

#### **Code for meta attack in GPU**

In [17]:
# start execution
start_time = time.time()

# GPU settings (set use_gpu = -1 if you want to use CPU)
use_gpu = 0
if use_gpu == -1:
    device = 'cpu'
else:
    device = torch.device('cuda:{}'.format(str(use_gpu)) if torch.cuda.is_available() else 'cpu')

# some hyperparams
lr = 1
T = 10
Delta = 1
n_factors = 64

# store loss results in this list and later convert to dataframe 
results = []

# list of perturbations
perturbations = dict()
perturbations['edges'] = []
perturbations['metagrad'] = []

# print hyperparam config
print('-> Learning rate: ', lr)
print('-> T: ', T)
print('-> Delta: ', Delta)
print('-> Embedding size: ', n_factors)
print('-> Device: ', device)

# Load users and items data as tensors
users = torch.tensor(user_list, device = device)
items = torch.tensor(item_list, device = device)
ratings = torch.tensor(rating_list, device = device, requires_grad = True)

# define model
model = CollaborativeFiltering(n_users, n_items, n_factors)
model.to(device)
p1, p2 = model.parameters()

for delta in range(Delta):
    # reload the ratings tensor
    # ratings = torch.tensor(rating_list, device = device, requires_grad = True)

    # add those perturbations to "ratings" (think of a more efficient way to do this)
    # for index in perturbations['edges']:
        # ratings[index] = 1

    # makes code reproducible
    torch.manual_seed(0)

    # reset model paramters 
    for layer in model.children():
        layer.reset_parameters()
    
    # define loss function
    loss_fn = nn.BCELoss(reduction = 'mean')

    model.train()

    # inner loop training process
    for i in range(T):
        y_hat = model(users, items)
        loss = loss_fn(y_hat, ratings)
        results.append([delta, i, loss.item()])
        
        p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
        p2_grad = torch.autograd.grad(loss, p2, create_graph=True)

        # compute inner parameter gradients
        with torch.no_grad():
            p1_new = p1 - lr * p1_grad[0]
            p2_new = p2 - lr * p2_grad[0]
            p1.copy_(p1_new)
            p2.copy_(p2_new)
    
    # compute meta gradient
    meta_grad = torch.autograd.grad(loss, ratings)[0]

    # select best edge to perturb
    max_meta_grad = -math.inf
    edge_to_add = -1
    for i in range(n_samples):
        if ratings[i] == 0: # search over only negative edges
            if meta_grad[i] > max_meta_grad:
                max_meta_grad = meta_grad[i]
                edge_to_add = i 

    perturbations['edges'].append(edge_to_add)
    perturbations['metagrad'].append(max_meta_grad.item())

    # perform one perturbation
    with torch.no_grad():
        ratings[edge_to_add] = 1

# compute execution time
exec_time = int(time.time() - start_time)
exec_time = time.strftime("%Hh %Mm %Ss", time.gmtime(exec_time))
print('-> Execution time: {}'.format(exec_time))

# store results in CSV files
results = pd.DataFrame(results, columns = ['perturbs', 'iters', 'loss'])
# results.to_csv('results/losses_Delta={}_T={}_LR={}_Factors={}'.format(Delta, T, lr, n_factors))

perturbations = pd.DataFrame(perturbations)
# perturbations.to_csv('results/perturbations_Delta={}_T={}_LR={}_Factors={}'.format(Delta, T, lr, n_factors))

-> Learning rate:  1
-> T:  10
-> Delta:  1
-> Embedding size:  64
-> Device:  cuda:0
-> Execution time: 00h 00m 09s


In [11]:
# check available devices
print('#GPUs: ', torch.cuda.device_count())

#GPUs:  8


#### **Some experiments**

In [36]:
torch.manual_seed(0)
use_gpu = 0
if use_gpu == -1:
    device = 'cpu'
else:
    device = torch.device('cuda:{}'.format(str(use_gpu)) if torch.cuda.is_available() else 'cpu')
print('current device: ', device)

model = CollaborativeFiltering(n_users, n_items, n_factors)
model.to(device)
print('model params in gpu: ', next(model.parameters()).is_cuda)

users = torch.tensor(user_list, device = device)
items = torch.tensor(item_list, device = device)
ratings = torch.tensor(rating_list, device = device)
print('users in gpu: ', users.is_cuda)
print('items in gpu: ', items.is_cuda)
print('ratings in gpu: ', ratings.is_cuda)

p1, p2 = model.parameters()
print('p1: ', p1)
# print(type(p1))
# print('p1 in gpu: ', p1.is_cuda)
# print(p2.is_cuda)

model.train()
y_hat = model(users, items)
# print('y hat: ', y_hat)
# print('y hat in gpu: ', y_hat.is_cuda)

print(ratings)
print(y_hat.dtype)
print(ratings.dtype)

loss = loss_fn(y_hat, ratings)
# print('loss: ', loss)

p1_grad = torch.autograd.grad(loss, p1, create_graph=True)
# print('p1 grad: ', p1_grad)
with torch.no_grad():
    p1_new = p1 - lr * p1_grad[0]
    p1.copy_(p1_new)

print('p1: ', p1)

for layer in model.children():
    layer.reset_parameters()

print('p1: ', p1)




current device:  cuda:0
model params in gpu:  True
users in gpu:  True
items in gpu:  True
ratings in gpu:  True
p1:  Parameter containing:
tensor([[-1.1258, -1.1524, -0.2506,  ..., -1.2341,  1.8197, -0.5515],
        [-0.5692,  0.9200,  1.1108,  ...,  1.1648,  0.9234,  1.3873],
        [-0.8834, -0.4189, -0.8048,  ..., -0.9944, -1.1894, -1.1959],
        ...,
        [-1.4320, -0.8343,  1.2806,  ...,  0.0558, -0.6904, -0.5621],
        [-0.2673,  1.4002,  0.3839,  ..., -1.3639, -0.1925,  0.8499],
        [ 0.7665,  0.2639,  1.3664,  ..., -0.2584, -0.1347, -1.5129]],
       device='cuda:0', requires_grad=True)
tensor([1., 1., 0.,  ..., 1., 1., 0.], device='cuda:0')
torch.float32
torch.float32
p1:  Parameter containing:
tensor([[-1.1257, -1.1521, -0.2505,  ..., -1.2341,  1.8195, -0.5513],
        [-0.5693,  0.9200,  1.1108,  ...,  1.1648,  0.9234,  1.3872],
        [-0.8833, -0.4189, -0.8048,  ..., -0.9943, -1.1893, -1.1959],
        ...,
        [-1.4320, -0.8343,  1.2805,  ...,  0.055

In [65]:
ratings = torch.tensor(rating_list, device = device, requires_grad=True)
y_hat = model(users, items)
loss = loss_fn(y_hat, ratings)
print(ratings)
print(ratings.grad)
meta_grad = torch.autograd.grad(loss, ratings)[0]
print(meta_grad)
print(ratings.grad)
meta_grad = torch.autograd.grad(loss, ratings)[0]
print(meta_grad)
print(ratings.grad)

tensor([1., 1., 0.,  ..., 1., 1., 0.], device='cuda:0', requires_grad=True)
None
tensor([-1.8709e-05,  4.7379e-05,  2.3085e-05,  ...,  9.8168e-05,
         5.6827e-05,  5.9913e-08], device='cuda:0')
None


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [27]:
meta_grad.shape

torch.Size([200000])

In [25]:
print(ratings)

tensor([1., 1., 0.,  ..., 1., 1., 0.], device='cuda:0', requires_grad=True)


In [23]:
meta_grad = torch.autograd.grad(loss, ratings)[0]
print(meta_grad)

RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.

In [28]:
model = CollaborativeFiltering(n_users, n_items, n_factors)
model.to(device)

CollaborativeFiltering(
  (user_emb): Embedding(943, 64)
  (item_emb): Embedding(1682, 64)
)

In [32]:
for layer in model.children():
    layer.reset_parameters()
    # if hasattr(layer, 'reset_parameters'):
    #     layer.reset_parameters()

In [33]:
model = CollaborativeFiltering(n_users, n_items, n_factors)
model.to(device)

CollaborativeFiltering(
  (user_emb): Embedding(943, 64)
  (item_emb): Embedding(1682, 64)
)

In [40]:
model = CollaborativeFiltering(n_users, n_items, n_factors)
model.to(device)
for _ in range(5):
    torch.manual_seed(0)
    for layer in model.children():
        layer.reset_parameters()
    p1, p2 = model.parameters()
    print('p1: ', p1)


p1:  Parameter containing:
tensor([[-0.9247, -0.4253, -2.6438,  ...,  0.1210,  0.4730, -1.0823],
        [-0.0334, -0.9734,  0.9559,  ..., -0.2128, -0.3315, -0.2023],
        [-1.1451, -0.5715, -0.6510,  ...,  0.2144, -0.7369, -0.4516],
        ...,
        [ 0.6540, -0.5414, -1.3338,  ...,  0.8598,  1.8550,  1.4760],
        [ 0.0534,  0.3653,  0.5379,  ...,  1.2643,  1.0971, -0.3906],
        [-0.5607, -1.5466,  0.2973,  ..., -1.5469,  0.2791, -0.4792]],
       device='cuda:0', requires_grad=True)
p1:  Parameter containing:
tensor([[-0.9247, -0.4253, -2.6438,  ...,  0.1210,  0.4730, -1.0823],
        [-0.0334, -0.9734,  0.9559,  ..., -0.2128, -0.3315, -0.2023],
        [-1.1451, -0.5715, -0.6510,  ...,  0.2144, -0.7369, -0.4516],
        ...,
        [ 0.6540, -0.5414, -1.3338,  ...,  0.8598,  1.8550,  1.4760],
        [ 0.0534,  0.3653,  0.5379,  ...,  1.2643,  1.0971, -0.3906],
        [-0.5607, -1.5466,  0.2973,  ..., -1.5469,  0.2791, -0.4792]],
       device='cuda:0', requires_g

#### **Measure execution time for various operations in CPU and GPU**

In [56]:
# choose device
use_gpu = -1
if use_gpu == -1:
    device = 'cpu'
else:
    device = torch.device('cuda:{}'.format(str(use_gpu)) if torch.cuda.is_available() else 'cpu')
print('device: ', device)

# start execution
start_time = time.time()

# code

for _ in range(300000):
    ratings = torch.tensor(rating_list, device = device, requires_grad=True)
    # ratings = torch.tensor(rating_list, device = device, requires_grad=False)
    # ratings = torch.tensor(rating_list, device = device, requires_grad=False)

# compute execution time
exec_time = int(time.time() - start_time)
exec_time = time.strftime("%Hh %Mm %Ss", time.gmtime(exec_time))
print('-> Execution time: {}'.format(exec_time))

device:  cpu
-> Execution time: 00h 00m 07s


In [None]:
# to initialise users, items and ratings 100,000 times,
# cpu takes 8 seconds, and gpu takes 56 seconds

# to initialise users, items and ratings 100,000 times with requires grad as True
# cpu takes 7 seconds, and gpu takes 29 seconds

In [63]:
# choose device
use_gpu = 0
if use_gpu == -1:
    device = 'cpu'
else:
    device = torch.device('cuda:{}'.format(str(use_gpu)) if torch.cuda.is_available() else 'cpu')
print('device: ', device)

# start execution
start_time = time.time()

# code
model = CollaborativeFiltering(n_users, n_items, n_factors)
model.to(device)

for _ in range(10000):
    torch.manual_seed(0)
    # model = CollaborativeFiltering(n_users, n_items, n_factors)
    # model.to(device)
    for layer in model.children():
        layer.reset_parameters()
    

# compute execution time
exec_time = int(time.time() - start_time)
exec_time = time.strftime("%Hh %Mm %Ss", time.gmtime(exec_time))
print('-> Execution time: {}'.format(exec_time))

device:  cuda:0
-> Execution time: 00h 00m 00s
