In [10]:
import torch
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from pathlib import Path

def set_random_seed(seed: int = 50):
    """
    Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU generator plus the CUDA generators), exports
    ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.

    Args:
        seed (int): Seed shared by all generators. Default is 50.
    """
    # Python / NumPy side.
    random.seed(seed)
    # Note: the hash seed of the already-running interpreter is fixed at
    # start-up; this export only reaches processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    # PyTorch side: default generator, current CUDA device, every CUDA device.
    torch_seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in torch_seeders:
        apply_seed(seed)

    # Disable cuDNN auto-tuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


set_random_seed()

In [11]:
def loss_function(ratings: torch.Tensor, user_embeddings: torch.Tensor, item_embeddings: torch.Tensor) -> torch.Tensor:
    """
    Sum of squared errors between ratings and masked matrix-factorization predictions.

    Predictions are ``user_embeddings @ item_embeddings.T``. Entries whose
    rating is not strictly positive are treated as unobserved: their
    prediction is zeroed before it is subtracted from the rating.

    Args:
        ratings (torch.Tensor): Rating matrix; values <= 0 mark unobserved entries.
        user_embeddings (torch.Tensor): 2-D user embedding matrix.
        item_embeddings (torch.Tensor): 2-D item embedding matrix.

    Returns:
        torch.Tensor: Scalar tensor holding the summed squared error.
    """
    observed = ratings > 0
    scores = user_embeddings.mm(item_embeddings.T)

    # Zero out the predictions of unobserved entries, then compare to the ratings.
    residual = ratings - observed * scores

    return residual.pow(2).sum()

In [12]:
def calculate_gradients(ratings: torch.Tensor, user_embeddings: torch.Tensor, item_embeddings: torch.Tensor, mask: torch.Tensor) -> tuple:
    """
    Evaluate the embedding gradients in closed form (no call to ``backward``).

    With ``residual = ratings - mask * (user_embeddings @ item_embeddings.T)``
    the function returns ``-2 * residual @ item_embeddings`` and
    ``-2 * residual.T @ user_embeddings``. The residual itself is not
    re-masked, so any non-zero rating outside ``mask`` enters it unchanged.

    Args:
        ratings (torch.Tensor): Rating data.
        user_embeddings (torch.Tensor): User embeddings.
        item_embeddings (torch.Tensor): Item embeddings.
        mask (torch.Tensor): Multiplied element-wise with the predictions to
            zero out non-relevant entries.

    Returns:
        tuple: ``(grad_user_embeddings, grad_item_embeddings)``.
    """
    scores = user_embeddings.matmul(item_embeddings.T)

    # Shared error term used by both gradients.
    residual = ratings - mask * scores

    grad_wrt_users = -2 * residual.matmul(item_embeddings)
    grad_wrt_items = -2 * residual.T.matmul(user_embeddings)

    return grad_wrt_users, grad_wrt_items

In [13]:
def read_data(data_path: Path) -> tuple:
    """
    Load the six ``.npy`` arrays stored for one user.

    Args:
        data_path (Path): Directory containing the user's ``.npy`` files.

    Returns:
        tuple: The loaded arrays, in this order:
            real_ratings (ndarray): ``R.npy`` - real ratings of users (unseen by attackers).
            user_gradients (ndarray): ``X_grad.npy`` - user gradients (unseen by attackers).
            item_gradients (ndarray): ``y_grad.npy`` - item gradients (visible to attackers).
            user_embeddings (ndarray): ``X_embedding.npy`` - user embeddings (unseen by attackers).
            item_embeddings (ndarray): ``y_embedding.npy`` - item embeddings (visible to attackers).
            training_loss (ndarray): ``loss_g.npy`` - loss during user training (not currently used).
    """
    # The order here defines the order of the returned tuple.
    file_names = (
        'R.npy',
        'X_grad.npy',
        'y_grad.npy',
        'X_embedding.npy',
        'y_embedding.npy',
        'loss_g.npy',
    )
    return tuple(np.load(data_path.joinpath(file_name)) for file_name in file_names)

In [14]:
def loss_function_dummy_gradient(predicted_gradient: torch.Tensor, true_gradient: torch.Tensor) -> torch.Tensor:
    """
    Squared Euclidean distance between the dummy gradient and the user-uploaded gradient.

    The element-wise differences are squared and summed; no square root is taken.

    Args:
        predicted_gradient (torch.Tensor): Dummy gradient.
        true_gradient (torch.Tensor): User-uploaded gradient.

    Returns:
        torch.Tensor: Scalar tensor with the sum of squared differences.
    """
    return (predicted_gradient - true_gradient).pow(2).sum()

In [None]:
# Gradient-matching attack: for every user, jointly optimise a dummy user
# embedding and a dummy rating tensor so that the item gradients they produce
# (via calculate_gradients) match the item gradients stored for two rounds.

# --- Attack configuration -------------------------------------------------
DATA_ROOT = Path('./data')       # one sub-directory per user: ./data/<user_id>/
NUM_USERS = 30                   # users 0 .. NUM_USERS - 1 are attacked
NUM_LBFGS_STEPS = 499            # optimizer.step() calls per user
LOCAL_STEP_SIZE = 0.0005         # step used to advance the dummy user embedding
                                 # between the two rounds; presumably the
                                 # victim's learning rate - TODO confirm
DUMMY_INIT_SEED = 42             # re-applied per user, so every user starts
                                 # from the same random draw sequence
DUMMY_RATING_HIGH = 6            # exclusive bound: dummy ratings start in 0..5
OBSERVED_THRESHOLD = 1e-6        # ratings above this count as observed


def _to_float_tensor(array) -> torch.Tensor:
    """Copy a NumPy array into a new float32 torch tensor."""
    return torch.tensor(array, dtype=torch.float32)


final_loss_list = []
final_dummy_ratings = []
final_ratings = []

for user_id in range(NUM_USERS):
    print(f'Processing user {user_id}')
    data_path = DATA_ROOT / str(user_id)
    ratings, user_gradients, item_gradients, user_embeddings, item_embeddings, training_loss = read_data(data_path)

    # The stored user embedding only serves as a shape/dtype template for the
    # dummy embedding; its values are never read by the optimisation.
    user_embedding_1 = _to_float_tensor(user_embeddings[0])

    # Item embeddings and item gradients of the first two stored rounds.
    item_embedding_1 = _to_float_tensor(item_embeddings[0])
    item_embedding_2 = _to_float_tensor(item_embeddings[1])
    item_gradient_1 = _to_float_tensor(item_gradients[0])
    item_gradient_2 = _to_float_tensor(item_gradients[1])

    # torch.tensor() already copies, so `ratings` itself stays untouched.
    ratings_tensor = _to_float_tensor(ratings)

    # Random starting point for the two unknowns being reconstructed.
    torch.random.manual_seed(DUMMY_INIT_SEED)
    dummy_user_embedding = torch.rand_like(user_embedding_1, requires_grad=True)
    dummy_ratings = torch.randint_like(ratings_tensor, low=0, high=DUMMY_RATING_HIGH, requires_grad=True)

    # Positions with an observed rating, taken from the real ratings.
    mask = ratings_tensor > OBSERVED_THRESHOLD

    optimizer = torch.optim.LBFGS([dummy_user_embedding, dummy_ratings])
    loss_list = []

    def closure():
        """Return the gradient-matching loss for the current dummy variables."""
        optimizer.zero_grad()

        # Round 1: item gradient induced by the dummy data.
        dummy_user_grad, dummy_item_grad = calculate_gradients(
            dummy_ratings, dummy_user_embedding, item_embedding_1, mask
        )
        loss_round_1 = loss_function_dummy_gradient(dummy_item_grad, item_gradient_1)

        # Advance the dummy user embedding by one gradient step before
        # evaluating the second round.
        stepped_user_embedding = dummy_user_embedding - LOCAL_STEP_SIZE * dummy_user_grad

        # Round 2: only the item gradient is needed.
        _, dummy_item_grad = calculate_gradients(
            dummy_ratings, stepped_user_embedding, item_embedding_2, mask
        )
        loss_round_2 = loss_function_dummy_gradient(dummy_item_grad, item_gradient_2)

        total_loss = loss_round_1 + loss_round_2
        total_loss.backward()
        # Recorded once per closure evaluation, i.e. possibly several times
        # per optimizer.step() call.
        loss_list.append(total_loss.item())
        return total_loss

    for _step in range(NUM_LBFGS_STEPS):
        optimizer.step(closure)

    final_loss_list.append(loss_list)
    # Ratings are stored by magnitude; the optimisation itself is unconstrained.
    final_dummy_ratings.append(dummy_ratings.abs().detach().numpy())
    final_ratings.append(ratings)

Processing user 0
Processing user 1
Processing user 2
Processing user 3
Processing user 4
Processing user 5
Processing user 6
Processing user 7
Processing user 8
Processing user 9
Processing user 10
Processing user 11
Processing user 12
Processing user 13
Processing user 14
Processing user 15
Processing user 16
Processing user 17
Processing user 18
Processing user 19
Processing user 20
Processing user 21
Processing user 22
Processing user 23
Processing user 24
Processing user 25
Processing user 26
Processing user 27
Processing user 28
Processing user 29


In [None]:
# Show the real ratings of the first user, restricted to strictly positive entries
first_user_ratings = final_ratings[0]
actual_ratings = first_user_ratings[first_user_ratings > 0]
print("Actual ratings:", actual_ratings)

Actual ratings for the first user: [5. 4. 3. 5. 4. 1. 5. 3. 2. 5. 5. 5. 5. 5. 4. 5. 4. 1. 4. 2. 1. 3. 5. 4.
 2. 3. 2. 5. 4. 5. 4. 4. 5. 3. 5. 4. 4. 3. 3. 5. 4. 5. 4. 5. 5. 4. 3. 2.
 5. 4. 4. 3. 4. 3. 3. 3. 4. 3. 4. 4. 4. 1. 4. 5. 5. 4. 3. 5. 4. 5. 4. 5.
 3. 5. 2. 4. 5. 3. 4. 3. 5. 2. 2. 1. 2. 4. 5. 5. 5. 1. 5. 5. 3. 3. 5. 1.
 4. 4. 5. 3. 2. 5. 4. 5. 3. 1. 4. 4. 3. 5. 1. 3. 1. 2. 1. 2. 3. 2. 5. 4.
 5. 5. 2. 4. 3. 3. 4. 4. 4. 3. 5. 5. 2. 5. 5. 5. 5. 5. 5. 5. 5. 3. 3. 5.
 4. 5. 4. 4. 4. 4. 3. 3. 5. 4. 4. 4. 5. 4. 3. 3. 5. 4. 5. 3. 4. 5. 5. 4.
 4. 3. 4. 2. 4. 3. 3. 1. 3. 5. 4. 5. 4. 4. 1. 3. 2. 4. 4. 2. 4. 3. 4. 5.
 1. 2. 2. 5. 1. 4. 4. 4. 4. 2. 5. 2. 4. 1. 1. 3. 1. 4. 1. 4. 5. 5. 5. 2.
 3.]


In [24]:
# Show the reconstructed (dummy) ratings of the first user at the positions
# where that user's real rating is strictly positive
rated_positions = final_ratings[0] > 0
actual_dummy_ratings = final_dummy_ratings[0][rated_positions]
print("Dummy ratings:", actual_dummy_ratings)

Dummy ratings: [4.999999   3.9999964  2.9999971  5.0000024  3.9999995  0.99999905
 4.999999   2.9999974  1.9999977  5.0000014  5.         5.
 5.000001   4.9999995  4.000001   5.000002   3.9999983  1.0000018
 3.999999   1.9999986  0.9999999  3.0000007  4.9999986  3.999998
 1.999998   3.         1.999998   4.9999976  3.9999993  4.9999995
 4.0000005  3.9999979  4.9999986  2.9999986  4.9999995  4.0000024
 4.0000005  2.9999983  3.0000005  4.999998   3.9999993  4.9999995
 3.9999971  4.9999976  4.999999   4.         2.9999957  2.
 5.0000005  4.0000014  3.9999986  2.9999986  3.9999998  2.9999983
 2.9999986  2.999997   3.999997   2.9999983  4.000001   3.999999
 3.9999995  0.9999999  3.9999998  4.9999976  5.         3.999998
 2.9999986  5.0000005  4.         4.9999976  4.0000005  4.9999986
 2.9999979  5.0000005  1.9999988  3.9999979  5.000001   2.9999988
 3.9999995  2.9999971  4.9999995  1.9999986  2.0000002  0.9999978
 1.9999987  4.0000014  5.         4.999999   5.         0.9999993
 5.        

In [25]:
# Persist the reconstructed and the real ratings of all users as .npy files
outputs = (
    ('final_dummyR.npy', final_dummy_ratings),
    ('final_R.npy', final_ratings),
)
for file_name, per_user_arrays in outputs:
    np.save(file_name, np.array(per_user_arrays))