In [1]:
from IPython.display import display, Markdown, Latex
import os
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch import nn
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

import torchvision.models as models
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import Omniglot
from PIL import Image

from datetime import datetime

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Will use:", device)

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

Will use: cuda


In [2]:
data_dir = '.'

## Loading datasets

In [3]:
from hypnettorch.data import FashionMNISTData, MNISTData
from hypnettorch.data.dataset import Dataset
from hypnettorch.mnets import LeNet
from hypnettorch.mnets.resnet import ResNet
from hypnettorch.mnets.mlp import MLP
from hypnettorch.hnets import HMLP

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import learn2learn as l2l
import copy

np.random.seed(42)
torch.manual_seed(42)

mnist = MNISTData(data_dir, use_one_hot=True, validation_size=0)
fmnist = FashionMNISTData(data_dir, use_one_hot=True, validation_size=0)

omniglot = l2l.vision.datasets.FullOmniglot(root=data_dir,
                                            transform=transforms.Compose([
                                                transforms.Resize(28, interpolation=Image.LANCZOS),
                                                transforms.ToTensor(),
                                                lambda x: 1.0 - x,
                                            ]),
                                            download=True)
omniglot = l2l.data.MetaDataset(omniglot)


Reading MNIST dataset ...
Elapsed time to read dataset: 0.122000 sec
Files already downloaded and verified
Files already downloaded and verified


## Convert the dataset to numpy for easier manipulation

In [4]:
# Create a DataLoader for batching and shuffling the data
batch_size = len(omniglot)  # Set batch size to the total number of examples to load all data at once
data_loader = DataLoader(omniglot, batch_size=batch_size, shuffle=False)

# Iterate through the DataLoader
for batch in data_loader:
    images, labels = batch
    # Convert PyTorch tensors to NumPy arrays
    dataset = images.numpy()
    dataset_lbl = labels.numpy()    
    sizes = dataset.shape
    
print("Dataset dimension:", dataset.shape)
print("Labels dimension:", dataset_lbl.shape)
print(np.min(dataset_lbl))
print(np.max(dataset_lbl))
    

Dataset dimension: (32460, 1, 28, 28)
Labels dimension: (32460,)
0
1622


## Create 2 different datasets for two disjoint set of labels (deterministic for now)

In [5]:
# Get a batch of training samples from each data handler.
# mnist_inps, mnist_trgts = mnist.next_train_batch(4)
# dataset_inps, dataset_trgts = dataset.next_train_batch(4)
# dataset_full, dataset_full_lbl = dataset.next_train_batch(60000)
print(dataset_lbl)

n_classes = len(np.unique(dataset_lbl))
dataset_full = dataset.reshape((dataset.shape[0], dataset.shape[2]*dataset.shape[3]))
dataset_full_lbl = dataset_lbl

print(dataset_full.shape)
print(dataset_full_lbl.shape)

sep = 100
lbls_0 = [i for i in range(sep)]
lbls_1 = [i for i in range(sep, n_classes)]

mask_0 = np.isin(dataset_full_lbl, np.array(lbls_0))
mask_1 = np.isin(dataset_full_lbl, np.array(lbls_1))
dataset_0, dataset_0_lbl = dataset_full[mask_0], dataset_full_lbl[mask_0]

print("Shape of the dataset_0:",dataset_0.shape)

dataset_1, dataset_1_lbl = dataset_full[mask_1], dataset_full_lbl[mask_1]

print("Shape of the dataset_1:",dataset_1.shape)

print("Some labels in set 1:", dataset_0_lbl[0:10])
print("Some labels in set 2:", dataset_1_lbl[0:10])
assert(np.all(np.isin(dataset_0_lbl, lbls_0)))
assert(np.all(np.isin(dataset_1_lbl, lbls_1)))

# mnist.plot_samples('MNIST Examples', mnist_inps, outputs=mnist_trgts)
# dataset.plot_samples('FashionMNIST Examples with lbl < sep', dataset_0[0:4], outputs=dataset_0_lbl[0:4])
# dataset.plot_samples('FashionMNIST Examples with lbl >= sep', dataset_1[0:4], outputs=dataset_1_lbl[0:4])

torch_dataset = torch.tensor(dataset_full_lbl)
unique_values, counts = torch.unique(torch_dataset, return_counts=True)

print("Minimum and maximum amount of sample per classes in the dataset")
print("Each classes contains at least", torch.min(counts).item(), "samples")
print("Each classes contains at most", torch.max(counts).item(), "samples")

[   0    0    0 ... 1622 1622 1622]
(32460, 784)
(32460,)
Shape of the dataset_0: (2000, 784)
Shape of the dataset_1: (30460, 784)
Some labels in set 1: [0 0 0 0 0 0 0 0 0 0]
Some labels in set 2: [100 100 100 100 100 100 100 100 100 100]
Minimum and maximum amount of sample per classes in the dataset
Each classes contains at least 20 samples
Each classes contains at most 20 samples


### Compute a pgd attack on test set to assert robustness

In [62]:
class KernelCNN(nn.Module):
    def __init__(self, z_length):
        super(KernelCNN, self).__init__()
        self.z_length = z_length
        resnet18 = models.resnet18(pretrained=False)
        resnet18.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(4, 4), stride=(1, 1), padding=(3, 3), bias=False)
        resnet18.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        resnet18.fc = torch.nn.Linear(resnet18.fc.in_features, self.z_length)
        self.resnet = resnet18

    def forward(self, x):
        x = x.view(-1, 1, 28, 28)
        return self.resnet(x)
    
def standardize(t):
    mean = torch.mean(t)
    std_dev = torch.std(t)
    return (t - mean) / std_dev

def compute_kernel(X, y, cnn, K):
    """
    Compute Hypershot kernel for a support set X and label y
    It takes the average of the z's for each label as suggested in the Hypershot paper
    
    Args:
        X (tensor): Support set used to compute the kernel
        y (tensor): corresponding labels
        cnn : CNN used to compute the embeddings
        K: the K of K-shot K-way learning

    Returns:
        type: embeddings, kernel
    """
    # Obtain the indices that would sort y_test
    indices = torch.argsort(y)

    # Use the indices to sort the rows of X_test
    sorted_X = X[indices].to(device)
    sorted_y = y[indices].to(device)
    
    reshaped_X = sorted_X.view(sorted_X.shape[0], 1, 28, 28).to(device)
    nn_X = cnn(reshaped_X)
    
    mean_X = torch.zeros((int(nn_X.shape[0] / K), nn_X.shape[1])).to(device)
    for i in range(K):
        mean_X[i] = torch.mean(nn_X[i*K:(i+1)*K], dim = 0)
    
    assert(nn_X.shape==(sorted_X.shape[0], cnn.z_length))
    
    return standardize(mean_X), standardize(torch.matmul(mean_X, torch.t(mean_X)))

def get_s_and_q_sets(X, y, trgt_lbls, K, q_size):
    """
    Computes a support set for data X for classes in y with K sample per classes
    and corresponding query sets of size q_size.
    
    Args:
        X (tensor): Data used to compute the sets (can contain label you do not want for your sets)
        y (tensor): corresponding labels
        trgt_lbls : the labels that end up in the sets
        K: the K of K-shot K-way learning
        q_size: amount of sample per classes in query set

    Returns:
        type: support set, support set labels, query set, query set labels
    """
    
    s_set = np.zeros((len(trgt_lbls) * K, X.shape[1]))
    s_set_lbl = np.zeros((len(trgt_lbls) * K))
    
    q_set = np.zeros((len(trgt_lbls) * q_size, X.shape[1]))
    q_set_lbl = np.zeros((len(trgt_lbls) * q_size))
    
    for j, l in enumerate(trgt_lbls):
        mask = (y == l)
        masked_data = X[mask]
        masked_lbls = y[mask]
        s_set[j*K:(j+1)*K] = masked_data[0:K]
        s_set_lbl[j*K:(j+1)*K] = masked_lbls[0:K]
        q_set[j*q_size:(j+1)*q_size] = masked_data[K:K+q_size]
        q_set_lbl[j*q_size:(j+1)*q_size] = masked_lbls[K:K+q_size]
    
    s_set = torch.tensor(s_set, requires_grad=True).to(device).float()
    s_set_lbl = torch.tensor(s_set_lbl, requires_grad=True).to(device).float()
    q_set = torch.tensor(q_set, requires_grad=True).to(device).float()
    q_set_lbl = torch.tensor(q_set_lbl, requires_grad=True).to(device).float()
    
    return s_set, s_set_lbl, q_set, q_set_lbl

def get_q_sample_features(X, cnn, kernel, zs):
    """
    Computes the final features used for classification, given a query sample mx
    
    Args:
        X (tensor): query sample 
        cnn: the cnn trained to compute the desired features
        kernel: the kernel corresponding to the corresponding X's support set
        zs: z space of the support set corresponding to the query sample

    Returns:
        type: final flattened features use by the main network
    """
    X = X.view(1, 28, 28)
    zs_q = cnn(X)
    zs_q_m = standardize(torch.matmul(zs, torch.t(zs_q)))
    # This could be modified, the features are just the concatenation of the kernel and the q_sample multiplied
    # by the z_space of the support set
    q_features = zs_q_m.flatten()
    return q_features

def compute_sets_and_features(X, y, trgt_lbls, cnn, K, q_size):
    s_set, s_set_lbl, q_set, q_set_lbl = get_s_and_q_sets(X, y, trgt_lbls, K, q_size)

    # Kernel computation
    z_space, kernel = compute_kernel(s_set, s_set_lbl, cnn, K)

    # Gather features for all samples in query training set
    all_q_features = torch.zeros((q_set.shape[0], K)).to(device)
    all_q_features_lbls = torch.zeros((q_set.shape[0])).to(device)
    for i in range(q_set.shape[0]):
        x = q_set[i].view(1, -1)
        q_sample_features = get_q_sample_features(x, cnn, kernel, z_space)
        all_q_features[i] = q_sample_features
        all_q_features_lbls[i] = q_set_lbl[i]
        
    return s_set, s_set_lbl, q_set, q_set_lbl, z_space, kernel, all_q_features, all_q_features_lbls

def extend_pred_to_nclasses(pred, n_c, lbls):
    out = torch.zeros((pred.shape[0], n_classes)).to(device)
    for i in range(out.shape[0]):
        out[i][c_lbls] = pred[i]
    return out

In [63]:
def project(x_adv, x_orig):
    epsilon = 8/255.0
    x_adv_eps = torch.minimum(torch.maximum(x_adv, x_orig-epsilon), x_orig+epsilon)
    return torch.clamp(x_adv_eps, 0, 1)

def pgd_attack_data(X, y, t_mnet, t_hnet, K, cnn, kernel, zs):
    criterion = nn.CrossEntropyLoss()    
    x_adv = torch.clone(X).detach()
   
    for i in range(20):
        x_adv = x_adv.requires_grad_(True)
        x_features = torch.zeros((x_adv.shape[0], K)).to(device)
        x_features_lbls = torch.zeros((x_adv.shape[0])).to(device)
        for j in range(x_adv.shape[0]):
            mx = x_adv[j].view(-1, X.shape[1])
            x_sample_features = get_q_sample_features(mx, cnn, kernel, zs)
            x_features[j] = x_sample_features
            x_features_lbls[j] = y[j]
            
        # Apply to test set
        W_mnet = t_hnet(cond_id=0)
        logits = t_mnet.forward(x_features, weights=W_mnet)
        loss_adv = criterion(logits, x_features_lbls.long())
        loss_adv.backward(retain_graph=True)
        
        grad = x_adv.grad.detach()

        with torch.no_grad():
            x_adv = x_adv + 0.1 * torch.sign(grad)  # take a gradient update step to minimize the objective
            x_adv = project(x_adv, X)               # ensure we stay in the allowed range
            
    return x_adv

In [73]:
def calc_accuracy_lbls(X_test, y_test, test_classes, hnet, mnet, Ks, cnn, n_c, q_size):
    """
    Computes the prediction accuracy for the sample with label test_classes in X_test.
    Mainly used as utility for the calc_accuracy function below.
    
    Args:
        X_test (tensor): entire test set
        y_test (tensor): corresponding labels
        test_classes: the classes we want to consider for testing accuracies (should contain Ks classes)
        mnet : main net trained by the hypernetwork
        Ks: the K of K-shot K-way
        s_cnn: the cnn trained to compute the desired features

    Returns:
        type: accuracy
    """
    
    with torch.no_grad():
        s_set_test, s_set_lbl_test, q_set_test, q_set_lbl_test = get_s_and_q_sets(X_test, y_test, \
                                                                                 test_classes, Ks, q_size)
        z_space, K = compute_kernel(s_set_test, s_set_lbl_test, cnn, Ks)
        
        # Accuracy
        all_q_features = torch.zeros((q_set_test.shape[0], Ks)).to(device)
        all_q_features_lbls = torch.zeros((q_set_test.shape[0])).to(device)
        for i in range(q_set_test.shape[0]):
            mx = q_set_test[i].view(-1, q_set_test.shape[1])
            q_sample_features = get_q_sample_features(mx, cnn, K, z_space)
            all_q_features[i] = q_sample_features
            all_q_features_lbls[i] = q_set_lbl_test[i]

        W_dataset_l_acc =  hnet(uncond_input=K.view(1, -1))
        dataset_l_P_acc = mnet.forward(all_q_features, weights=W_dataset_l_acc)
        prediction_extended_acc = extend_pred_to_nclasses(dataset_l_P_acc, n_c, test_classes)
        criterion = nn.CrossEntropyLoss()
        loss = criterion(prediction_extended_acc, all_q_features_lbls.long())
        accuracy = (torch.argmax(prediction_extended_acc,dim=1) == all_q_features_lbls.long()).float().mean().item()
        print("Correctly predicted samples had labels:", all_q_features_lbls[torch.argmax(prediction_extended_acc,dim=1) == all_q_features_lbls.long()])
    return accuracy, loss.item()


def calc_accuracy(X_test, y_test, hnet, mnet, Ks, cnn, n_c, q_size):
    """
    Computes the prediction accuracy for the entire X_test test set.
    
    Args:
        X_test (tensor): entire test set
        y_test (tensor): corresponding labels
        mnet : main net trained by the hypernetwork
        Ks: the K of K-shot K-way
        s_cnn: the cnn trained to compute the desired features

    Returns:
        type: average accuracy over all the label batch (of Ks different labels each time)
    """
    if not torch.is_tensor(X_test):
        X_test_t = torch.FloatTensor(X_test).to(device)
    else: 
        X_test_t = torch.clone(X_test)
        
    if not torch.is_tensor(y_test):
        y_test_t = torch.FloatTensor(y_test).to(device)
    else:
        y_test_t = torch.clone(y_test)
        
    diff_classes = torch.unique(y_test_t)
    n_diff_classes = diff_classes.shape[0]
    n_sets = int(n_diff_classes / Ks)
    acc, loss = 0.0, 0.0
    for i in range(n_sets):
        lbls = diff_classes[i*Ks:(i+1)*Ks].tolist()
        d_acc, d_loss = calc_accuracy_lbls(X_test, y_test, lbls, hnet, mnet, Ks, cnn, n_c, q_size)
        acc += d_acc
        loss += d_loss
    acc = acc / n_sets
    loss = loss / n_sets
    return acc, loss

In [74]:
def calc_accuracy_lbls_adv(X_test, y_test, test_classes, mnet, Ks, s_cnn, q_set_test_adv):
    """
    Same as the calc_accuracy_lbls function but replace the query set with an attacked version of itself.
    """
    
    with torch.no_grad():
        s_set_test, s_set_lbl_test, q_set_test, q_set_lbl_test = get_s_and_q_sets(X_test, y_test, \
                                                                                 test_classes, Ks, 5) 
        q_set_test = q_set_test_adv
        z_space, K = compute_kernel(s_set_test, s_set_lbl_test, s_cnn, Ks)
        
        # Accuracy
        all_q_features = torch.zeros((q_set_test.shape[0], Ks)).to(device)
        all_q_features_lbls = torch.zeros((q_set_test.shape[0])).to(device)
        for i in range(q_set_test.shape[0]):
            mx = q_set_test[i].view(-1, q_set_test.shape[1])
            my = torch.argmax(q_set_lbl_test[i])
            q_sample_features = get_q_sample_features(mx, s_cnn, K, z_space)
            all_q_features[i] = q_sample_features
            all_q_features_lbls[i] = my

        W_dataset_l = hnet(cond_id=0)
        dataset_l_P = mnet.forward(all_q_features, weights=W_dataset_l)
        criterion = nn.CrossEntropyLoss()
        loss = criterion(dataset_l_P, all_q_features_lbls.long())
        accuracy = (torch.argmax(dataset_l_P,dim=1) == all_q_features_lbls.long()).float().mean().item()
        print("Correctly predicted labels:", all_q_features_lbls.long()[torch.argmax(dataset_l_P,dim=1) == all_q_features_lbls.long()])
    return accuracy, loss.item()

In [75]:
from sklearn.model_selection import train_test_split

# Configure training.
nepochs=250
# epoch after which adversarial training starts
do_adv_train = 10000
# K-shot k-way
Ks = 5
# Length of the embeddings produced by the CNN
z_len = 10

load_weights = 0
continue_training = 0

# Array storing statistics (not used for now)
accuracies_dataset_0 = []
accuracies_dataset_0_adv = []
accuracies_dataset_1 = []
accuracies_dataset_1_adv = []

# Loop in case we want to do statistics (not sued for now)
for o in range(1):
    print("Iteration", o+1)
    
    if continue_training == 0:
        # Models definition
        kcnn = KernelCNN(z_len).to(device)
        mnet = MLP(n_in=Ks, n_out=Ks, hidden_layers=[5, 5]).to(device)
        hnet = HMLP(mnet.param_shapes, uncond_in_size=Ks*Ks, cond_in_size=0,
                    layers=[100, 100], num_cond_embs=0).to(device)
        params = hnet.conditional_params.copy()
        hnet.apply_hyperfan_init(mnet=mnet)
        criterion = nn.CrossEntropyLoss()

        # If we want to load weights from anywhere
        if load_weights == 1:
            file_path = 'models/hnet_20231229022719_49.pth'
            hnet.load_state_dict(torch.load(file_path))
            file_path = 'models/kcnn_20231229022719_49.pth'
            kcnn.load_state_dict(torch.load(file_path))

        # The amount of sets of Ks labels we can do during training
        n_sets = int(len(lbls_0) / Ks)

        # Compute training and validation sets for each of the n_sets labels sets
        train_test_sets = []
        all_test_sets = np.empty((0, dataset_0.shape[1]))
        all_test_sets_lbl = np.empty((0))
        for l_set_id in range(n_sets):
            c_lbls = lbls_0[l_set_id*Ks:(l_set_id+1)*Ks]
            if (l_set_id+1) % 100 == 0:
                print("Generated train-test split for", l_set_id+1,"/",n_sets)
            mask_b = np.isin(dataset_0_lbl, np.array(c_lbls))
            dataset_0_b, dataset_0_lbl_b = dataset_0[mask_b], dataset_0_lbl[mask_b]
            dataset_0_train, dataset_0_test, dataset_0_lbl_train, dataset_0_lbl_test = \
                            train_test_split(dataset_0_b, dataset_0_lbl_b, random_state=42, test_size=0.5, stratify=dataset_0_lbl_b)
            all_test_sets = np.concatenate((all_test_sets, dataset_0_test), axis=0)
            all_test_sets_lbl = np.concatenate((all_test_sets_lbl, dataset_0_lbl_test), axis=0)
            train_test_sets.append((dataset_0_train, dataset_0_test, dataset_0_lbl_train, dataset_0_lbl_test, c_lbls))
    
    # Optimizer and scheduler initialization
    optimizer = optim.Adam(hnet.parameters(), lr=0.00001)
    optimizer_s = optim.Adam(kcnn.parameters(), lr=0.00001)
    scheduler = CosineAnnealingLR(optimizer, T_max=nepochs, eta_min=0.00001)
    scheduler_s = CosineAnnealingLR(optimizer_s, T_max=nepochs, eta_min=0.00001)
        
    # Main training loop
    for epoch in range(nepochs): # For each epoch.
        print("----------------------- Epoch", epoch, " -----------------------")
        # Stores the loss over all labels sets
        global_loss = 0.0
        # We loop over all our sets at each epoch
        for l_set_id in range(n_sets):
            (dataset_l_train, dataset_l_test, dataset_l_lbl_train, dataset_l_lbl_test, c_lbls) = train_test_sets[l_set_id]
            
            s_set_train, s_set_lbl_train, q_set_train, q_set_lbl_train, z_space, K, all_q_features, all_q_features_lbls = \
            compute_sets_and_features(dataset_l_train, dataset_l_lbl_train, c_lbls, kcnn, Ks, 1)
            
            # Formward pass
            W_dataset_l = hnet(uncond_input=K.view(1, -1))
            dataset_l_P = mnet.forward(all_q_features, weights=W_dataset_l)
            prediction_extended = extend_pred_to_nclasses(dataset_l_P, n_classes, c_lbls)
            loss_dataset_l = criterion(prediction_extended, all_q_features_lbls.long())

            # Adversarial training
            if epoch == do_adv_train and l_set_id == 0:
                print("Adversarial training starts.")
            if epoch >= do_adv_train:
                mx_adv = pgd_attack_data(q_set_train, q_set_lbl_train, mnet, hnet, Ks, kcnn, K, z_space)
                
                all_q_features_adv = torch.zeros((q_set_train.shape[0], Ks)).to(device)
                for i in range(mx_adv.shape[0]):
                    mxx = mx_adv[i].view(-1, q_set_train.shape[1])
                    q_sample_features_adv = get_q_sample_features(mxx, kcnn, K, z_space)
                    all_q_features_adv[i] = q_sample_features_adv
                dataset_l_P_adv = mnet.forward(all_q_features_adv, weights=W_dataset_l)
                prediction_extended_adv = extend_pred_to_nclasses(dataset_l_P_adv, n_classes, c_lbls)
                loss_dataset_l_adv = criterion(prediction_extended_adv, all_q_features_lbls.long())
                loss_dataset_l += loss_dataset_l_adv
            
            global_loss += loss_dataset_l.item()
            if l_set_id % 10 == 0:
                print("Local train acc and loss at the end of set:", l_set_id, "-->", calc_accuracy(dataset_l_train, dataset_l_lbl_train, hnet, mnet,\
                                                                                       Ks, kcnn, n_classes, 5))
                print("Local test acc and loss at the end of set:", l_set_id, "-->", calc_accuracy(dataset_l_test, dataset_l_lbl_test, hnet, mnet,\
                                                                                       Ks, kcnn, n_classes, 5))
                if do_adv_train < nepochs:
                    s_set_test, s_set_lbl_test, q_set_test, q_set_lbl_test, z_space_tes, K_test, all_q_features_test, all_q_features_lbls_test = \
                    compute_sets_and_features(dataset_l_test, dataset_l_lbl_test, c_lbls, kcnn, Ks, 5)
                    mx_adv_test = pgd_attack_data(q_set_test, q_set_lbl_test, mnet, hnet, Ks, kcnn, K, z_space)
                    print("Local adv test acc and loss at the end of set:", l_set_id, "-->", calc_accuracy_lbls_adv(dataset_l_test, dataset_l_lbl_test, c_lbls, hnet, mnet,\
                                                                                           Ks, kcnn, mx_adv_test))
            loss_dataset_l.backward()
            optimizer.step()
            optimizer_s.step()
            optimizer.zero_grad()
            scheduler.step()
            scheduler_s.step()
                
  
        print("Global loss at the end of epoch:", epoch, ":", global_loss)
        if (epoch+1) % 10 == 0:
            current_time = datetime.now().strftime("%Y%m%d%H%M%S")
            # Create a file name with the current time
            hnet_file = f'models/hnet_{current_time}_{epoch}.pth'
            torch.save(hnet.state_dict(), hnet_file)
            kcnn_file = f'models/kcnn_{current_time}_{epoch}.pth'
            torch.save(kcnn.state_dict(), kcnn_file)
            print("--> Global test accuracy after epoch:", epoch, "-->", calc_accuracy(all_test_sets, all_test_sets_lbl,\
                                                                                       hnet, mnet, Ks, kcnn, n_classes, 5))
        print()

    print("END OF ITERATION:",o+1)

Iteration 1
Creating an MLP with 90 weights.
Created MLP Hypernet.
Hypernetwork with 21790 weights and 90 outputs (compression ratio: 242.11).
The network consists of 21790 unconditional weights (21790 internally maintained) and 0 conditional weights (0 internally maintained).
----------------------- Epoch 0  -----------------------




Correctly predicted samples had labels: tensor([3., 3., 3., 3., 3.], device='cuda:0')
Local train acc and loss at the end of set: 0 --> (0.19999998807907104, 7.757396697998047)
Correctly predicted samples had labels: tensor([1., 1., 1., 1., 1.], device='cuda:0')
Local test acc and loss at the end of set: 0 --> (0.19999998807907104, 7.095114707946777)
Correctly predicted samples had labels: tensor([53., 53., 53., 53., 53.], device='cuda:0')
Local train acc and loss at the end of set: 10 --> (0.19999998807907104, 8.098657608032227)
Correctly predicted samples had labels: tensor([53., 53., 53., 53., 53.], device='cuda:0')
Local test acc and loss at the end of set: 10 --> (0.19999998807907104, 8.064777374267578)
Global loss at the end of epoch: 0 : 150.23689079284668

----------------------- Epoch 1  -----------------------
Correctly predicted samples had labels: tensor([2., 2., 2., 2., 2.], device='cuda:0')
Local train acc and loss at the end of set: 0 --> (0.19999998807907104, 6.91438484

KeyboardInterrupt: 

In [None]:
current_time = datetime.now().strftime("%Y%m%d%H%M%S")

# Create a file name with the current time
hnet_file = f'models/hnet_{current_time}.pth'
torch.save(hnet.state_dict(), hnet_file)
kcnn_file = f'models/kcnn_{current_time}.pth'
torch.save(kcnn.state_dict(), kcnn_file)

In [None]:
# x_adv_dataset_1 = pgd_attack_data(dataset_1, dataset_1_lbl, mnet, hnet, z_space_1, K_1, 1)
# x_adv_dataset_1_np = x_adv_dataset_1.detach().cpu().numpy()
# x_adv_dataset_0_test = pgd_attack_data(dataset_0_test, dataset_0_lbl_test, mnet, hnet, z_space, K, 0)
# x_adv_dataset_0_test_np = x_adv_dataset_0_test.detach().cpu().numpy()

print(calc_accuracy(dataset_0_test, dataset_0_lbl_test, mnet, W_dataset_0, Ks, s_cnn))
print(calc_accuracy(dataset_1, dataset_1_lbl, mnet, W_dataset_0, Ks, s_cnn))
# accuracies_dataset_0_adv.append((calc_accuracy(x_adv_dataset_0_test_np, dataset_0_lbl_test, mnet, W_dataset_0)).detach().cpu())
# accuracies_dataset_1_adv.append((calc_accuracy(x_adv_dataset_1_np, dataset_1_lbl, mnet, W_dataset_1)).detach().cpu())

In [None]:
print("Mean:")
print("dataset 0 accuracy:", np.mean(np.array(accuracies_dataset_0)))
print("dataset 1 accuracy:", np.mean(np.array(accuracies_dataset_1)))
print("dataset 0 adv accuracy:", np.mean(np.array(accuracies_dataset_0_adv)))
print("dataset 1 adv accuracy:", np.mean(np.array(accuracies_dataset_1_adv)))
print()
print("Standard deviation:")
print("dataset 0 accuracy:", np.std(np.array(accuracies_dataset_0)))
print("dataset 1 accuracy:", np.std(np.array(accuracies_dataset_1)))
print("dataset 0 adv accuracy:", np.std(np.array(accuracies_dataset_0_adv)))
print("dataset 1 adv accuracy:", np.std(np.array(accuracies_dataset_1_adv)))