## LeNet-5 (MNIST)

In [51]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np

# Define the LeNet-5 architecture
# class LeNet5(nn.Module):
#     def __init__(self, in_channels=1, num_classes=10):
#         super(LeNet5, self).__init__()
#         self.num_classes = num_classes
#         self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=2)  # Convolutional layer with 6 feature maps of size 5x5
#         self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 6 feature maps of size 2x2
#         self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)  # Convolutional layer with 16 feature maps of size 5x5
#         self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 16 feature maps of size 2x2
        
#         # These layers depend on the input size
#         self.fc1 = nn.Linear(16 * 5 * 5, 120)  # Fully connected layer, output size 120
#         self.fc2 = nn.Linear(120, 84)  # Fully connected layer, output size 84
#         self.fc3 = nn.Linear(84, num_classes)  # Fully connected layer, output size num_classes

#     def forward(self, x):
#         x = F.relu(self.conv1(x))  # Apply ReLU after conv1
#         x = self.pool1(x)  # Apply subsampling pool1
#         x = F.relu(self.conv2(x))  # Apply ReLU after conv2
#         x = self.pool2(x)  # Apply subsampling pool2
#         x = x.view(x.size(0), -1)  # Flatten for fully connected layers
#         x = F.relu(self.fc1(x))  # Apply ReLU after fc1
#         x = F.relu(self.fc2(x))  # Apply ReLU after fc2
#         x = self.fc3(x)  # Output layer
#         return x

# Define a function to train the model
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        # if batch_idx % 100 == 0:
        #     print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
        #           f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

# Define a function to test the model
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    latent_all = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output, latent = model(data, latent=True)
            latent_all.append(latent.cpu().numpy())
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)\n')
    
    latent_all = np.concatenate(latent_all, axis=0)
    latent_mean = np.mean(latent_all, axis=0)
    print(f"Latent Mean: {np.mean(latent_mean)}, Latent Std: {np.std(latent_mean)}, Latent Max: {np.max(latent_mean)}, Latent Min: {np.min(latent_mean)}")
    
    
    
# define device
def check_gpu(manual_seed=True, print_info=True):
    if manual_seed:
        torch.manual_seed(0)
    if torch.cuda.is_available():
        if print_info:
            print("CUDA is available")
        device = 'cuda'
        torch.cuda.manual_seed_all(0) 
    elif torch.backends.mps.is_available():
        if print_info:
            print("MPS is available")
        device = torch.device("mps")
        torch.mps.manual_seed(0)
    else:
        if print_info:
            print("CUDA is not available")
        device = 'cpu'
    return device

# Main function to run the training and testing
def main():
    # Training settings
    batch_size = 128
    test_batch_size = 1000
    epochs = 100
    lr = 0.01
    momentum = 0.9
    no_cuda = False
    seed = 1

    device = check_gpu(manual_seed=True, print_info=True)

    torch.manual_seed(seed)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)
    # reduce the dimension of training data
    train_dataset.data = train_dataset.data[:5000]
    test_dataset = datasets.MNIST('../data', train=False, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    model = LeNet5(in_channels=1, num_classes=10).to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

if __name__ == '__main__':
    main()

MPS is available

Test set: Average loss: 2.2608, Accuracy: 3158/10000 (32%)

Latent Mean: 0.16155964136123657, Latent Std: 0.17962320148944855, Latent Max: 0.8367756605148315, Latent Min: 4.525206350081135e-06

Test set: Average loss: 1.0852, Accuracy: 6539/10000 (65%)

Latent Mean: 1.9909775257110596, Latent Std: 2.1955525875091553, Latent Max: 10.501509666442871, Latent Min: 8.462696996502928e-07

Test set: Average loss: 0.6658, Accuracy: 7929/10000 (79%)

Latent Mean: 2.3062903881073, Latent Std: 2.6182315349578857, Latent Max: 11.051698684692383, Latent Min: 1.6220154748225468e-06

Test set: Average loss: 0.4149, Accuracy: 8673/10000 (87%)

Latent Mean: 2.1366264820098877, Latent Std: 2.330186605453491, Latent Max: 10.01394271850586, Latent Min: 2.7257292458671145e-06

Test set: Average loss: 0.3518, Accuracy: 8895/10000 (89%)

Latent Mean: 2.2515532970428467, Latent Std: 2.424161672592163, Latent Max: 10.935249328613281, Latent Min: 4.064067525177961e-06

Test set: Average loss: 

In [37]:
from sklearn.decomposition import PCA

 
# Training settings
batch_size = 64
test_batch_size = 1000
epochs = 1
lr = 0.01
momentum = 0.9
no_cuda = False
seed = 1

device = check_gpu(manual_seed=True, print_info=True)

torch.manual_seed(seed)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('../data', train=False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

# Define the LeNet-5 architecture
# LeNet-5 model
from math import prod
class LeNet5(nn.Module):
    def __init__(self, in_channels=1, num_classes=10, input_size=(28, 28)):
        super(LeNet5, self).__init__()
        self.num_classes = num_classes
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=2)  # Convolutional layer with 6 feature maps of size 5x5
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 6 feature maps of size 2x2
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)  # Convolutional layer with 16 feature maps of size 5x5
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 16 feature maps of size 2x2
        
        # Dinamically calculate the size of the features after convolutional layers
        dummy_input = torch.zeros(1, in_channels, *input_size)
        dummy_output = self.pool2(self.conv2(self.pool1(self.conv1(dummy_input))))
        self.feature_size = prod(dummy_output.size()[1:])

        self.fc1 = nn.Linear(self.feature_size, 120)  # Fully connected layer, output size 120
        self.fc2 = nn.Linear(120, 84)  # Fully connected layer, output size 84
        self.fc3 = nn.Linear(84, num_classes)  # Fully connected layer, output size num_classes

    def forward(self, x, latent=False):
        x = F.relu(self.conv1(x))  # Apply ReLU after conv1
        x = self.pool1(x)  # Apply subsampling pool1
        x = F.relu(self.conv2(x))  # Apply ReLU after conv2
        x = self.pool2(x)  # Apply subsampling pool2
        x_l = x.view(x.size(0), -1)  # Flatten for fully connected layers
        x = F.relu(self.fc1(x_l))  # Apply ReLU after fc1
        x = F.relu(self.fc2(x))  # Apply ReLU after fc2
        x = self.fc3(x)  # Output layer
        if latent:
            return x, x_l
        else:
            return x

# LeNet-5 model
class LeNet5(nn.Module):
    def __init__(self, in_channels=1, num_classes=10, input_size=(28, 28)):
        super(LeNet5, self).__init__()
        self.num_classes = num_classes
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=2)  # Convolutional layer with 6 feature maps of size 5x5
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 6 feature maps of size 2x2
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)  # Convolutional layer with 16 feature maps of size 5x5
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 16 feature maps of size 2x2
        
        # Dinamically calculate the size of the features after convolutional layers
        dummy_input = torch.zeros(1, in_channels, *input_size)
        dummy_output = self.pool2(self.conv2(self.pool1(self.conv1(dummy_input))))
        self.feature_size = prod(dummy_output.size()[1:])

        self.fc1 = nn.Linear(self.feature_size, 120)  # Fully connected layer, output size 120
        self.fc2 = nn.Linear(120, 84)  # Fully connected layer, output size 84
        self.fc3 = nn.Linear(84, num_classes)  # Fully connected layer, output size num_classes

    def forward(self, x, latent=False):
        x = F.relu(self.conv1(x))  # Apply ReLU after conv1
        x = self.pool1(x)  # Apply subsampling pool1
        x = F.relu(self.conv2(x))  # Apply ReLU after conv2
        x = self.pool2(x)  # Apply subsampling pool2
        x_l = x.view(x.size(0), -1)  # Flatten for fully connected layers
        x = F.relu(self.fc1(x_l))  # Apply ReLU after fc1
        x = F.relu(self.fc2(x))  # Apply ReLU after fc2
        x = self.fc3(x)  # Output layer
        if latent:
            return x, x_l
        else:
            return x
    

# Resnet-9 layer
def residual_block(in_channels, out_channels, pool=False):
    layers = [
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    ]
    if pool:
        layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)


# ResNet-9 model
class ResNet9(nn.Module):
    def __init__(self, in_channels, num_classes, input_size=(28, 28)):
        super().__init__()
        self.num_classes = num_classes
        self.prep = residual_block(in_channels, 64)
        self.layer1_head = residual_block(64, 128, pool=True)
        self.layer1_residual = nn.Sequential(residual_block(128, 128), residual_block(128, 128))
        self.layer2 = residual_block(128, 256, pool=True)
        self.layer3_head = residual_block(256, 512, pool=True)
        self.layer3_residual = nn.Sequential(residual_block(512, 512), residual_block(512, 512))
        # self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # Changed to adaptive average pooling:         self.MaxPool2d = nn.Sequential(nn.MaxPool2d(4))
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Calculate the size of the features after the convolutional layers
        dummy_input = torch.zeros(1, in_channels, *input_size)
        dummy_output = self.pool(self.layer3_head(self.layer2(self.layer1_head(self.prep(dummy_input)))))
        self.feature_size = dummy_output.size(1) * dummy_output.size(2) * dummy_output.size(3)

        # Output layer
        self.linear = nn.Linear(self.feature_size, num_classes)

    def forward(self, x, latent=False):
        x = self.prep(x)
        x = self.layer1_head(x)
        x = self.layer1_residual(x) + x
        x = self.layer2(x)
        x = self.layer3_head(x)
        x = self.layer3_residual(x) + x
        x = self.pool(x)  # Changed to adaptive average pooling
        x_l = x.view(x.size(0), -1)
        x = self.linear(x_l)
        if latent:
            return x, x_l
        else:
            return x
    

model = ResNet9(in_channels=1, num_classes=10).to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
loss = nn.CrossEntropyLoss()

for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)


MPS is available


In [68]:

# ModelEvaluator class
class ModelEvaluator:
    def __init__(self, test_loader, device):
        """
        Initializes the ModelEvaluator with the model, device, and number of classes.
        
        Args:
            test_loader: DataLoader with test data
            device: Device to run the evaluation on
        """
        
        self.test_loader = test_loader
        self.device = device
        self.criterion = torch.nn.CrossEntropyLoss(reduction='none')
        self.criterion_trad = torch.nn.CrossEntropyLoss()
        
        # Fit the PCA model 
        

    def evaluate(self, model, latent=False):
        """
        Evaluates the model on the provided test data and returns various metrics.

        Args:
            model: Model to evaluate
            latent: Whether to return the latent representation of the test data
        """
        
        # client-enhanced evaluation function
        # def evaluate_model_per_class(model, device, test_loader, latent=False):
        # Set model to evaluation mode
        model.eval()
        num_classes = model.num_classes

        # Initialize storage for metrics
        precision_per_class = [0] * num_classes
        recall_per_class = [0] * num_classes
        f1_per_class = [0] * num_classes
        accuracy_per_class = [0] * num_classes
        loss_per_class = [0] * num_classes
        class_counts = [0] * num_classes

        y_true_all = []
        y_pred_all = []
        loss_all = []
        latent_all = []
        loss_trad = 0
        total_samples = 0

        # Accumulate predictions and targets over batches
        with torch.no_grad():
            for data, target in self.test_loader:
                data, target = data.to(self.device), target.to(self.device)
                
                # Get model predictions
                if latent: 
                    output, latent_space = model(data, latent=True)
                    latent_all.extend(latent_space.cpu().numpy())
                else: 
                    output = model(data) 
                    
                y_pred_batch = output.argmax(dim=1, keepdim=False)  # Predicted class labels
                
                # Store the true and predicted labels for the batch
                y_true_all.extend(target.cpu().numpy())
                y_pred_all.extend(y_pred_batch.cpu().numpy())
                
                # Compute per-sample loss for the batch
                batch_loss = self.criterion(output, target).cpu().numpy()
                loss_all.extend(batch_loss)
                
                # Compute traditional loss for the batch
                loss_trad += self.criterion_trad(output, target).item()
                
                # Accumulate the total number of samples
                total_samples += len(target)

        # Convert collected predictions and true labels into tensors for processing
        y_true_all = torch.tensor(y_true_all)
        y_pred_all = torch.tensor(y_pred_all)
        loss_all = torch.tensor(loss_all)
        
        # Average traditional loss over the total number of samples
        loss_trad /= total_samples
        
        # Calculate traditional accuracy on the entire test set
        accuracy_trad = accuracy_score(y_true_all, y_pred_all)
        
        # Average latent
        if latent:
            latent_all = torch.tensor(latent_all)
            latent_all = latent_all.view(latent_all.size(0), -1)
            latent_all = latent_all.mean(dim=0).numpy()

        # Iterate through each class (for MNIST, classes are 0 to 9 by default)
        for class_idx in range(num_classes):
            # Get all predictions and ground truths for the current class
            class_mask = (y_true_all == class_idx)  # Mask for this class
            
            y_true_class = (y_true_all == class_idx).numpy().astype(int)  # Binary labels for the current class
            y_pred_class = (y_pred_all == class_idx).numpy().astype(int)  # Binary predictions for the current class
            
            # Only calculate if there are samples for this class
            if class_mask.sum() > 0:
                # Compute precision, recall, and F1-score for this class
                precision = precision_score(y_true_class, y_pred_class, zero_division=0)
                recall = recall_score(y_true_class, y_pred_class, zero_division=0)
                f1 = f1_score(y_true_class, y_pred_class, zero_division=0)
                accuracy = accuracy_score(y_true_class, y_pred_class)

                # Compute the loss for this class (average the loss of samples in this class)
                class_loss = loss_all[class_mask].mean().item()

                # Update class counts and metrics
                precision_per_class[class_idx] = precision
                recall_per_class[class_idx] = recall
                f1_per_class[class_idx] = f1
                accuracy_per_class[class_idx] = accuracy
                loss_per_class[class_idx] = class_loss
                class_counts[class_idx] = class_mask.sum().item()

        return loss_trad, accuracy_trad, precision_per_class, recall_per_class, f1_per_class, accuracy_per_class, loss_per_class, latent_all


In [69]:
evaluator = ModelEvaluator(test_loader, device)
loss_trad, accuracy_trad, precision_per_class, recall_per_class, f1_per_class, accuracy_per_class, loss_per_class, latent_all = evaluator.evaluate(model, latent=True)
print(f"Traditional Loss: {loss_trad}, Traditional Accuracy: {accuracy_trad}")
print(f"Precision: {precision_per_class}")
print(f"Recall: {recall_per_class}")
print(f"F1: {f1_per_class}")
print(f"Accuracy: {accuracy_per_class}")
print(f"Loss: {loss_per_class}")
print(f"Latent Mean: {np.mean(latent_all)}, Latent Std: {np.std(latent_all)}, Latent Max: {np.max(latent_all)}, Latent Min: {np.min(latent_all)}")

Traditional Loss: 4.95510027743876e-05, Traditional Accuracy: 0.9848
Precision: [0.9918533604887984, 0.9964726631393298, 0.9940711462450593, 0.9776264591439688, 0.9957850368809273, 0.9746136865342163, 0.9968051118210862, 0.9551820728291317, 0.997907949790795, 0.9696969696969697]
Recall: [0.9938775510204082, 0.9955947136563876, 0.9748062015503876, 0.995049504950495, 0.9623217922606925, 0.9899103139013453, 0.9770354906054279, 0.995136186770428, 0.9794661190965093, 0.9831516352824579]
F1: [0.9928644240570846, 0.996033494931688, 0.9843444227005871, 0.9862610402355251, 0.9787674779906784, 0.982202447163515, 0.9868212967843965, 0.9747498808956646, 0.9886010362694301, 0.9763779527559056]
Accuracy: [0.9986, 0.9991, 0.9968, 0.9972, 0.9959, 0.9968, 0.9975, 0.9947, 0.9978, 0.9952]
Loss: [0.02462760917842388, 0.01422794908285141, 0.07511214166879654, 0.009373032487928867, 0.1041930541396141, 0.04359889402985573, 0.08723290264606476, 0.013723541982471943, 0.08038651198148727, 0.050607096403837204]


In [74]:
import json
json.dumps(precision_per_class)

'[0.9918533604887984, 0.9964726631393298, 0.9940711462450593, 0.9776264591439688, 0.9957850368809273, 0.9746136865342163, 0.9968051118210862, 0.9551820728291317, 0.997907949790795, 0.9696969696969697]'

In [59]:
    # standard scaler
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    scaler = StandardScaler()
    
    # Set model to evaluation mode
    model.eval()
    num_classes = model.num_classes
    latent = True

    # Define the cross-entropy loss function
    criterion = torch.nn.CrossEntropyLoss(reduction='none')
    criterion_trad = torch.nn.CrossEntropyLoss()

    # Initialize storage for metrics
    precision_per_class = [0] * num_classes
    recall_per_class = [0] * num_classes
    f1_per_class = [0] * num_classes
    accuracy_per_class = [0] * num_classes
    loss_per_class = [0] * num_classes
    class_counts = [0] * num_classes

    y_true_all = []
    y_pred_all = []
    loss_all = []
    latent_all = []
    loss_trad = 0
    total_samples = 0

    # Accumulate predictions and targets over batches
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            
            # Get model predictions
            if latent: 
                output, latent_space = model(data, latent=True)
                latent_all.extend(latent_space.cpu().numpy())
            else: 
                output = model(data) 
                
            y_pred_batch = output.argmax(dim=1, keepdim=False)  # Predicted class labels
            
            # Store the true and predicted labels for the batch
            y_true_all.extend(target.cpu().numpy())
            y_pred_all.extend(y_pred_batch.cpu().numpy())
            
            # Compute per-sample loss for the batch
            batch_loss = criterion(output, target).cpu().numpy()
            loss_all.extend(batch_loss)
            
            # Compute traditional loss for the batch
            loss_trad += criterion_trad(output, target).item()
            
            # Accumulate the total number of samples
            total_samples += len(target)

    # Convert collected predictions and true labels into tensors for processing
    y_true_all = torch.tensor(y_true_all)
    y_pred_all = torch.tensor(y_pred_all)
    loss_all = torch.tensor(loss_all)
    
    # Average traditional loss over the total number of samples
    loss_trad /= total_samples
    
    # Calculate traditional accuracy on the entire test set
    accuracy_trad = accuracy_score(y_true_all, y_pred_all)
    
    # normalize latent space
    latent_all = np.array(latent_all)
    latent_all = latent_all.mean(axis=0)
    scaler.fit(latent_all.reshape(-1, 1))
    latent_all = scaler.transform(latent_all.reshape(-1, 1))
    
    # PCA
    rand_points = torch.normal(mean=0, std=1, size=(100, latent_all.shape[1]))
    pca_model = PCA(n_components=30)
    
    # # Average latent
    # if latent:
    #     latent_all = torch.tensor(latent_all)
    #     latent_all = latent_all.view(latent_all.size(0), -1)
    #     latent_all = latent_all.mean(dim=0).numpy()

In [None]:
        rand_points = torch.normal(mean=0, std=0.1, size=(100, errors.shape[1]))


In [58]:
latent_all.shape

(512,)

In [60]:
import numpy as np
# latent_all = np.array(latent_all)
# average latent space along the samples
# latent_all_mean = latent_all.mean(axis=0)
max(latent_all), min(latent_all), np.std(latent_all), np.mean(latent_all)

(array([5.9790297], dtype=float32),
 array([-1.2622539], dtype=float32),
 0.99999994,
 -1.8626451e-09)

In [23]:
import torch
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

def evaluate_model_per_class(model, device, test_loader, num_classes=10):
    # Set model to evaluation mode
    model.eval()

    # Define the cross-entropy loss function
    criterion = torch.nn.CrossEntropyLoss(reduction='none')

    # Initialize storage for metrics
    precision_per_class = [0] * num_classes
    recall_per_class = [0] * num_classes
    f1_per_class = [0] * num_classes
    accuracy_per_class = [0] * num_classes
    loss_per_class = [0] * num_classes
    class_counts = [0] * num_classes

    y_true_all = []
    y_pred_all = []
    loss_all = []

    # Accumulate predictions and targets over batches
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            
            # Get model predictions
            output = model(data)
            y_pred_batch = output.argmax(dim=1, keepdim=False)  # Predicted class labels
            
            # Store the true and predicted labels for the batch
            y_true_all.extend(target.cpu().numpy())
            y_pred_all.extend(y_pred_batch.cpu().numpy())
            
            # Compute per-sample loss for the batch
            batch_loss = criterion(output, target).cpu().numpy()
            loss_all.extend(batch_loss)

    # Convert collected predictions and true labels into tensors for processing
    y_true_all = torch.tensor(y_true_all)
    y_pred_all = torch.tensor(y_pred_all)
    loss_all = torch.tensor(loss_all)

    # Iterate through each class (for MNIST, classes are 0 to 9 by default)
    for class_idx in range(num_classes):
        # Get all predictions and ground truths for the current class
        class_mask = (y_true_all == class_idx)  # Mask for this class
        
        y_true_class = (y_true_all == class_idx).numpy().astype(int)  # Binary labels for the current class
        y_pred_class = (y_pred_all == class_idx).numpy().astype(int)  # Binary predictions for the current class
        
        # Only calculate if there are samples for this class
        if class_mask.sum() > 0:
            # Compute precision, recall, and F1-score for this class
            precision = precision_score(y_true_class, y_pred_class, zero_division=0)
            recall = recall_score(y_true_class, y_pred_class, zero_division=0)
            f1 = f1_score(y_true_class, y_pred_class, zero_division=0)
            accuracy = accuracy_score(y_true_class, y_pred_class)

            # Compute the loss for this class (average the loss of samples in this class)
            class_loss = loss_all[class_mask].mean().item()

            # Update class counts and metrics
            precision_per_class[class_idx] = precision
            recall_per_class[class_idx] = recall
            f1_per_class[class_idx] = f1
            accuracy_per_class[class_idx] = accuracy
            loss_per_class[class_idx] = class_loss
            class_counts[class_idx] = class_mask.sum().item()

    return precision_per_class, recall_per_class, f1_per_class, accuracy_per_class, loss_per_class

precision_per_class, recall_per_class, f1_per_class, accuracy_per_class, loss_per_class, loss_all = evaluate_model_per_class(model, device, test_loader)
print("Class-wise evaluation:")
for class_idx in range(10):
    print(f"Class {class_idx}: Precision = {precision_per_class[class_idx]:.4f}, Recall = {recall_per_class[class_idx]:.4f}, F1 = {f1_per_class[class_idx]:.4f}, Accuracy = {accuracy_per_class[class_idx]:.4f}, Loss = {loss_per_class[class_idx]:.4f}")

Class-wise evaluation:
Class 0: Precision = 0.9797, Recall = 0.9837, F1 = 0.9817, Accuracy = 0.9964, Loss = 0.0577
Class 1: Precision = 0.9868, Recall = 0.9877, F1 = 0.9872, Accuracy = 0.9971, Loss = 0.0401
Class 2: Precision = 0.9488, Recall = 0.9874, F1 = 0.9677, Accuracy = 0.9932, Loss = 0.0427
Class 3: Precision = 0.9446, Recall = 0.9792, F1 = 0.9616, Accuracy = 0.9921, Loss = 0.0633
Class 4: Precision = 0.9747, Recall = 0.9827, F1 = 0.9787, Accuracy = 0.9958, Loss = 0.0559
Class 5: Precision = 0.9593, Recall = 0.9776, F1 = 0.9684, Accuracy = 0.9943, Loss = 0.0750
Class 6: Precision = 0.9874, Recall = 0.9781, F1 = 0.9827, Accuracy = 0.9967, Loss = 0.0676
Class 7: Precision = 0.9792, Recall = 0.9601, F1 = 0.9695, Accuracy = 0.9938, Loss = 0.1321
Class 8: Precision = 0.9788, Recall = 0.9476, F1 = 0.9630, Accuracy = 0.9929, Loss = 0.1553
Class 9: Precision = 0.9865, Recall = 0.9386, F1 = 0.9619, Accuracy = 0.9925, Loss = 0.1915


In [28]:
loss_all

tensor([1.1629e-03, 1.5663e-04, 5.1977e-03,  ..., 7.3909e-06, 3.2062e-04,
        8.0701e-05])

In [15]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# client-enhanced evaluation function
def client_enhanced_evaluation(model, device, test_loader):
    model.eval()
    # with torch.no_grad():
    #     for data, target in test_loader:
    #         data, target = data.to(device), target.to(device)
    #         output = model(data)
    #         y_pred.extend(output.argmax(dim=1, keepdim=True).cpu().numpy())
    #         y_true.extend(target.cpu().numpy())
            
    # Accumulate predictions and targets
    y_pred, y_true = [], []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            y_pred.append(output.argmax(dim=1).cpu())
            y_true.append(target.cpu())

    # Convert lists to tensors
    y_true = torch.cat(y_true)
    y_pred = torch.cat(y_pred)

    # Calculate the loss for each sample
    criterion = torch.nn.CrossEntropyLoss(reduction='none')  # Set reduction='none' to get the loss per sample
    loss_per_sample = criterion(y_pred, y_true).cpu()

    # Initialize storage for metrics
    precision_per_class = []
    recall_per_class = []
    f1_per_class = []
    accuracy_per_class = []
    loss_per_class = []

    # Iterate through each class (for MNIST, classes are 0 to 9)
    for class_idx in range(model.num_classes):
        # Get all predictions and ground truths for the current class
        class_mask = (y_true == class_idx)  # Mask for this class
        
        y_true_class = (y_true == class_idx)  # Convert to binary for the current class
        y_pred_class = (y_pred == class_idx)  # Convert to binary for the current class
        
        # Compute precision, recall, and F1-score for this class (treat as binary classification)
        precision = precision_score(y_true_class, y_pred_class, average='binary', pos_label=1)
        recall = recall_score(y_true_class, y_pred_class, average='binary', pos_label=1)
        f1 = f1_score(y_true_class, y_pred_class, average='binary', pos_label=1)
        accuracy = accuracy_score(y_true_class, y_pred_class)
        
        # Compute the loss for this class (average the loss of samples in this class)
        class_loss = loss_per_sample[class_mask].mean().item() if class_mask.sum() > 0 else 0
        
        # Append metrics to the lists
        precision_per_class.append(precision)
        recall_per_class.append(recall)
        f1_per_class.append(f1)
        accuracy_per_class.append(accuracy)
        loss_per_class.append(class_loss)
        
    return precision_per_class, recall_per_class, f1_per_class, accuracy_per_class, loss_per_class

# Evaluate the model
precision_per_class, recall_per_class, f1_per_class, accuracy_per_class, loss_per_class = client_enhanced_evaluation(model, device, test_loader)
print(f"Precision per class: {precision_per_class}")

RuntimeError: Expected floating point type for target with class probabilities, got Long

In [18]:
# get the last layer weights
model.fc3.weight.shape

torch.Size([10, 84])

## ResNet-9 (MNIST)

In [64]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the conv_bn_relu_pool function
def conv_bn_relu_pool(in_channels, out_channels, pool=False):
    layers = [
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    ]
    if pool:
        layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)


class ResNet9(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.prep = conv_bn_relu_pool(in_channels, 64)
        self.layer1_head = conv_bn_relu_pool(64, 128, pool=True)
        self.layer1_residual = nn.Sequential(conv_bn_relu_pool(128, 128), conv_bn_relu_pool(128, 128))
        self.layer2 = conv_bn_relu_pool(128, 256, pool=True)
        self.layer3_head = conv_bn_relu_pool(256, 512, pool=True)
        self.layer3_residual = nn.Sequential(conv_bn_relu_pool(512, 512), conv_bn_relu_pool(512, 512))
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # Changed to adaptive average pooling:         self.MaxPool2d = nn.Sequential(nn.MaxPool2d(4))
        self.linear = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.prep(x)
        x = self.layer1_head(x)
        x = self.layer1_residual(x) + x
        x = self.layer2(x)
        x = self.layer3_head(x)
        x = self.layer3_residual(x) + x
        x = self.avgpool(x)  # Changed to adaptive average pooling
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x

# Define a function to train the model
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

# Define a function to test the model
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)\n')

# Main function to run the training and testing
def main():
    # Training settings
    batch_size = 64
    test_batch_size = 1000
    epochs = 10
    lr = 0.01
    momentum = 0.9
    no_cuda = False
    seed = 1

    device = check_gpu(manual_seed=True, print_info=True)

    torch.manual_seed(seed)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST('../data', train=False, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    model = ResNet9(in_channels=1, num_classes=10).to(device)  # Set in_channels to 1 for MNIST
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

if __name__ == '__main__':
    main()

MPS is available

Test set: Average loss: 0.0489, Accuracy: 9847/10000 (98%)


Test set: Average loss: 0.0277, Accuracy: 9906/10000 (99%)


Test set: Average loss: 0.0262, Accuracy: 9923/10000 (99%)


Test set: Average loss: 0.0202, Accuracy: 9942/10000 (99%)


Test set: Average loss: 0.0170, Accuracy: 9952/10000 (100%)


Test set: Average loss: 0.0159, Accuracy: 9956/10000 (100%)


Test set: Average loss: 0.0176, Accuracy: 9946/10000 (99%)


Test set: Average loss: 0.0185, Accuracy: 9943/10000 (99%)


Test set: Average loss: 0.0194, Accuracy: 9941/10000 (99%)


Test set: Average loss: 0.0287, Accuracy: 9913/10000 (99%)



In [24]:
# model parameters
model_lenet = LeNet5()
print(f'LeNet-5 model has {sum(p.numel() for p in model_lenet.parameters()):,} parameters')
model_resnet9 = ResNet9(in_channels=1, num_classes=9)
print(f'ResNet-9 model has {sum(p.numel() for p in model_resnet9.parameters()):,} parameters')

LeNet-5 model has 61,706 parameters
ResNet-9 model has 6,573,705 parameters


## Trainings on ours dataset

In [14]:
import non_iiddata_generator_no_drifting as noniidgen

def data_creation(dataset_name):
    # Load the dataset
    # Options: "MNIST", "FMNIST", "CIFAR10", "CIFAR100", etc.
    train_images, train_labels, test_images, test_labels = noniidgen.load_full_datasets(dataset_name)

    # Define parameters for split_feature_skew
    client_number = 10
    set_rotation = True
    rotations = 4
    scaling_rotation_low = 0.1
    scaling_rotation_high = 0.2
    set_color = True
    colors = 3
    scaling_color_low = 0.1
    scaling_color_high = 0.2
    random_order = True

    # Run split_feature_skew
    clients_data = noniidgen.split_feature_skew(
        train_features = train_images,
        train_labels = train_labels,
        test_features = test_images,
        test_labels = test_labels,
        client_number = client_number,
        set_rotation = set_rotation,
        rotations = rotations,
        scaling_rotation_low = scaling_rotation_low,
        scaling_rotation_high = scaling_rotation_high,
        set_color = set_color,
        colors = colors,
        scaling_color_low = scaling_color_low,
        scaling_color_high = scaling_color_high,
        random_order = random_order
    )
    return clients_data

data_MNIST = data_creation("MNIST")
data_FMNIST = data_creation("FMNIST")
data_CIFAR10 = data_creation("CIFAR10")
data_CIFAR100 = data_creation("CIFAR100")

KeyboardInterrupt: 

In [63]:
import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision import transforms
from math import prod

class CombinedDataset(Dataset):
    def __init__(self, features, labels, transform=None):
        self.features = features
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        x = self.features[idx]
        y = self.labels[idx]

        if self.transform:
            x = self.transform(x)

        return x, y
    
def merge_data(data):
    train_features = []
    train_labels = []
    test_features = []
    test_labels = []
    for client_data in data:
        train_features.append(client_data['train_features'])
        train_labels.append(client_data['train_labels'])
        test_features.append(client_data['test_features'])
        test_labels.append(client_data['test_labels'])

    # Concatenate all the data
    train_features = torch.cat(train_features, dim=0)
    train_labels = torch.cat(train_labels, dim=0)
    test_features = torch.cat(test_features, dim=0)
    test_labels = torch.cat(test_labels, dim=0)

    return train_features, train_labels, test_features, test_labels
    
class LeNet5(nn.Module):
    def __init__(self, in_channels=1, num_classes=10, input_size=(28, 28)):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=2)  # Convolutional layer with 6 feature maps of size 5x5
        self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 6 feature maps of size 2x2
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5, stride=1)  # Convolutional layer with 16 feature maps of size 5x5
        self.pool2 = nn.AvgPool2d(kernel_size=2, stride=2)  # Subsampling layer with 16 feature maps of size 2x2
        
        # Calculate the size of the features after convolutional layers
        dummy_input = torch.zeros(1, in_channels, *input_size)
        dummy_output = self.pool2(self.conv2(self.pool1(self.conv1(dummy_input))))
        self.feature_size = prod(dummy_output.size()[1:])
        
        self.fc1 = nn.Linear(self.feature_size, 120)  # Fully connected layer, output size 120
        self.fc2 = nn.Linear(120, 84)  # Fully connected layer, output size 84
        self.fc3 = nn.Linear(84, num_classes)  # Fully connected layer, output size num_classes

    def forward(self, x):
        x = F.relu(self.conv1(x))  # Apply ReLU after conv1
        x = self.pool1(x)  # Apply subsampling pool1
        x = F.relu(self.conv2(x))  # Apply ReLU after conv2
        x = self.pool2(x)  # Apply subsampling pool2
        x = x.view(x.size(0), -1)  # Flatten for fully connected layers
        x = F.relu(self.fc1(x))  # Apply ReLU after fc1
        x = F.relu(self.fc2(x))  # Apply ReLU after fc2
        x = self.fc3(x)  # Output layer
        return x

# Define a function to train the model
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

# Define a function to test the model
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)\n')
    
# define device
def check_gpu(manual_seed=True, print_info=True):
    if manual_seed:
        torch.manual_seed(0)
    if torch.cuda.is_available():
        if print_info:
            print("CUDA is available")
        device = 'cuda'
        torch.cuda.manual_seed_all(0) 
    elif torch.backends.mps.is_available():
        if print_info:
            print("MPS is available")
        device = torch.device("mps")
        torch.mps.manual_seed(0)
    else:
        if print_info:
            print("CUDA is not available")
        device = 'cpu'
    return device

# Main function to run the training and testing
def main():
    # Training settings
    batch_size = 64
    test_batch_size = 1000
    epochs = 10
    lr = 0.01
    momentum = 0.9
    no_cuda = False
    seed = 1

    device = check_gpu(manual_seed=True, print_info=True)

    torch.manual_seed(seed)

    # merge the data
    train_features, train_labels, test_features, test_labels = merge_data(data_CIFAR10)

    # Define any necessary transforms
    transform = None

    # Create the datasets
    train_dataset = CombinedDataset(train_features, train_labels, transform=transform)
    test_dataset = CombinedDataset(test_features, test_labels, transform=transform)

    # Define batch sizes
    batch_size = 64
    test_batch_size = 1000

    # Create the data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    # model = LeNet5(in_channels=3, num_classes=10, input_size=(32,32)).to(device)
    model = ResNet9(in_channels=3, num_classes=10).to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

if __name__ == '__main__':
    main()

MPS is available

Test set: Average loss: 1.7305, Accuracy: 3724/10000 (37%)


Test set: Average loss: 1.6656, Accuracy: 4110/10000 (41%)


Test set: Average loss: 1.3105, Accuracy: 5354/10000 (54%)


Test set: Average loss: 1.3724, Accuracy: 5174/10000 (52%)


Test set: Average loss: 1.1694, Accuracy: 5931/10000 (59%)


Test set: Average loss: 1.1246, Accuracy: 6184/10000 (62%)


Test set: Average loss: 1.0482, Accuracy: 6402/10000 (64%)


Test set: Average loss: 1.0358, Accuracy: 6590/10000 (66%)


Test set: Average loss: 1.1104, Accuracy: 6542/10000 (65%)


Test set: Average loss: 1.1751, Accuracy: 6542/10000 (65%)



## Examples with our models.py

In [2]:
from models import *
import non_iiddata_generator_no_drifting as noniidgen
from non_iiddata_generator_no_drifting import merge_data
import torch
import torch.optim as optim
from torch.utils.data import DataLoader

def main():
    # Training settings
    model_name = "LeNet5"   # Options: "LeNet5", "ResNet9"
    batch_size = 64
    test_batch_size = 1000
    epochs = 10
    lr = 0.01
    momentum = 0.9
    seed = 1
    transform = None
    # dataset settings
    dataset_name = "CIFAR10"
    client_number = 10
    set_rotation = True
    rotations = 4
    scaling_rotation_low = 0.1
    scaling_rotation_high = 0.2
    set_color = True
    colors = 3
    scaling_color_low = 0.1
    scaling_color_high = 0.2
    random_order = True

    device = check_gpu(manual_seed=True, print_info=True)
    torch.manual_seed(seed)

    # load data 
    train_images, train_labels, test_images, test_labels = noniidgen.load_full_datasets(dataset_name)

    # create data: split_feature_skew
    clients_data = noniidgen.split_feature_skew(
        train_features = train_images,
        train_labels = train_labels,
        test_features = test_images,
        test_labels = test_labels,
        client_number = client_number,
        set_rotation = set_rotation,
        rotations = rotations,
        scaling_rotation_low = scaling_rotation_low,
        scaling_rotation_high = scaling_rotation_high,
        set_color = set_color,
        colors = colors,
        scaling_color_low = scaling_color_low,
        scaling_color_high = scaling_color_high,
        random_order = random_order
    )

    # merge the data (for Centralized Learning Simulation)
    train_features, train_labels, test_features, test_labels = merge_data(clients_data)

    # Create the datasets
    train_dataset = CombinedDataset(train_features, train_labels, transform=transform)
    test_dataset = CombinedDataset(test_features, test_labels, transform=transform)

    # Create the data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    # model = LeNet5(in_channels=3, num_classes=10, input_size=(32,32)).to(device)
    model = models[model_name](in_channels=3, num_classes=10, input_size=(32,32)).to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)


    for epoch in range(1, epochs + 1):
        simple_train(model, device, train_loader, optimizer, epoch)
        simple_test(model, device, test_loader)

if __name__ == '__main__':
    main()

MPS is available
Files already downloaded and verified
Files already downloaded and verified

Test set: Average loss: 2.1686, Accuracy: 2076/10000 (21%)


Test set: Average loss: 2.0218, Accuracy: 2659/10000 (27%)



KeyboardInterrupt: 

In [3]:
# print the number of parameters
model_lenet = LeNet5(in_channels=3, num_classes=10, input_size=(32,32))
print(f'LeNet-5 model has {sum(p.numel() for p in model_lenet.parameters()):,} parameters')
model_resnet9 = ResNet9(in_channels=3, num_classes=10, input_size=(32,32))
print(f'ResNet-9 model has {sum(p.numel() for p in model_resnet9.parameters()):,} parameters')

model_lenet = LeNet5(in_channels=3, num_classes=10, input_size=(28,28))
print(f'LeNet-5 model has {sum(p.numel() for p in model_lenet.parameters()):,} parameters')
model_resnet9 = ResNet9(in_channels=3, num_classes=10, input_size=(28,28))
print(f'ResNet-9 model has {sum(p.numel() for p in model_resnet9.parameters()):,} parameters')



LeNet-5 model has 83,126 parameters
ResNet-9 model has 6,590,730 parameters
LeNet-5 model has 62,006 parameters
ResNet-9 model has 6,575,370 parameters


In [2]:
import torch
import models    
import config as cfg
import numpy as np
import utils
import torch.optim as optim
from torch.utils.data import DataLoader

# check gpu and set manual seed
device = utils.check_gpu(manual_seed=True)

# model and history folder
model = models.models[cfg.model_name](in_channels=3, num_classes=cfg.n_classes, input_size=cfg.input_size).to(device)
# train_fn = utils.trainings[args.model]
# evaluate_fn = utils.evaluations[args.model]
# plot_fn = utils.plot_functions[args.model]
# config = utils.config_tests[args.dataset][args.model]

# check if metrics.csv exists otherwise delete it
# utils.check_and_delete_metrics_file(config['history_folder'] + f"client_{args.data_type}_{args.id}", question=False)

# load data
data = np.load(f'./data/client_{1}.npy', allow_pickle=True).item()
num_examples = data['train_features'].shape[0]

# Create the datasets
train_dataset = models.CombinedDataset(data['train_features'], data['train_labels'], transform=None)

# Create the data loaders
train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True)

# Optimizer and Loss function
optimizer = torch.optim.SGD(model.parameters(), lr=cfg.lr, momentum=cfg.momentum)


for epoch in range(2):
                models.simple_train(model, device, train_loader, optimizer, epoch)



MPS is available


In [4]:
from collections import OrderedDict


def get_parameters(model, config):
        return [val.cpu().numpy() for _, val in model.state_dict().items()]

def set_parameters(model, parameters):
        params_dict = zip(model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
        model.load_state_dict(state_dict, strict=True)

In [5]:
x = get_parameters(model, cfg)

In [6]:
set_parameters(model, x)