In [1]:
import sys
sys.path.append('/kaggle/input/customresnet-dl23')

import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import copy
import random
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from Custom_ResNet18 import *

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# Every function below that moves tensors reads this module-level `device`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



## Loading Partial Data

In [2]:
def _restrict_to_classes(dataset, mapping):
    """Keep only samples whose label is a key of ``mapping`` and relabel them in place."""
    kept = [(i, mapping[label]) for i, label in enumerate(dataset.targets) if label in mapping]
    dataset.data = dataset.data[[i for i, _ in kept]]
    dataset.targets = torch.tensor([new_label for _, new_label in kept])


def create_partial_datasets(classes):
    """Build CIFAR-100 train/test datasets restricted to a subset of classes.

    Labels are remapped so that ``classes[k]`` becomes label ``k``; the
    returned datasets therefore use labels ``0 .. len(classes) - 1``.

    Args:
        classes: Sequence of original CIFAR-100 class indices to keep.

    Returns:
        Tuple ``(train_data, test_data, class_names)`` where ``class_names``
        lists the string names of ``classes`` in the same order.
    """
    # Load CIFAR-100 dataset (only the train split triggers the download;
    # the test split is read from the same `data` root).
    train_data = torchvision.datasets.CIFAR100(
        root='data',
        train=True,
        transform=torchvision.transforms.ToTensor(),
        download=True
    )
    test_data = torchvision.datasets.CIFAR100(
        root='data',
        train=False,
        transform=torchvision.transforms.ToTensor(),
    )

    class_names = [train_data.classes[i] for i in classes]

    # Mapping for labels to start at 0. It doubles as an O(1) membership
    # test, instead of scanning the `classes` list for every sample.
    mapping = {old: new for new, old in enumerate(classes)}

    # Filter and map both splits with the same logic
    _restrict_to_classes(train_data, mapping)
    _restrict_to_classes(test_data, mapping)

    return train_data, test_data, class_names


In [3]:
classes_per_set = 10

train_classes, test_classes, string_labels = [], [], []

# Split the 100 CIFAR-100 labels into consecutive chunks of `classes_per_set`
# and build one (train, test, names) triple per chunk.
for first_class in range(0, 100, classes_per_set):
    subset_classes = list(range(first_class, first_class + classes_per_set))
    train_subset, test_subset, subset_names = create_partial_datasets(subset_classes)

    train_classes.append(train_subset)
    test_classes.append(test_subset)
    string_labels.append(subset_names)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:02<00:00, 56483800.92it/s]


Extracting data/cifar-100-python.tar.gz to data
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [4]:
# One {'train': ..., 'test': ...} pair of DataLoaders per class subset,
# in the same order as `train_classes` / `test_classes`.
loaders = [
    {
        'train': torch.utils.data.DataLoader(train_subset, batch_size=100, shuffle=True, num_workers=1),
        'test': torch.utils.data.DataLoader(test_subset, batch_size=100, shuffle=True, num_workers=1),
    }
    for train_subset, test_subset in zip(train_classes, test_classes)
]

## Training Models

In [5]:
def evaluate(model, test_loader):
    """Return the top-1 accuracy of ``model`` over all batches of ``test_loader``.

    The model is switched to evaluation mode and is left in that mode;
    callers that keep training must call ``model.train()`` afterwards.

    Args:
        model: Module whose output has the class scores along dimension 1.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Fraction of correctly classified samples as a float in ``[0, 1]``.
        An empty loader yields ``0.0`` rather than a ZeroDivisionError.
    """
    model.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Predicted class = index of the largest score per sample
            pred_y = model(images).argmax(dim=1)
            correct += (pred_y == labels).sum().item()
            total += labels.size(0)

    # Nothing was evaluated: avoid dividing by zero
    if total == 0:
        return 0.0

    return correct / total

In [6]:
def train(num_epochs, train_loader, model, optimizer, loss_func, test_loader=None):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Args:
        num_epochs: Number of full passes over the training loader.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        model: Module to optimise; it is put into training mode.
        optimizer: Optimizer stepping ``model``'s parameters.
        loss_func: Callable ``loss_func(output, labels)`` returning the loss.
        test_loader: Optional loader; when given, accuracy on it is measured
            with ``evaluate`` after every epoch.

    Returns:
        List with one test accuracy per epoch (empty when ``test_loader`` is None).
    """
    per_epoch_accuracy = []
    track_accuracy = test_loader is not None

    model.train()

    for epoch_number in range(1, num_epochs + 1):
        for batch_images, batch_labels in train_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            batch_loss = loss_func(model(batch_images), batch_labels)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # Measure test accuracy once per epoch, then go back to training
        # mode because `evaluate` leaves the model in eval mode.
        if track_accuracy:
            per_epoch_accuracy.append(evaluate(model, test_loader))
            model.train()

        if epoch_number % 5 == 0:
            print(f"  Finished epoch {epoch_number}")

    return per_epoch_accuracy

In [None]:
num_epochs = 20
num_models = 10
# Only the first nine class subsets are used for pre-training, so that the
# final subset is never trained on here.
num_training_sets = 9

models = [custom_resnet_18(num_classes=10).to(device) for _ in range(num_models)]
optimizers = [torch.optim.Adam(model.parameters(), lr=0.01) for model in models]
# Fully qualified on purpose: among the visible imports, a bare `nn` could
# only come from the star import of Custom_ResNet18.
loss_func = torch.nn.CrossEntropyLoss()

for i, model in enumerate(models):
    # Wrap around so that no model is trained on the final dataset; with ten
    # models the tenth one re-uses dataset 0.
    dataset_idx = i % num_training_sets
    # Report the dataset actually used (previously the raw model index was printed).
    print(f"Training model [{i+1}/{num_models}] on dataset {dataset_idx}")

    optimizer = optimizers[i]
    train(num_epochs, loaders[dataset_idx]["train"], model, optimizer, loss_func)

Training model [1/10] on dataset 0
  Finished epoch 5
  Finished epoch 10
  Finished epoch 15
  Finished epoch 20
Training model [2/10] on dataset 1
  Finished epoch 5
  Finished epoch 10
  Finished epoch 15
  Finished epoch 20
Training model [3/10] on dataset 2
  Finished epoch 5
  Finished epoch 10
  Finished epoch 15
  Finished epoch 20
Training model [4/10] on dataset 3
  Finished epoch 5
  Finished epoch 10
  Finished epoch 15
  Finished epoch 20
Training model [5/10] on dataset 4
  Finished epoch 5
  Finished epoch 10
  Finished epoch 15


In [None]:
# Report every model's accuracy on the test split of the subset it was
# trained on (same modulo-9 indexing as the training cell).
for model_number, trained_model in enumerate(models, start=1):
    subset_idx = (model_number - 1) % 9
    acc = evaluate(trained_model, loaders[subset_idx]["test"])
    print(f"Testing model [{model_number}/{num_models}]: {acc}")

## Custom Clustering

In [98]:
def calculate_cosine_similarity(cluster_group, vec):
    """Return the summed cosine *distance* between ``vec`` and a group of vectors.

    Despite its name, the value returned is
    ``sum(1 - cosine_similarity(member, vec))`` over all members of
    ``cluster_group``; smaller means ``vec`` fits the group better.

    Args:
        cluster_group: Iterable of array-likes; each is flattened to 1-D.
        vec: Array-like compared against every member (flattened to 1-D).

    Returns:
        The total distance as a float; ``0.0`` for an empty group.
    """
    members = [np.asarray(member).ravel() for member in cluster_group]
    if not members:
        return 0.0

    # cosine_similarity expects 2-D inputs: (n_members, dim) against (1, dim)
    member_matrix = np.vstack(members)
    query = np.asarray(vec).reshape(1, -1)

    # One batched call instead of one call per member; result is (n_members, 1)
    similarities = cosine_similarity(member_matrix, query)

    return float(np.sum(1.0 - similarities))

In [136]:
def get_custom_clustering_groups(filters_per_model, num_filters_per_model):
    """Greedily align the filters of several models into shared groups.

    Every filter is flattened and scaled to unit L2 norm (all-zero filters
    are left as zeros). Models are then processed in order; for each group
    the not-yet-used filter of the current model with the smallest value of
    ``calculate_cosine_similarity(group, filter)`` is appended to the group.

    Args:
        filters_per_model: Iterable of arrays, one per model, whose first
            axis indexes the filters of that model.
        num_filters_per_model: Number of groups to build; each model must
            provide at least this many filters.

    Returns:
        Tuple ``(new_groups, permutations)``. ``new_groups[c]`` holds one
        normalized flat filter per model; ``permutations[m][c]`` is the index
        of model ``m``'s filter that was assigned to group ``c``.

    Raises:
        ValueError: If a model has fewer filters than groups, or no remaining
            filter has a comparable (non-NaN) distance.
    """
    filter_vectors = []

    for filters in filters_per_model:
        model_vectors = []
        for i in range(filters.shape[0]):
            # Flatten and normalize the filter
            filter_vec = filters[i].flatten()
            norm = np.linalg.norm(filter_vec)
            # Dividing an all-zero filter by its norm would produce NaNs
            if norm > 0:
                filter_vec = filter_vec / norm
            model_vectors.append(filter_vec)
        filter_vectors.append(model_vectors)

    num_models = len(filter_vectors)

    new_groups = [[] for _ in range(num_filters_per_model)]
    permutations = [[] for _ in range(num_models)]

    # Now we go through all models and assign filters in a greedy manner
    for model_idx, model_filters in enumerate(filter_vectors):
        if len(model_filters) < num_filters_per_model:
            raise ValueError(
                f"model {model_idx} has {len(model_filters)} filters, "
                f"but {num_filters_per_model} groups were requested"
            )

        used_indices = set()

        # We go through every group of the new clusters and assign the min distance vector from the next model to it
        for cluster_idx, cluster_group in enumerate(new_groups):
            min_distance = float('inf')
            selected_vector_idx = None

            for idx, vec in enumerate(model_filters):
                # Skip filters that are already taken before paying for the distance
                if idx in used_indices:
                    continue
                dist = calculate_cosine_similarity(cluster_group, vec)
                if dist < min_distance:
                    min_distance = dist
                    selected_vector_idx = idx

            if selected_vector_idx is None:
                raise ValueError(
                    f"no assignable filter left in model {model_idx} for group {cluster_idx}"
                )

            new_groups[cluster_idx].append(model_filters[selected_vector_idx])
            used_indices.add(selected_vector_idx)
            permutations[model_idx].append(selected_vector_idx)

    return new_groups, permutations

In [137]:
def custom_clustering(filters, num_input_channels, num_filters, kernel_size, include_permutations=False):
    """Average greedily aligned filters into one convolution weight tensor.

    Args:
        filters: Per-model filter arrays, forwarded to
            ``get_custom_clustering_groups``.
        num_input_channels: Input channels of the target weight tensor.
        num_filters: Number of groups, i.e. output channels of the result.
        kernel_size: Side length of the square kernel.
        include_permutations: When true, also return the per-model
            assignment produced by the grouping step.

    Returns:
        A float32 tensor of shape
        ``(num_filters, num_input_channels, kernel_size, kernel_size)`` on
        ``device``, optionally followed by the permutations.
    """
    groups, perms = get_custom_clustering_groups(filters, num_filters)

    # One element-wise mean vector per group
    group_means = []
    for members in np.array(groups):
        group_means.append(np.mean(np.array(members), axis=0))

    target_shape = (num_filters, num_input_channels, kernel_size, kernel_size)
    reshaped_means = np.array(group_means).reshape(target_shape)
    averaged_weights = torch.tensor(reshaped_means, dtype=torch.float32).to(device)

    if not include_permutations:
        return averaged_weights
    return averaged_weights, perms

## GMM

In [14]:
def gmm_clustering(filters, num_input_channels, num_filters, kernel_size, random_state=None):
    """Cluster the filters of several models with a Gaussian mixture and average each cluster.

    Every filter is flattened and scaled to unit L2 norm (all-zero filters
    are left as zeros), all filters are pooled, and a mixture with
    ``num_filters`` components is fitted. The result holds, per component,
    the mean of the filters assigned to it.

    Args:
        filters: Iterable of arrays, one per model, whose first axis indexes
            that model's filters.
        num_input_channels: Input channels of the target weight tensor.
        num_filters: Number of mixture components / output channels.
        kernel_size: Side length of the square kernel.
        random_state: Optional seed forwarded to ``GaussianMixture`` to make
            the clustering reproducible (default: unseeded, as before).

    Returns:
        A float32 tensor of shape
        ``(num_filters, num_input_channels, kernel_size, kernel_size)`` on
        ``device``.
    """
    filter_vectors = []

    for model_filters in filters:
        for i in range(model_filters.shape[0]):
            # Flatten and normalize the filter
            filter_vec = model_filters[i].flatten()
            norm = np.linalg.norm(filter_vec)
            # Dividing an all-zero filter by its norm would produce NaNs
            if norm > 0:
                filter_vec = filter_vec / norm
            filter_vectors.append(filter_vec)

    filter_vectors = np.array(filter_vectors)

    # Use Gaussian Mixture Models for clustering
    gmm = GaussianMixture(n_components=num_filters, init_params='kmeans', random_state=random_state)
    gmm.fit(filter_vectors)
    cluster_labels = gmm.predict(filter_vectors)

    average_filters = []

    for cluster_num in range(num_filters):
        # Filters belonging to the current cluster
        cluster_filters = filter_vectors[cluster_labels == cluster_num]

        if len(cluster_filters) > 0:
            # Compute the average filter for this cluster
            average_filter = np.mean(cluster_filters, axis=0)
        else:
            # No filter was assigned to this component; averaging an empty
            # slice would yield NaN weights, so use the fitted component mean.
            average_filter = gmm.means_[cluster_num]
        average_filters.append(average_filter)

    shaped_average_filters = np.array(average_filters).reshape((num_filters, num_input_channels, kernel_size, kernel_size))
    torch_shaped_average_filters = torch.tensor(shaped_average_filters, dtype=torch.float32).to(device)

    return torch_shaped_average_filters

## K-Means

In [109]:
def kmeans_clustering(filters, num_input_channels, num_filters, kernel_size, random_state=None):
    """Cluster the filters of several models with k-means and average each cluster.

    Every filter is flattened and scaled to unit L2 norm (all-zero filters
    are left as zeros), all filters are pooled, and k-means with
    ``num_filters`` clusters is run. The result holds, per cluster, the mean
    of the filters assigned to it.

    Args:
        filters: Iterable of arrays, one per model, whose first axis indexes
            that model's filters.
        num_input_channels: Input channels of the target weight tensor.
        num_filters: Number of clusters / output channels.
        kernel_size: Side length of the square kernel.
        random_state: Optional seed forwarded to ``KMeans`` to make the
            clustering reproducible (default: unseeded, as before).

    Returns:
        A float32 tensor of shape
        ``(num_filters, num_input_channels, kernel_size, kernel_size)`` on
        ``device``.
    """
    filter_vectors = []

    for model_filters in filters:
        for i in range(model_filters.shape[0]):
            # Flatten and normalize the filter
            filter_vec = model_filters[i].flatten()
            norm = np.linalg.norm(filter_vec)
            # Dividing an all-zero filter by its norm would produce NaNs
            if norm > 0:
                filter_vec = filter_vec / norm
            filter_vectors.append(filter_vec)

    # Pool the filters of all models into one (n_total_filters, dim) matrix
    filter_vectors = np.array(filter_vectors)
    kmeans = KMeans(n_clusters=num_filters, n_init='auto', random_state=random_state)
    cluster_labels = kmeans.fit_predict(filter_vectors)

    average_filters = []

    for cluster_num in range(num_filters):
        # Filters belonging to the current cluster
        cluster_filters = filter_vectors[cluster_labels == cluster_num]

        if len(cluster_filters) > 0:
            # Compute the average filter for this cluster
            average_filter = np.mean(cluster_filters, axis=0)
        else:
            # A cluster without members would average to NaN; fall back to
            # the centre k-means fitted for it.
            average_filter = kmeans.cluster_centers_[cluster_num]
        average_filters.append(average_filter)

    shaped_average_filters = np.array(average_filters).reshape((num_filters, num_input_channels, kernel_size, kernel_size))
    torch_shaped_average_filters = torch.tensor(shaped_average_filters, dtype=torch.float32).to(device)

    return torch_shaped_average_filters

## Permutation

In [128]:
def permute_bias(permuted_model_bias, perm):
    """Reorder the per-channel state of a normalization layer in place.

    ``weight`` and ``bias`` are replaced by new Parameters indexed with
    ``perm``; ``running_mean`` and ``running_var`` are replaced by plain
    indexed tensors.
    """
    norm_layer = permuted_model_bias

    # Learnable affine terms are re-wrapped as Parameters
    for attr in ("weight", "bias"):
        reordered = getattr(norm_layer, attr)[perm]
        setattr(norm_layer, attr, torch.nn.Parameter(reordered))

    # Running statistics stay ordinary tensors
    for attr in ("running_mean", "running_var"):
        setattr(norm_layer, attr, getattr(norm_layer, attr)[perm])

def permute_layer(permuted_layer, permuted_layer_next, permuted_bias, perm = None):
    """Permute the output channels of a layer and the matching inputs of the next one.

    All three modules are modified in place. Nothing happens when ``perm``
    is ``None``.

    Args:
        permuted_layer: Layer whose ``weight`` (and ``bias``, when it has
            one) is reordered along dimension 0.
        permuted_layer_next: Layer consuming the output; its ``weight`` is
            reordered along dimension 1.
        permuted_bias: Normalization layer between the two, reordered with
            ``permute_bias``.
        perm: Sequence of channel indices giving the new order, or ``None``.
    """
    if perm is None:
        return

    permuted_layer.weight = torch.nn.Parameter(permuted_layer.weight[perm])
    # A per-output-channel bias has to follow its channel as well
    if getattr(permuted_layer, "bias", None) is not None:
        permuted_layer.bias = torch.nn.Parameter(permuted_layer.bias[perm])

    permute_bias(permuted_bias, perm)

    # Reorder the input-channel axis of the consumer; `.contiguous()` keeps
    # the stored parameter in a dense layout.
    permuted_layer_next.weight = torch.nn.Parameter(permuted_layer_next.weight[:, perm].contiguous())

In [129]:
def permute_weights(model, permutation):
    """Return a deep copy of ``model`` whose first conv channels are permuted.

    The copy's ``conv1`` outputs, its ``bn1`` and the inputs of
    ``layer1[0].conv1`` are reordered through ``permute_layer``; ``model``
    itself is not touched.

    NOTE(review): only ``layer1[0].conv1`` is treated as a consumer of
    ``conv1``'s output. If the network also feeds that output into a skip
    connection, the permuted copy presumably no longer computes the same
    function - confirm against the Custom_ResNet18 definition.
    """
    with torch.no_grad():
        clone = copy.deepcopy(model)

        first_conv = clone.conv1
        consumer_conv = clone.layer1[0].conv1
        first_norm = clone.bn1

        permute_layer(first_conv, consumer_conv, first_norm, permutation)

    return clone

## Initialization

In [15]:
def create_initialized_model(models, clustering_fn=None):
    """Create a fresh network whose first five conv layers come from clustered filters.

    For ``conv1`` and the four convolutions of ``layer1[0]`` / ``layer1[1]``
    the corresponding weights of all ``models`` are collected, clustered,
    and written into a newly constructed ``custom_resnet_18``.

    Args:
        models: Trained models exposing ``conv1`` and ``layer1[0..1].conv1/conv2``.
        clustering_fn: Callable taking the keyword arguments ``filters``,
            ``num_input_channels``, ``num_filters`` and ``kernel_size`` and
            returning the new weight tensor. Defaults to ``gmm_clustering``.

    Returns:
        The newly created model on ``device``.

    Raises:
        ValueError: If ``models`` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("create_initialized_model needs at least one trained model")
    if clustering_fn is None:
        clustering_fn = gmm_clustering

    final_model = custom_resnet_18(num_classes=10).to(device)

    # Accessors for the layers to initialise, in network order
    layer_getters = (
        lambda net: net.conv1,
        lambda net: net.layer1[0].conv1,
        lambda net: net.layer1[0].conv2,
        lambda net: net.layer1[1].conv1,
        lambda net: net.layer1[1].conv2,
    )

    for get_layer in layer_getters:
        trained_filters = [get_layer(model).weight.data.cpu().numpy() for model in models]

        # Weight layout is (filters, input channels, kernel, kernel); read the
        # sizes from the weights instead of hard-coding them per layer.
        num_filters, num_input_channels, kernel_size = trained_filters[0].shape[:3]

        get_layer(final_model).weight.data = clustering_fn(filters=trained_filters,
                                                           num_input_channels=num_input_channels,
                                                           num_filters=num_filters,
                                                           kernel_size=kernel_size)

    return final_model

In [19]:
def compare_models(models, loaders, num_epochs):
    """Train a randomly initialised and a cluster-initialised model on subset 9.

    Both networks are optimised with Adam (lr 0.01) and cross-entropy on
    ``loaders[9]["train"]``; accuracy on ``loaders[9]["test"]`` is recorded
    after every epoch. The cluster-initialised model is trained first.

    Returns:
        Tuple ``(random_accuracies, final_accuracies)`` of per-epoch
        accuracy lists.
    """
    random_model = custom_resnet_18(num_classes=10).to(device)
    final_model = create_initialized_model(models)

    random_optimizer = torch.optim.Adam(random_model.parameters(), lr=0.01)
    final_optimizer = torch.optim.Adam(final_model.parameters(), lr=0.01)

    loss_func = torch.nn.CrossEntropyLoss()

    # Subset 9 is the one used for this comparison
    target_loaders = loaders[9]
    train_loader = target_loaders["train"]
    test_loader = target_loaders["test"]

    final_accuracies = train(
        num_epochs, train_loader, final_model, final_optimizer, loss_func, test_loader=test_loader
    )
    random_accuracies = train(
        num_epochs, train_loader, random_model, random_optimizer, loss_func, test_loader=test_loader
    )

    return random_accuracies, final_accuracies

In [20]:
def get_average_score(num_iterations, models, loaders, plot_result=True, num_epochs=20):
    """Average the per-epoch accuracies of ``compare_models`` over several runs.

    Args:
        num_iterations: Number of independent ``compare_models`` runs.
        models: Trained models forwarded to ``compare_models``.
        loaders: Data loaders forwarded to ``compare_models``.
        plot_result: When true, plot both averaged accuracy curves.
        num_epochs: Epochs per run (default 20, the previously fixed value).

    Returns:
        Tuple ``(random_average_accuracies, final_average_accuracies)`` with
        one averaged accuracy per epoch; both are empty when
        ``num_iterations`` is 0.
    """
    random_runs = []
    final_runs = []

    for _ in range(num_iterations):
        random_accuracies, final_accuracies = compare_models(models, loaders, num_epochs)

        # The progress line reports the first three epochs, so it is only
        # printed when that many epochs were actually run.
        if len(random_accuracies) >= 3 and len(final_accuracies) >= 3:
            print(f"Random accuracies after 1st epoch: {random_accuracies[0]}, after 2nd epoch: {random_accuracies[1]}, after 3rd epoch {random_accuracies[2]}")
            print(f"Final accuracies after 1st epoch: {final_accuracies[0]}, after 2nd epoch: {final_accuracies[1]}, after 3rd epoch {final_accuracies[2]}")

        random_runs.append(random_accuracies)
        final_runs.append(final_accuracies)

    # Compute the average accuracies epoch by epoch across all runs
    random_average_accuracies = [sum(epoch_values) / num_iterations for epoch_values in zip(*random_runs)]
    final_average_accuracies = [sum(epoch_values) / num_iterations for epoch_values in zip(*final_runs)]

    if plot_result:
        plt.figure(figsize=(10, 6))
        plt.plot(random_average_accuracies, label='Random Model - Average Accuracy')
        plt.plot(final_average_accuracies, label='Our Model - Average Accuracy')
        plt.title('Model Comparison - Average Accuracy per Epoch')
        plt.xlabel('Epochs')
        plt.ylabel('Average Accuracy')
        plt.legend()
        plt.show()

    # Hand the averages back so callers can use them without the plot
    return random_average_accuracies, final_average_accuracies

In [21]:
# Average the random-vs-clustered comparison over 10 runs and plot the curves.
# NOTE(review): the output recorded below is a CUDA `t >= 0 && t < n_classes`
# assertion, i.e. a label outside the range of the model's outputs. The cell
# execution counts in this notebook are out of order, so this presumably comes
# from stale kernel state - re-run from a fresh kernel to confirm.
get_average_score(10, models, loaders)

/usr/local/src/pytorch/aten/src/ATen/native/cuda/Loss.cu:240: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [0,0,0] Assertion `t >= 0 && t < n_classes` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/Loss.cu:240: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [1,0,0] Assertion `t >= 0 && t < n_classes` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/Loss.cu:240: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [2,0,0] Assertion `t >= 0 && t < n_classes` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/Loss.cu:240: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [3,0,0] Assertion `t >= 0 && t < n_classes` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/Loss.cu:240: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [4,0,0] Assertion `t >= 0 && t < n_classes` failed.
/usr/local/src/pytorch/aten/src/ATen/native/cuda/Loss.cu:240: nll_loss_forward_reduce_cuda_kernel_2d: block: [

RuntimeError: cuDNN error: CUDNN_STATUS_MAPPING_ERROR

In [None]:
# Inspect the first trained model (the notebook renders its repr).
models[0]