In [49]:
import os
import pickle
import numpy as np
import tqdm
from matplotlib import pyplot as plt #Use for image debugging
import torch 
from torchvision import models, transforms #Need this to get VGG-11
from sklearn.model_selection import train_test_split
import pandas as pd

In [45]:
# Load the processed batches (based on James' code)
# The original data is organize slightly differently so it's somewhat messy. Sorry. 

def load_processed_batches(path, test = 0):
    #Path is the directory where the files of interest are
    
    data = []
    labels = []

    print('Loading data...')
    if path == 'cifar10_dataset/cifar-10-batches-py':
        #The original data is in its own folder with some extra files
        #So we only loop through the ones that we care about 
        files = os.listdir(path)
        files2 = []
        for i in range(len(files)): #If we want train data
            if test == 0: 
                if 'data_batch' in files[i]: 
                    files2.append(files[i])
            else: 
                if 'test_batch' in files[i]: 
                    files2.append(files[i])
        for file in tqdm.tqdm(files2): #If we want test data
            with open(os.path.join(path, file), 'rb') as f:
                processed_batch_dict = pickle.load(f, encoding='bytes')
                data.append(processed_batch_dict[b'data'])
                labels.append(processed_batch_dict[b'labels'])
        
        #Store the data and labels 
        data = np.concatenate(data)
        data = data.astype(np.float32) / 255 #Divide by 255 to get into 0 to 1 range 
        labels = np.concatenate(labels)
        
        #Reshape to the same dimensions as the processed data
        data = data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

    else: #If we're using black boxes or Gaussian noise data 
        if test == 1: path = path + '_test' 
        files = os.listdir(path)
        for file in tqdm.tqdm(files):
            with open(os.path.join(path, file), 'rb') as f:
                processed_batch_dict = pickle.load(f, encoding='bytes')
                data.append(processed_batch_dict['data'])
                labels.append(processed_batch_dict['labels'])
        #Store the data and labels 
        data = np.concatenate(data)
        data = data.astype(np.float32) / 255 #Divide by 255 to get into 0 to 1 range 

        labels = np.concatenate(labels)
        labels = np.repeat(labels,9) #assume that the same image is repeated 9 times for each of the superpixels (3-by-3)

    return data, labels

def get_data(ds, normalize, test = 0): 

    #ds = dataset 
    #ds = 0 for standard CIFAR
    #ds = 1 for black box
    #ds = 2 for Gaussian noise 

    if ds == 0: 
        path = 'cifar10_dataset/cifar-10-batches-py'
    elif ds == 1: 
        path = 'processed_batches_boxes' 
    elif ds == 2: 
        path = 'processed_batches_gaussian_noise'

    batch_size = 64
    num_workers = 2 

    train_loader = []; 
    val_loader = []; 
    test_loader = []; 

    if test == 0: 
        processed_images, processed_labels = load_processed_batches(path,test=test)
        processed_images = processed_images.transpose(0,3,1,2) #Get into the appropriate shape for training

        #Train and validation data split 
        trainData, valData, trainLabel, valLabel = train_test_split(processed_images, processed_labels, test_size=0.2, random_state=42)

        #Transform data
        print('Preparing data for loading...')
        train_set = torch.utils.data.TensorDataset(normalize(torch.tensor(trainData)),torch.tensor(trainLabel).type(torch.LongTensor))
        val_set = torch.utils.data.TensorDataset(normalize(torch.tensor(valData)),torch.tensor(valLabel).type(torch.LongTensor)) 

        #Set-up dataloaders
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers = num_workers)
        val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers = num_workers)
    
    else: #Get data and format it for testing 
        processed_images, processed_labels = load_processed_batches(path,test=test)
        processed_images = processed_images.transpose(0,3,1,2) #Get into the appropriate shape for training
        
        print('Preparing data for loading...')
        test_set = torch.utils.data.TensorDataset(normalize(torch.tensor(processed_images)),torch.tensor(processed_labels).type(torch.LongTensor))
        test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers = num_workers)

    return train_loader, val_loader, test_loader

def train_network(train_loader, device, num_epochs = 2, momentum = 0.9, learning_rate = 0.001, weight_decay = 0.001): 

    #Get model
    mod = models.vgg11(weights=None)
    mod.classifier[6].out_features = 10 #Adjust final layer to have the right number of classes 

    #Move model to GPU (if available) 
    mod.to(device)
    
    #Training 
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(mod.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=momentum)

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):  
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = mod(images)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if num_epochs > 3: 
            print ('Epoch [{}/{}], Loss: {:.4f}' 
                        .format(epoch+1, num_epochs, i+1, loss.item()))
        
    return mod 

def test_model(test_loader, mod, device): 
    # For test data or validation data
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = mod(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        accuracy = 100 * correct / total; 

        #print('Accuracy of the network on validation images: {} %'.format(accuracy)) 

    return accuracy
    

In [34]:
### Set up constants for experiments ### 

# Normalize images via the statistics of the original dataset
path = 'cifar10_dataset/cifar-10-batches-py'
processed_images, _ = load_processed_batches(path,test=0)

imMean = np.mean(processed_images.reshape(-1,3),axis=0)
imStd = np.std(processed_images.reshape(-1,3),axis=0)

print('Mean:',imMean)
print('Std Dev:', imStd)

#Set-up normalization
normalize = transforms.Normalize(mean=imMean,std=imStd)

#Get device 
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

if torch.cuda.is_available(): 
    print(torch.cuda.get_device_name(0))

#Get test data 
_, _, cifar_test = get_data(0, normalize, test = 1) #this one is saved as 1 batch, the other two are in 5
_, _, black_test = get_data(1, normalize, test = 1)
_, _, gauss_test = get_data(2, normalize, test = 1)

#Other parameters: 
momentums = np.linspace(0.5,1.3,5)
lrs = np.logspace(-1,-5,5)
decays = np.logspace(-1,-5,5)

Loading data...


100%|██████████| 5/5 [00:00<00:00, 40.28it/s]


Mean: [0.32768 0.32768 0.32768]
Std Dev: [0.27755317 0.26929596 0.26811677]
NVIDIA GeForce RTX 4090
Loading data...


100%|██████████| 1/1 [00:00<00:00, 40.72it/s]


Preparing data for loading...
Loading data...


100%|██████████| 5/5 [00:00<00:00, 36.17it/s]


Preparing data for loading...
Loading data...


100%|██████████| 5/5 [00:00<00:00, 32.53it/s]


Preparing data for loading...


In [46]:
# for each dataset 
#   for each momentum 
#   for each learning rate 
#   for each weight decay 
#   train for 20 epochs (save network)
#   evalulate on all three test sets

#Save the hyperparamters: 
bestHP = np.zeros((3,3))

#Hyperparameter storage 
#               cifar   black   gauss 
#momentum       
#learning rate 
#weight decay 

#Save the performances 
testMatrix = np.zeros((3,3))
#               cifar   black   gauss 
#cifar_test       
#black_test
#gauss_test  

for ds in np.arange(3): 
    
    train_loader, val_loader, _ = get_data(ds, normalize, test = 0)

    ## Momentum ##
    acc_list = np.zeros(5) #Initalize validation accuracy matrix 
    print('Optimizing momentum...')
    for i in tqdm.tqdm(np.arange(len(momentums))): 
        mod = train_network(train_loader, device, num_epochs = 2, momentum = momentums[i], learning_rate = 0.001, weight_decay = 0.001)
        acc_list[i] = test_model(val_loader, mod, device)
    bestIdx =  np.argmax(acc_list) #Get the index of the best value 
    bestHP[0,ds] = momentums[bestIdx] #Save the value to the hyperparamter matrix 

    ## Learning rate ##
    print('Optimizing learning rate...')
    acc_list = np.zeros(5) #Initalize validation accuracy matrix 
    for i in tqdm.tqdm(np.arange(len(lrs))): 
        mod = train_network(train_loader, device, num_epochs = 2, momentum = bestHP[0,ds], learning_rate = lrs[i], weight_decay = 0.001)
        acc_list[i] = test_model(val_loader, mod, device)
    bestIdx =  np.argmax(acc_list) #Get the index of the best value 
    bestHP[1,ds] = lrs[bestIdx] #Save the value to the hyperparamter matrix 

    ## Weight decay ##
    print('Optimizing weight decay...')
    acc_list = np.zeros(5) #Initalize validation accuracy matrix 
    for i in tqdm.tqdm(np.arange(len(decays))): 
        mod = train_network(train_loader, device, num_epochs = 2, momentum = bestHP[0,ds], learning_rate = bestHP[1,ds], weight_decay = decays[i])
        acc_list[i] = test_model(val_loader, mod, device)
    bestIdx =  np.argmax(acc_list) #Get the index of the best value 
    bestHP[2,ds] = decays[bestIdx] #Save the value to the hyperparamter matrix 

    #Train the model for many epochs with the best hyperparameters
    mod = train_network(train_loader, device, num_epochs = 20, momentum = bestHP[0,ds], learning_rate = bestHP[1,ds], weight_decay = bestHP[2,ds])
    
    #Test the model on each test set 
    testMatrix[0,ds] = test_model(cifar_test, mod, device)
    testMatrix[1,ds] = test_model(black_test, mod, device)
    testMatrix[2,ds] = test_model(gauss_test, mod, device)

    #Save the model (just in case) 
    if ds == 0: 
        mName = 'cifar_model'
    elif ds == 1: 
        mName = 'black_model'
    elif ds == 2: 
        mName = 'gauss_model'
    
    torch.save(mod.state_dict(),mName)

    #Note: 
    # To load these models again do: 
    # mod = models.vgg11(weights=None) 
    # mod.load_state_dict(torch.load('insert_file_name_here'))

#Save the results
np.save('optimized hyperparameters.npy',bestHP)
np.save('test_accuracies.npy',testMatrix)

Loading data...


100%|██████████| 5/5 [00:00<00:00, 52.35it/s]


Preparing data for loading...
Optimizing momentum...


100%|██████████| 5/5 [02:10<00:00, 26.10s/it]


Optimizing learning rate...


100%|██████████| 5/5 [02:10<00:00, 26.05s/it]


Optimizing weight decay...


100%|██████████| 5/5 [02:10<00:00, 26.06s/it]


Epoch [1/20], Loss: 625.0000
Epoch [2/20], Loss: 625.0000
Epoch [3/20], Loss: 625.0000
Epoch [4/20], Loss: 625.0000
Epoch [5/20], Loss: 625.0000
Epoch [6/20], Loss: 625.0000
Epoch [7/20], Loss: 625.0000
Epoch [8/20], Loss: 625.0000
Epoch [9/20], Loss: 625.0000
Epoch [10/20], Loss: 625.0000
Epoch [11/20], Loss: 625.0000
Epoch [12/20], Loss: 625.0000
Epoch [13/20], Loss: 625.0000
Epoch [14/20], Loss: 625.0000
Epoch [15/20], Loss: 625.0000
Epoch [16/20], Loss: 625.0000
Epoch [17/20], Loss: 625.0000
Epoch [18/20], Loss: 625.0000
Epoch [19/20], Loss: 625.0000
Epoch [20/20], Loss: 625.0000
Loading data...


100%|██████████| 5/5 [00:00<00:00,  7.57it/s]


Preparing data for loading...
Optimizing momentum...


100%|██████████| 5/5 [12:06<00:00, 145.40s/it]


Optimizing learning rate...


100%|██████████| 5/5 [11:57<00:00, 143.42s/it]


Optimizing weight decay...


100%|██████████| 5/5 [11:52<00:00, 142.60s/it]


Epoch [1/20], Loss: 5625.0000
Epoch [2/20], Loss: 5625.0000
Epoch [3/20], Loss: 5625.0000
Epoch [4/20], Loss: 5625.0000
Epoch [5/20], Loss: 5625.0000
Epoch [6/20], Loss: 5625.0000
Epoch [7/20], Loss: 5625.0000
Epoch [8/20], Loss: 5625.0000
Epoch [9/20], Loss: 5625.0000
Epoch [10/20], Loss: 5625.0000
Epoch [11/20], Loss: 5625.0000
Epoch [12/20], Loss: 5625.0000
Epoch [13/20], Loss: 5625.0000
Epoch [14/20], Loss: 5625.0000
Epoch [15/20], Loss: 5625.0000
Epoch [16/20], Loss: 5625.0000
Epoch [17/20], Loss: 5625.0000
Epoch [18/20], Loss: 5625.0000
Epoch [19/20], Loss: 5625.0000
Epoch [20/20], Loss: 5625.0000
Loading data...


100%|██████████| 5/5 [00:00<00:00,  7.07it/s]


Preparing data for loading...
Optimizing momentum...


100%|██████████| 5/5 [11:54<00:00, 142.93s/it]


Optimizing learning rate...


100%|██████████| 5/5 [11:53<00:00, 142.76s/it]


Optimizing weight decay...


100%|██████████| 5/5 [11:53<00:00, 142.73s/it]


Epoch [1/20], Loss: 5625.0000
Epoch [2/20], Loss: 5625.0000
Epoch [3/20], Loss: 5625.0000
Epoch [4/20], Loss: 5625.0000
Epoch [5/20], Loss: 5625.0000
Epoch [6/20], Loss: 5625.0000
Epoch [7/20], Loss: 5625.0000
Epoch [8/20], Loss: 5625.0000
Epoch [9/20], Loss: 5625.0000
Epoch [10/20], Loss: 5625.0000
Epoch [11/20], Loss: 5625.0000
Epoch [12/20], Loss: 5625.0000
Epoch [13/20], Loss: 5625.0000
Epoch [14/20], Loss: 5625.0000
Epoch [15/20], Loss: 5625.0000
Epoch [16/20], Loss: 5625.0000
Epoch [17/20], Loss: 5625.0000
Epoch [18/20], Loss: 5625.0000
Epoch [19/20], Loss: 5625.0000
Epoch [20/20], Loss: 5625.0000


In [54]:
colLabel = ["CIFAR model", "Black model", "Gauss model"]
rowLabel1 = ["Momentum", "Learning rate", "Weight decay"]
rowLabel2 = ["CIFAR test", "Black test", "Gauss test"]

pd.DataFrame(bestHP,rowLabel1,colLabel)

Unnamed: 0,CIFAR model,Black model,Gauss model
Momentum,0.9,0.9,0.9
Learning rate,0.001,0.01,0.01
Weight decay,1e-05,0.0001,0.0001


In [55]:
pd.DataFrame(testMatrix,rowLabel2,colLabel)

Unnamed: 0,CIFAR model,Black model,Gauss model
CIFAR test,73.9,79.87,78.2
Black test,55.713333,77.111111,49.623333
Gauss test,72.44,77.758889,77.797778
