<a href="https://colab.research.google.com/github/chirag-sharma-00/cs182-cv-project/blob/main/Adverserial_project_notebook_inception.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import matplotlib.pyplot as plt
import pandas as pd
import glob
import pathlib
import tqdm
import os
import time
import copy
from __future__ import print_function
from __future__ import division
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset
from PIL import Image
from google.colab import drive

In [None]:
!rm -rf sample_data


In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip drive/MyDrive/182-cv-project/data/tiny-imagenet-200.zip

In [None]:
data_dir = pathlib.Path('tiny-imagenet-200/')

In [None]:
#image_count = len(list(data_dir.glob('**/*.JPEG')))
!rm tiny-imagenet-200/.DS_Store
!rm tiny-imagenet-200/train/.DS_Store
!rm tiny-imagenet-200/val/.DS_Store
!rm tiny-imagenet-200/test/.DS_Store

rm: cannot remove 'tiny-imagenet-200/.DS_Store': No such file or directory
rm: cannot remove 'tiny-imagenet-200/train/.DS_Store': No such file or directory
rm: cannot remove 'tiny-imagenet-200/val/.DS_Store': No such file or directory
rm: cannot remove 'tiny-imagenet-200/test/.DS_Store': No such file or directory


## Preliminary Analysis

In [None]:
images = pd.read_csv("tiny-imagenet-200/words.txt", names = ['Id', 'labels'], sep = '\t')

In [None]:
CLASS_NAMES = np.array([item.name for item in (data_dir / 'train').glob('*')])
CLASS_NAMES.sort()
print(len(CLASS_NAMES)) #should be 200
sum([cls in images['Id'].unique() for cls in CLASS_NAMES])

200


200

## Create augmented validation data folder

In [None]:
import data_augmentation as aug
import os

In [None]:
#os.mkdir("tiny-imagenet-200/val/augmented_images")

prev_annotations = pd.read_csv("tiny-imagenet-200/val/val_annotations.txt", 
                              sep='\t', names=["Filename", "Class", "BB1", 
                                               "BB2", "BB3", "BB4"])
prev_annotations.drop(["BB1", "BB2", "BB3", "BB4"], axis=1, inplace=True)
new_annotations = pd.DataFrame(columns=["Filename", "Class"])

for img_file in os.listdir("tiny-imagenet-200/val/images"):
  path = "tiny-imagenet-200/val/images" + "/" + img_file
  augmented_imgs = aug.augmented_data_from_path(path)
  for i, a in enumerate(augmented_imgs):
    im = Image.fromarray(a)
    prefix = img_file.split(".")[0]
    im.save("tiny-imagenet-200/val/augmented_images/" + prefix + "_" + str(i) + 
            ".JPEG")
    new_annotations = new_annotations.append({"Filename" : prefix + "_" + str(i) + ".JPEG", 
                            "Class" : prev_annotations.loc[
                              prev_annotations["Filename"] == prefix + ".JPEG",
                              "Class"
                            ].item()}, ignore_index=True)
new_annotations.to_csv("tiny-imagenet-200/val/augmented_val_annotations.txt", 
                       sep='\t', index=False, header=False)

In [None]:
!zip -r /content/augmented_images.zip /content/tiny-imagenet-200/val/augmented_images/


zip error: Nothing to do! (try: zip -r /content/augmented_images.zip . -i /content/tiny-imagenet-200/val/augmented_images/)


## Feature extraction/fine tuning model training code

In [None]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=1, is_inception=True):
    if torch.cuda.is_available():
      model = model.cuda()
      
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training (no val for now)
        for phase in ['train','val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # move to gpu
                inputs = inputs.to(device)
                labels.data = labels.data.to(device)
                
                # zero the parameter gradients
                optimizer.zero_grad()
                
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.

                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [None]:
def set_parameter_requires_grad(model, feature_extracting=True):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [None]:
def predict(dataloaders, model): 
    """
    Run a forward pass (without caching data) for given model and return accuracy
    """
    if torch.cuda.is_available():
      model = model.cuda()

    accuracies = []
    model.eval()
    
    for phase in tqdm.tqdm(['train', 'val']): 
        counter = 0
        running_corrects = 0
        running_total = 0

        for inputs, labels in dataloaders[phase]: 
            inputs = inputs.to(device)
            labels.data = labels.data.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            counter += 1
            
            running_corrects += torch.sum(preds == labels.data)
            running_total += len(preds)
            
        phase_acc = running_corrects / running_total
        print(phase_acc)
        accuracies.append(phase_acc)

    return accuracies
            

In [None]:
# Custom dataloader based on https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
class ValidationDataset(Dataset):
  def __init__(self, annotations_file, img_dir, transform=None):
    self.img_labels = pd.read_csv(annotations_file, sep='\t', names=['image', 'label', 'x1', 'y1', 'x2', 'y2'])
    self.img_dir = img_dir
    self.transform = transform

  def __len__(self):
    return len(self.img_labels)

  def __getitem__(self, idx):
    img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
    image = Image.open(img_path)
    image = image.convert('RGB')
    label = self.img_labels.iloc[idx, 1]
    label = np.where(label==CLASS_NAMES)[0][0]
    if self.transform:
        image = self.transform(image)
    return image, label

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Testing Inception v3 out of the box

In [None]:
data_dir = data_dir
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "inception_v3"
# Number of classes in the dataset
num_classes = 200
# Batch size for training (change depending on how much memory you have)
batch_size = 8
# Number of epochs to train for
num_epochs = 3
# Flag for feature extracting. When False, we finetune the whole model,
# when True we only update the reshaped layer params
feature_extract = True
CUDA_LAUNCH_BLOCKING=1

In [None]:
model = torch.hub.load('pytorch/vision:v0.9.0', model_name, pretrained=True)

Downloading: "https://github.com/pytorch/vision/archive/v0.9.0.zip" to /root/.cache/torch/hub/v0.9.0.zip
Downloading: "https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-1a9a5a14.pth


HBox(children=(FloatProgress(value=0.0, max=108857766.0), HTML(value='')))




In [None]:
#change last layers in model to match tiny imagenet
model.AuxLogits.fc = nn.Linear(768, num_classes)
model.fc = nn.Linear(2048, num_classes)

In [None]:
if torch.cuda.is_available():
  model.cuda()

In [None]:
#inception expects input size 3*299*299
input_size = 299

### Feature extracting model performance on original validation data

In [None]:
data_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), 
                                  data_transform),
    'val': ValidationDataset(os.path.join(data_dir, 'val', 'val_annotations.txt'), 
                                      os.path.join(data_dir, 'val', 'images'), data_transform),
}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
num_classes = 200
input_size = 299

model_fe = models.inception_v3(pretrained=True)
set_parameter_requires_grad(model_fe, feature_extract)
# Handle the auxilary net
num_ftrs = model_fe.AuxLogits.fc.in_features
model_fe.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_fe.fc.in_features
model_fe.fc = nn.Linear(num_ftrs,num_classes)

In [None]:
# Send the model to GPU
model_fe = model_fe.to(device)

# Gather the parameters to be optimized/updated in this run. We will only update 
# the parameters that we have just initialized, i.e. the parameters with 
# requires_grad is True.
print("Params to learn:")
params_to_update = []
for name, param in model_fe.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

Params to learn:
	 AuxLogits.fc.weight
	 AuxLogits.fc.bias
	 fc.weight
	 fc.bias


In [None]:
optimizer_fe = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()
# Train and evaluate
model_fe, hist = train_model(model_fe, dataloaders_dict, criterion, 
                             optimizer_fe, num_epochs=num_epochs, 
                             is_inception=True)

In [None]:
accuracies = predict(dataloaders_dict, model_fe)

In [None]:
for i, key in enumerate(image_datasets.keys()):
  print(key + " accuracy = ", accuracies[i])

In [None]:
torch.save(model_fe.state_dict(), 'cs182_project_models/oob-inception.pt')

### Feature extracting model performance on augmented validation data

In [None]:
data_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), 
                                  data_transform),
    'val': ValidationDataset(os.path.join(data_dir, 'val', 'augmented_val_annotations.txt'), 
                                      os.path.join(data_dir, 'val', 'augmented_val_images'), data_transform),
}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
accuracies = predict(dataloaders_dict, model_fe)

In [None]:
for i, key in enumerate(image_datasets.keys()):
  print(key + " accuracy = ", accuracies[i])

## Fine-tuning the out of the box Inception v3 model

In [None]:
data_dir = data_dir
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "inception_v3"
# Number of classes in the dataset
num_classes = 200
# Batch size for training (change depending on how much memory you have)
batch_size = 32
# Number of epochs to train for
num_epochs = 3
# Flag for feature extracting. When False, we finetune the whole model,
# when True we only update the reshaped layer params
feature_extract = True
CUDA_LAUNCH_BLOCKING=1

In [None]:
model = torch.hub.load('pytorch/vision:v0.9.0', model_name, pretrained=True)

Downloading: "https://github.com/pytorch/vision/archive/v0.9.0.zip" to /root/.cache/torch/hub/v0.9.0.zip
Downloading: "https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-1a9a5a14.pth


HBox(children=(FloatProgress(value=0.0, max=108857766.0), HTML(value='')))




In [None]:
#change last layers in model to match tiny imagenet
model.AuxLogits.fc = nn.Linear(768, num_classes)
model.fc = nn.Linear(2048, num_classes)

In [None]:
if torch.cuda.is_available():
  model.cuda()

In [None]:
#inception expects input size 3*299*299
input_size = 299

### Fine-tuned model performance on original validation data

In [None]:
data_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), 
                                  data_transform),
    'val': ValidationDataset(os.path.join(data_dir, 'val', 'val_annotations.txt'), 
                                      os.path.join(data_dir, 'val', 'images'), data_transform),
}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

NameError: ignored

In [None]:
num_classes = 200
input_size = 299

model_fe = models.inception_v3(pretrained=True)
set_parameter_requires_grad(model_fe, not feature_extract)
# Handle the auxilary net
num_ftrs = model_fe.AuxLogits.fc.in_features
model_fe.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_fe.fc.in_features
model_fe.fc = nn.Linear(num_ftrs,num_classes)

In [None]:
# Send the model to GPU
model_fe = model_fe.to(device)

# Gather the parameters to be optimized/updated in this run. We will only update 
# the parameters that we have just initialized, i.e. the parameters with 
# requires_grad is True.
print("Params to learn:")
params_to_update = []
for name, param in model_fe.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

In [None]:
optimizer_fe = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()
# Train and evaluate
model_fe, hist = train_model(model_fe, dataloaders_dict, criterion, 
                             optimizer_fe, num_epochs=num_epochs, 
                             is_inception=True)

Epoch 0/2
----------
train Loss: 3.3031 Acc: 0.5644
val Loss: 0.9213 Acc: 0.7682

Epoch 1/2
----------
train Loss: 1.3897 Acc: 0.7857
val Loss: 0.8355 Acc: 0.7884

Epoch 2/2
----------
train Loss: 0.9278 Acc: 0.8555
val Loss: 0.8725 Acc: 0.7807

Training complete in 45m 4s
Best val Acc: 0.788400


In [None]:
accuracies = predict(dataloaders_dict, model_fe)

 50%|█████     | 1/2 [05:11<05:11, 311.14s/it]

tensor(0.8961, device='cuda:0')


100%|██████████| 2/2 [05:42<00:00, 171.38s/it]

tensor(0.7884, device='cuda:0')





In [None]:
for i, key in enumerate(image_datasets.keys()):
  print(key + " accuracy = ", accuracies[i])

train accuracy =  tensor(0.8961, device='cuda:0')
val accuracy =  tensor(0.7884, device='cuda:0')


In [None]:
torch.save(model_fe.state_dict(), '/content/drive/MyDrive/cs182_project_models/fine-tuned-inception.pt')

### Fine-tuned model performance on augmented validation data

In [None]:
data_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), 
                                  data_transform),
    'val': ValidationDataset(os.path.join(data_dir, 'val', 'augmented_val_annotations.txt'), 
                                      os.path.join(data_dir, 'val', 'augmented_val_images'), data_transform),
}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
accuracies = predict(dataloaders_dict, model_fe)



  0%|          | 0/2 [00:00<?, ?it/s][A[A

 50%|█████     | 1/2 [05:08<05:08, 308.68s/it][A[A

tensor(0.8961, device='cuda:0')




100%|██████████| 2/2 [05:40<00:00, 170.02s/it]

tensor(0.6098, device='cuda:0')





In [None]:
for i, key in enumerate(image_datasets.keys()):
  print(key + " accuracy = ", accuracies[i])

train accuracy =  tensor(0.8961, device='cuda:0')
val accuracy =  tensor(0.6098, device='cuda:0')


###Adversarial Training

In [None]:
#load finetuned model
#model.load_state_dict(torch.load('drive/MyDrive/cs182_project_models/fine-tuned-augmented-train-inception.pt'))
model.load_state_dict(torch.load('drive/MyDrive/chet_models/fine-tuned-inception.pt'))
model.eval()

In [None]:
!pip install torchattacks

Collecting torchattacks
[?25l  Downloading https://files.pythonhosted.org/packages/a5/55/91c60b07daa4538090db811f75a1ab99b6d3db8342965027d76fab361dc7/torchattacks-2.14.2-py3-none-any.whl (92kB)
[K     |███▌                            | 10kB 15.3MB/s eta 0:00:01[K     |███████                         | 20kB 8.7MB/s eta 0:00:01[K     |██████████▋                     | 30kB 6.1MB/s eta 0:00:01[K     |██████████████▏                 | 40kB 3.2MB/s eta 0:00:01[K     |█████████████████▋              | 51kB 3.9MB/s eta 0:00:01[K     |█████████████████████▏          | 61kB 4.2MB/s eta 0:00:01[K     |████████████████████████▊       | 71kB 4.6MB/s eta 0:00:01[K     |████████████████████████████▎   | 81kB 4.7MB/s eta 0:00:01[K     |███████████████████████████████▊| 92kB 5.1MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 4.0MB/s 
[?25hInstalling collected packages: torchattacks
Successfully installed torchattacks-2.14.2


In [None]:
import torchattacks
atks = [torchattacks.FGSM(model, eps=8/255),
        torchattacks.PGD(model, eps=8/255, alpha=2/255, steps=7),
        #torchattacks.Square(model, eps=8/255),
       ]
import random

In [None]:
def train_model_adv(model, dataloaders, criterion, optimizer, num_epochs=3, is_inception=True):
    """
    Takes a pretrained model and trains it using a unique adversarial attack for each epoch
    """
    if torch.cuda.is_available():
      model = model.cuda()
      
    since = time.time()
    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train','val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                # move to gpu
                inputs = inputs.to(device)
                #adv attack
                atk = random.choice(atks)
                inputs = atk(inputs, labels)

                labels.data = labels.data.to(device)
                
                # zero the parameter gradients
                optimizer.zero_grad()
                
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.

                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)

                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)
            torch.save(model_fe.state_dict(), 'drive/MyDrive/chet_models/adverserial-trained-inception-randomized.pt')

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer_fe = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

In [None]:
model_adv, hist = train_model_adv(model_fe, dataloaders_dict, criterion, 
                             optimizer_fe, num_epochs=3, 
                             is_inception=True)

Epoch 0/2
----------
train Loss: 1.0585 Acc: 0.8281
val Loss: 1.4337 Acc: 0.6702
Epoch 1/2
----------
train Loss: 0.8172 Acc: 0.8700
val Loss: 1.4781 Acc: 0.6699
Epoch 2/2
----------
train Loss: 0.6425 Acc: 0.9000
val Loss: 1.5659 Acc: 0.6604
Training complete in 218m 16s
Best val Acc: 0.670200


###Test on Imagenet-a, Imagenet-o data and original Val data



In [None]:
#merge imagenet-a folder with a copy of tiny-imagenet-200
!mkdir tiny-imageneta/
!cp -R tiny-imagenet-200/ tiny-imageneta/

In [None]:
import shutil

In [None]:
!unzip drive/MyDrive/182-cv-project/data/imagenet-a.zip
imagenet_a_path = pathlib.Path('imagenet-a/')
!unzip drive/MyDrive/182-cv-project/data/imagenet-o.zip
imagenet_o_path = pathlib.Path('imagenet-o/')

In [None]:
imagenet_a_classes = np.array([item.name for item in (imagenet_a_path).glob('*')])
imagenet_o_classes = np.array([item.name for item in (imagenet_o_path).glob('*')])

take_imga = [clas for clas in imagenet_a_classes if clas in CLASS_NAMES]
take_imgo = [clas for clas in imagenet_o_classes if clas in CLASS_NAMES]

In [None]:
#copy files from imagenet-a into tiny-imageneta train folder
for dir in take_imga: 
  source_path = os.path.join(imagenet_a_path, dir)
  dest_path = os.path.join(pathlib.Path('tiny-imageneta/tiny-imagenet-200/train/'), dir)

  for file in os.listdir(source_path):
    try: 
      shutil.copy(os.path.join(source_path, file), dest_path)
    except: 
      continue

In [None]:
#merge imagenet-o folder with a copy of tiny-imagenet-200
!mkdir tiny-imageneto/
!cp -R tiny-imagenet-200/ tiny-imageneto/

In [None]:
#copy files from imagenet-a into tiny-imageneta train folder
for dir in take_imgo: 
  source_path = os.path.join(imagenet_o_path, dir)
  dest_path = os.path.join(pathlib.Path('tiny-imageneto/tiny-imagenet-200/train/'), dir)

  for file in os.listdir(source_path):
    try: 
      shutil.copy(os.path.join(source_path, file), dest_path)
    except: 
      continue

In [None]:
#load adversarial model, non randomized
model.load_state_dict(torch.load('drive/MyDrive/cs182_project_models/cs182_project_models/adverserial-trained-inception-no-square.pt'))
model.eval()


In [None]:
#Test adv trained model on original validation data
data_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), 
                                  data_transform),
    'val': ValidationDataset(os.path.join(data_dir, 'val', 'val_annotations.txt'), 
                                      os.path.join(data_dir, 'val', 'images'), data_transform),
}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
orig_val_accuracies = predict(dataloaders_dict, model)

 50%|█████     | 1/2 [17:12<17:12, 1032.58s/it]

tensor(0.3627, device='cuda:0')


100%|██████████| 2/2 [18:56<00:00, 568.01s/it]

tensor(0.3233, device='cuda:0')





In [None]:
#Test adv model on augmented validation data
data_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

image_datasets = {
    'train': datasets.ImageFolder(os.path.join(data_dir, 'train'), 
                                  data_transform),
    'val': ValidationDataset(os.path.join(data_dir, 'val', 'augmented_val_annotations.txt'), 
                                      os.path.join(data_dir, 'val', 'augmented_val_images'), data_transform),
}
# Create training and validation dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
aug_val_accuracies = predict(dataloaders_dict, model)

In [None]:
#Test model on imagenet-a, unseen test dataset
data_transform = transforms.Compose([
        #https://discuss.pytorch.org/t/runtimeerror-stack-expects-each-tensor-to-be-equal-size-but-got-3-224-224-at-entry-0-and-3-224-336-at-entry-3/87211/9
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 
    ])

#train and val are the same, just that the predict function expection train and val keys 
image_datasets = {
    'train': datasets.ImageFolder(imagenet_a_path, 
                                  data_transform),
    'val': datasets.ImageFolder(imagenet_a_path, 
                                  data_transform)
}

dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
imagenet_a_accuracies = predict(dataloaders_dict, model)

 50%|█████     | 1/2 [00:31<00:31, 31.76s/it]

tensor(0.0010, device='cuda:0')


100%|██████████| 2/2 [01:03<00:00, 31.72s/it]

tensor(0.0010, device='cuda:0')





In [None]:
#Test model on imagenet-o
data_transform = transforms.Compose([
        #https://discuss.pytorch.org/t/runtimeerror-stack-expects-each-tensor-to-be-equal-size-but-got-3-224-224-at-entry-0-and-3-224-336-at-entry-3/87211/9
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 
    ])

#train and val are the same, just that the predict function expection train and val keys 
image_datasets = {
    'train': datasets.ImageFolder(imagenet_o_path, 
                                  data_transform),
    'val': datasets.ImageFolder(imagenet_o_path, 
                                  data_transform)
}

dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=batch_size, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
imagenet_o_accuracies = predict(dataloaders_dict, model)

###Test ResNet50 OOB on imagenet-a

In [None]:
model_resnet = torch.hub.load('pytorch/vision:v0.9.0', 'resnet50', pretrained=True)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.9.0
Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




In [None]:
data_transform = transforms.Compose([
        #https://discuss.pytorch.org/t/runtimeerror-stack-expects-each-tensor-to-be-equal-size-but-got-3-224-224-at-entry-0-and-3-224-336-at-entry-3/87211/9
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 
    ])

#train and val are the same, just that the predict function expection train and val keys 
image_datasets = {
    'train': datasets.ImageFolder(imagenet_a_path, 
                                  data_transform),
    'val': datasets.ImageFolder(imagenet_a_path, 
                                  data_transform)
}

dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=2, 
                                                   shuffle=True, num_workers=2) 
                                                   for x in image_datasets.keys()}

In [None]:
predict(dataloaders_dict, model_resnet)




  0%|          | 0/2 [00:00<?, ?it/s][A[A[A


 50%|█████     | 1/2 [00:40<00:40, 40.70s/it][A[A[A

tensor(0.0010, device='cuda:0')





100%|██████████| 2/2 [01:21<00:00, 40.60s/it]

tensor(0.0010, device='cuda:0')





[tensor(0.0010, device='cuda:0'), tensor(0.0010, device='cuda:0')]

In [None]:
sum([1 for dir in os.listdir(imagenet_o_path)])


21