## EECE 5564 Project

### Mid-project report deliverables

- List of initial experiments tried that were listed in the abstract
- List of new experiments and future plans
- 2 page max, ~1 page text ~1 page figures works

In [1]:
!wget https://zenodo.org/record/2535967/files/CIFAR-10-C.tar?download=1

--2022-11-21 20:51:58--  https://zenodo.org/record/2535967/files/CIFAR-10-C.tar?download=1
Resolving zenodo.org (zenodo.org)... 188.185.124.72
Connecting to zenodo.org (zenodo.org)|188.185.124.72|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2918471680 (2.7G) [application/octet-stream]
Saving to: ‘CIFAR-10-C.tar?download=1’


2022-11-21 20:53:46 (25.9 MB/s) - ‘CIFAR-10-C.tar?download=1’ saved [2918471680/2918471680]



### Download CIFAR10C

In [3]:
!tar -xvf /content/CIFAR-10-C.tar?download=1

CIFAR-10-C/
CIFAR-10-C/fog.npy
CIFAR-10-C/jpeg_compression.npy
CIFAR-10-C/zoom_blur.npy
CIFAR-10-C/speckle_noise.npy
CIFAR-10-C/glass_blur.npy
CIFAR-10-C/spatter.npy
CIFAR-10-C/shot_noise.npy
CIFAR-10-C/defocus_blur.npy
CIFAR-10-C/elastic_transform.npy
CIFAR-10-C/gaussian_blur.npy
CIFAR-10-C/frost.npy
CIFAR-10-C/saturate.npy
CIFAR-10-C/brightness.npy
CIFAR-10-C/snow.npy
CIFAR-10-C/gaussian_noise.npy
CIFAR-10-C/motion_blur.npy
CIFAR-10-C/contrast.npy
CIFAR-10-C/impulse_noise.npy
CIFAR-10-C/labels.npy
CIFAR-10-C/pixelate.npy


### Setup Dataloaders

In [4]:
!ls

 CIFAR-10-C  'CIFAR-10-C.tar?download=1'   sample_data


In [6]:
DATA_PATH = "/content/CIFAR-10-C/"

In [23]:
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, Dataset
import torch
import numpy as np
from torchvision import transforms

CORRUPTED_CATEGORIES = [
    "brightness", "contrast", "defocus_blur", "elastic_transform", "fog",
    "frost", "gaussian_blur", "gaussian_noise", "glass_blur", "impulse_noise",
    "jpeg_compression", "motion_blur", "pixelate", "saturate", "shot_noise",
    "snow", "spatter", "speckle_noise", "zoom_blur"
]

CIFAR10_MEAN = [0.4913997551666284, 0.48215855929893703, 0.4465309133731618]
CIFAR10_STD = [0.24703225141799082, 0.24348516474564, 0.26158783926049628]

IMAGE_SIZE = 224, 224

train_transform = transforms.Compose([
    transforms.Pad(4, padding_mode='reflect'),
    transforms.RandomHorizontalFlip(),
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD)
])


test_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD)
])

class CIFAR10C(Dataset):
    """
    CIFAR10C Test Dataset: https://arxiv.org/abs/1903.12261

    args: provide a category [string] or simplely 'all' if for the entire dataset
    """
    def __init__(self, category="fog"):
        assert category in CORRUPTED_CATEGORIES + ['all']
        if category == 'all':
            self.data = np.concatenate([np.load(f"{DATA_PATH}{category}.npy") for category in CORRUPTED_CATEGORIES], axis=0)
            self.targets = np.tile(np.load(f"{DATA_PATH}/labels.npy"), reps=len(CORRUPTED_CATEGORIES))
        else:
            self.data = np.load(f"{DATA_PATH}{category}.npy")
            self.targets = np.load(f"{DATA_PATH}/labels.npy")
        
        self.transform = test_transform
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.targets[index]
        x = self.transform(x)
        return x, y
    
    def __len__(self):
        return len(self.data)

def cifar10_dataloaders(batch_size=32, num_workers=2):
    train_data = CIFAR10(root=DATA_PATH, download=True, train=True, transform=train_transform)
    test_data = CIFAR10(root=DATA_PATH, train=False, transform=test_transform)

    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers=num_workers, drop_last=True)
    test_dataloader = DataLoader(test_data, batch_size=batch_size, pin_memory=True, num_workers=num_workers)
    return train_dataloader, test_dataloader

def cifar10c_dataloaders(batch_size=32, num_workers=2, category='all'):
    dataset = CIFAR10C(category=category)
    dataloader = DataLoader(dataset, batch_size=batch_size, pin_memory=True, num_workers=num_workers)
    return None, dataloader

### Input Visualization

TODO visualize original images for the final report for potential figures

### Feature Visualization

Using a pre-trained networks as feature extractors, we can visualize the class-conditional features with varying corruptions.

We could do some small comparison between different feature extractors, but that is probably not the focus of our work.

#### Load Model

In [10]:
from torchvision.models import resnet18

model = resnet18(pretrained=True, num_classes=0)
model.cuda()
model.eval()

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

#### Data Loaders

In [24]:
# standard cifar10
cifar10_train, cifar10_test = cifar10_dataloaders()

# dataloader for each corruption in cifar10c
cifar10c = dict()
for corruption in CORRUPTED_CATEGORIES:
  _, cifar10c_test = cifar10c_dataloaders(category=corruption)
  cifar10c[corruption] = cifar10c_test

Files already downloaded and verified


#### Extract Features

We will extract the features for each image, to be sorted by class label and corruptions

In [25]:
from tqdm import tqdm

# standard cifar10
cifar10_features = []
cifar10_labels = []
for image, label in tqdm(cifar10_test):
    image.cuda()
    features = model(image).detach().cpu().numpy()
    cifar10_features.append(features)
    cifar10_labels.append(label)
cifar10_features = np.concatenate(cifar10_features, axis=0)
cifar10_labels = np.concatenate(cifar10_labels, axis=0)

# cifar10c
cifar10c_features = {}
cifar10c_labels = {}
for corruption in CORRUPTED_CATEGORIES:
  for image, label in tqdm(cifar10c[corruption]):
    features = model(image).detach().cpu().numpy()
    cifar10c_features[corruption].append(features)
    cifar10c_labels[corruption].append(label)
  cifar10c_features[corruption] = np.concatenate(cifar10c_features[corruption], axis=0)
  cifar10c_labels[corruption] = np.concatenate(cifar10c_labels[corruption], axis=0)

  2%|▏         | 5/313 [00:14<14:44,  2.87s/it]


KeyboardInterrupt: ignored

#### Save Features to File

In [None]:
import pickle

with open('cifar10_features.pickle', 'wb') as handle:
    pickle.dump(cifar10_features, handle, protocol=pickle.HIGHEST_PROTOCOL)
  
with open('cifar10_labels.pickle', 'wb') as handle:
    pickle.dump(cifar10c_features, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('cifar10c_features.pickle', 'wb') as handle:
    pickle.dump(cifar10c_features, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('cifar10_labels.pickle', 'wb') as handle:
    pickle.dump(cifar10c_labels, handle, protocol=pickle.HIGHEST_PROTOCOL)

#### PCA Visualization

Visualize:
1. Clusters of all features per class on standard CIFAR10
2. For each class in CIFAR10C, visualize the features per each corruption

In [None]:
from sklearn.decomposition import PCA

