In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

import torch
import torchvision.datasets as datasets
import kornia as K
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from torchvision import utils
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torchvision.utils import make_grid

from PIL import Image

from utils.data import ZCA_Loader

In [2]:
class TinyImageNetTrainDataset(Dataset):
    def __init__(self, root_dir, wnids_file, transform=None):
        self.root_dir = root_dir  # Path to the "train" directory
        self.transform = transform
        self.class_to_idx = self._load_class_mapping(wnids_file)
        self.images = []
        self.labels = []
        
        # Load image paths and their corresponding labels
        self._load_data()

    def _load_class_mapping(self, wnids_file):
        # Load class IDs from wnids.txt and map to indices
        with open(wnids_file, 'r') as f:
            wnids = f.read().splitlines()
        return {wnid: idx for idx, wnid in enumerate(wnids)}

    def _load_data(self):
        # Iterate over each class folder to load images and labels
        for class_name in self.class_to_idx.keys():
            class_folder = os.path.join(self.root_dir, class_name, "images")
            # Check if the class folder exists
            if os.path.isdir(class_folder):
                # Get all JPEG images in the class folder
                for filename in os.listdir(class_folder):
                    if filename.endswith('.JPEG'):
                        self.images.append(os.path.join(class_folder, filename))
                        self.labels.append(self.class_to_idx[class_name])  # Append the index corresponding to the class

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert('RGB')  # Ensure images are in RGB format
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label
    
def parse_val_annotations(annotation_file):
    label_map = {}
    with open(annotation_file, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            filename, label = parts[0], parts[1]
            label_map[filename] = label
    return label_map

# Step 1: Load class IDs from wnids.txt to get a consistent mapping
def load_class_mapping(wnids_file):
    with open(wnids_file, 'r') as f:
        wnids = [line.strip() for line in f]
    class_to_idx = {wnid: idx for idx, wnid in enumerate(wnids)}
    return class_to_idx



# Step 2: Update the custom dataset to use the class mapping
class TinyImageNetDataset(Dataset):
    def __init__(self, image_dir, annotation_file, wnids_file, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.label_map = parse_val_annotations(annotation_file)
        self.class_to_idx = load_class_mapping(wnids_file)
        
        self.image_filenames = list(self.label_map.keys())
    
    def __len__(self):
        return len(self.image_filenames)
    
    def __getitem__(self, idx):
        img_name = self.image_filenames[idx]
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert("RGB")
        
        # Convert label name to index using the consistent class mapping
        label_name = self.label_map[img_name]
        label = self.class_to_idx[label_name]
        
        if self.transform:
            image = self.transform(image)
            
        return image, label

#labels = parse_val_annotations('data/tiny-imagenet-200-DPCN-u3/val/val_annotations.txt')
#class_to_idx = load_class_mapping('data/tiny-imagenet-200-DPCN-u3/wnids.txt')

In [3]:
layer = 'u2'

#traindir = f'data/tiny-imagenet-200-DPCN-MLCSC-L6/train'
batch_size = 32
#transform = transforms.Compose([
    #transforms.Resize(256),  # Resize images to 256x256
    #transforms.CenterCrop(224),  # Crop the center 224x224
    #transforms.Resize((64, 64)),  # Resize images to 64x64
#    transforms.ToTensor(),  # Convert PIL image to tensor
#    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # Normalize with ImageNet stats
#                         std=[0.229, 0.224, 0.225])
#])

transform = transforms.Compose([
    #transforms.RandomResizedCrop(64, scale=(0.8, 1.0)),
    #transforms.RandomHorizontalFlip(p=0.5),
    #transforms.RandomRotation(degrees=15),
    #transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    #transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
    transforms.ToTensor(), 
    # transforms.Normalize(mean, std)
])

#train_dataset = datasets.ImageFolder(
#        traindir,
#        transform = transform
#    )

#train_loader = torch.utils.data.DataLoader(
#        train_dataset, batch_size=batch_size, shuffle=True,
#        num_workers=8, pin_memory=True)


wnids_file = f'data/tiny-imagenet-200-DPCN-{layer}/wnids.txt'
annotation_file = f'data/tiny-imagenet-200-DPCN-{layer}/val/val_annotations.txt'
testdir = f'data/tiny-imagenet-200-MLCSC-L6/val/images'
traindir = f'data/tiny-imagenet-200-MLCSC-L6/train'


train_dataset = TinyImageNetTrainDataset(traindir, wnids_file, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)


test_dataset = TinyImageNetDataset(image_dir=testdir, annotation_file=annotation_file, wnids_file=wnids_file, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)


In [4]:
print(train_dataset.class_to_idx)
print(test_dataset.class_to_idx)

{'n02124075': 0, 'n04067472': 1, 'n04540053': 2, 'n04099969': 3, 'n07749582': 4, 'n01641577': 5, 'n02802426': 6, 'n09246464': 7, 'n07920052': 8, 'n03970156': 9, 'n03891332': 10, 'n02106662': 11, 'n03201208': 12, 'n02279972': 13, 'n02132136': 14, 'n04146614': 15, 'n07873807': 16, 'n02364673': 17, 'n04507155': 18, 'n03854065': 19, 'n03838899': 20, 'n03733131': 21, 'n01443537': 22, 'n07875152': 23, 'n03544143': 24, 'n09428293': 25, 'n03085013': 26, 'n02437312': 27, 'n07614500': 28, 'n03804744': 29, 'n04265275': 30, 'n02963159': 31, 'n02486410': 32, 'n01944390': 33, 'n09256479': 34, 'n02058221': 35, 'n04275548': 36, 'n02321529': 37, 'n02769748': 38, 'n02099712': 39, 'n07695742': 40, 'n02056570': 41, 'n02281406': 42, 'n01774750': 43, 'n02509815': 44, 'n03983396': 45, 'n07753592': 46, 'n04254777': 47, 'n02233338': 48, 'n04008634': 49, 'n02823428': 50, 'n02236044': 51, 'n03393912': 52, 'n07583066': 53, 'n04074963': 54, 'n01629819': 55, 'n09332890': 56, 'n02481823': 57, 'n03902125': 58, 'n0340

# Define ResNet 9

In [5]:
import torch.nn as nn
import torch.nn.functional as F

def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), 
              nn.BatchNorm2d(out_channels), 
              nn.ReLU(inplace=True)]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class ResNet9(nn.Module):
    def __init__(self, in_channels=3, num_classes=200):
        super().__init__()
        
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        
        self.classifier = nn.Sequential(nn.MaxPool2d(4), 
                                        nn.Flatten(), 
                                        nn.Linear(2048, num_classes))
        
    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.classifier(out)
        return out


In [6]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # Check if GPU is available

net = ResNet9()
net = net.to(device)
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(net.parameters(), lr=0.001)

interval = 200
for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        inputs = inputs.to(device)
        labels = labels.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        
        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total = labels.size(0)
        correct = (predicted == labels).sum().item()
        accuracy = correct / total
        # print statistics
        running_loss += loss.item()
        if i % interval == interval-1:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / interval:.3f} accuracy: {accuracy*100:.3f}')
            running_loss = 0.0
            
    # Validation loss and accuracy
    net.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            #_, predicted = torch.max(outputs.data, 1)
            _, predicted = outputs.topk(5, 1, True, True)
            total += labels.size(0)
            correct += (predicted == labels.view(-1, 1)).sum().item()

    print(f'Validation loss: {val_loss / len(test_loader):.3f} accuracy: {correct / total * 100:.3f}\n')

print('Finished Training')

[1,   200] loss: 6.106 accuracy: 3.125
[1,   400] loss: 5.421 accuracy: 0.000
[1,   600] loss: 5.198 accuracy: 0.000
[1,   800] loss: 5.072 accuracy: 0.000
[1,  1000] loss: 4.895 accuracy: 3.125
[1,  1200] loss: 4.820 accuracy: 0.000
[1,  1400] loss: 4.789 accuracy: 6.250
[1,  1600] loss: 4.780 accuracy: 0.000
[1,  1800] loss: 4.764 accuracy: 3.125
[1,  2000] loss: 4.744 accuracy: 6.250
[1,  2200] loss: 4.744 accuracy: 0.000
[1,  2400] loss: 4.740 accuracy: 0.000
[1,  2600] loss: 4.753 accuracy: 0.000
[1,  2800] loss: 4.734 accuracy: 0.000
[1,  3000] loss: 4.720 accuracy: 3.125
Validation loss: 28.565 accuracy: 2.500

[2,   200] loss: 4.817 accuracy: 3.125
[2,   400] loss: 4.804 accuracy: 3.125
[2,   600] loss: 4.773 accuracy: 0.000
[2,   800] loss: 4.773 accuracy: 3.125
[2,  1000] loss: 4.765 accuracy: 0.000
[2,  1200] loss: 4.767 accuracy: 0.000
[2,  1400] loss: 4.753 accuracy: 6.250
[2,  1600] loss: 4.754 accuracy: 6.250
[2,  1800] loss: 4.751 accuracy: 0.000
[2,  2000] loss: 4.759 

In [8]:
# test the model on training data
correct = 0
total = 0
with torch.no_grad():
    for data in train_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print(f'Accuracy of the network on the training images: {100 * correct / total} %')

Accuracy of the network on the training images: 2.372 %


# Test the model on test data

In [15]:
# test on test data
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print(f'Accuracy of the network on the test images: {100 * correct / total} %')

Accuracy of the network on the test images: 29.27 %


In [10]:
# save model to "/saved_models/resnet9_u3_tinyimagenet.pth"
#torch.save(net.state_dict(), "saved_models/resnet9_u2_tinyimagenet.pth")  # Save the model

# Train Using Original Data

In [4]:
layer = 'u2'

traindir = f'data/tiny-imagenet-200-DPCN-{layer}/train'
batch_size = 32
#transform = transforms.Compose([
    #transforms.Resize(256),  # Resize images to 256x256
    #transforms.CenterCrop(224),  # Crop the center 224x224
    #transforms.Resize((64, 64)),  # Resize images to 64x64
#    transforms.ToTensor(),  # Convert PIL image to tensor
#    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # Normalize with ImageNet stats
#                         std=[0.229, 0.224, 0.225])
#])

transform = transforms.Compose([
    #transforms.RandomResizedCrop(64, scale=(0.8, 1.0)),
    #transforms.RandomHorizontalFlip(p=0.5),
    #transforms.RandomRotation(degrees=15),
    #transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    #transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0)),
    transforms.ToTensor(), 
    transforms.Normalize([0.4802, 0.4481, 0.3975], [0.2302, 0.2265, 0.2262])
    # transforms.Normalize(mean, std)
])

#train_dataset = datasets.ImageFolder(
#        traindir,
#        transform = transform
#    )

#train_loader = torch.utils.data.DataLoader(
#        train_dataset, batch_size=batch_size, shuffle=True,
#        num_workers=8, pin_memory=True)


wnids_file = f'data/tiny-imagenet-200/wnids.txt'
annotation_file = f'data/tiny-imagenet-200/val/val_annotations.txt'
testdir = f'data/tiny-imagenet-200/val/images'
traindir = f'data/tiny-imagenet-200/train'


train_dataset = TinyImageNetTrainDataset(traindir, wnids_file, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)


test_dataset = TinyImageNetDataset(image_dir=testdir, annotation_file=annotation_file, wnids_file=wnids_file, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)


In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # Check if GPU is available

net = ResNet9()
net = net.to(device)
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(net.parameters(), lr=0.001)

interval = 200
for epoch in range(7):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        inputs = inputs.to(device)
        labels = labels.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        
        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total = labels.size(0)
        correct = (predicted == labels).sum().item()
        accuracy = correct / total
        # print statistics
        running_loss += loss.item()
        if i % interval == interval-1:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / interval:.3f} accuracy: {accuracy*100:.3f}')
            running_loss = 0.0
            
    # Validation loss and accuracy
    net.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            #_, predicted = torch.max(outputs.data, 1)
            _, predicted = outputs.topk(5, 1, True, True)
            total += labels.size(0)
            correct += (predicted == labels.view(-1, 1)).sum().item()

    print(f'Validation loss: {val_loss / len(test_loader):.3f} accuracy: {correct / total * 100:.3f}\n')

print('Finished Training')

[1,   200] loss: 6.910 accuracy: 0.000
[1,   400] loss: 5.638 accuracy: 0.000
[1,   600] loss: 5.231 accuracy: 3.125
[1,   800] loss: 4.912 accuracy: 6.250
[1,  1000] loss: 4.736 accuracy: 9.375
[1,  1200] loss: 4.615 accuracy: 3.125
[1,  1400] loss: 4.462 accuracy: 3.125
[1,  1600] loss: 4.365 accuracy: 9.375
[1,  1800] loss: 4.192 accuracy: 15.625
[1,  2000] loss: 4.115 accuracy: 9.375
[1,  2200] loss: 4.008 accuracy: 25.000
[1,  2400] loss: 3.925 accuracy: 15.625
[1,  2600] loss: 3.828 accuracy: 15.625
[1,  2800] loss: 3.708 accuracy: 15.625
[1,  3000] loss: 3.680 accuracy: 12.500
Validation loss: 3.569 accuracy: 45.710

[2,   200] loss: 3.629 accuracy: 21.875
[2,   400] loss: 3.495 accuracy: 25.000
[2,   600] loss: 3.371 accuracy: 25.000
[2,   800] loss: 3.333 accuracy: 21.875
[2,  1000] loss: 3.255 accuracy: 21.875
[2,  1200] loss: 3.224 accuracy: 21.875
[2,  1400] loss: 3.167 accuracy: 43.750
[2,  1600] loss: 3.148 accuracy: 37.500
[2,  1800] loss: 3.113 accuracy: 25.000
[2,  200

In [6]:
# test the model on training data
correct = 0
total = 0
with torch.no_grad():
    for data in train_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print(f'Accuracy of the network on the training images: {100 * correct // total} %')

Accuracy of the network on the training images: 89 %


In [None]:
# test on test data
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print(f'Accuracy of the network on the test images: {100 * correct / total} %')

Accuracy of the network on the test images: 39 %
