In [97]:
!pip install -r requirements.txt



In [98]:
import csv
import os
import torch
import torchvision
import tarfile
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as tt
from torch.utils.data import random_split
from torchvision.utils import make_grid
import matplotlib
import matplotlib.pyplot as plt
from torchvision.transforms import ToTensor
%matplotlib inline

matplotlib.rcParams['figure.facecolor'] = '#ffffff'

In [99]:
import datasets
import torchvision.transforms as transforms

tensor_transform = transforms.Compose([transforms.ToTensor()])
dataset = datasets.C100Dataset('dataset/data/cifar100_nl.csv', 'dataset/data/cifar100_nl_test.csv')
[trainData, trainLabels, testData, testLabels] = dataset.getDataset()




49999
49999
9999
9999


In [100]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

# Assuming trainData, trainLabels, valData, and valLabels are numpy arrays
trainData, valData, trainLabels, valLabels = train_test_split(trainData, trainLabels, test_size=0.2, random_state=42)

trainData = trainData / 255.0
valData = valData / 255.0
testData = testData / 255.0

import numpy as np

# Assuming trainData is your numpy array containing the training data

# Calculate the mean
mean = np.mean(trainData, axis=(0, 1, 2))  # Compute mean along each channel

# Calculate the standard deviation
std = np.std(trainData, axis=(0, 1, 2))  # Compute standard deviation along each channel

print("Mean:", mean)
print("Standard Deviation:", std)

# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),    # Convert to tensor
    transforms.Normalize((0.4867,), (0.2674,))    # Normalize the data
])


Mean: [0.50764684 0.48674372 0.44104357]
Standard Deviation: [0.26745035 0.25658456 0.27634654]


In [101]:

trainData = torch.stack([transform(image) for image in trainData])
valData = torch.stack([transform(image) for image in valData])
testData = torch.stack([transform(image) for image in testData])

# Convert numpy arrays to torch tensors
# trainData = torch.from_numpy(trainData)
trainLabels = torch.from_numpy(trainLabels)
# valData = torch.from_numpy(valData)
valLabels = torch.from_numpy(valLabels)
# testData = torch.from_numpy(testData)
testLabels = torch.from_numpy(testLabels)


# Label smoothing
label_smoothing = .2
num_classes = 100

if label_smoothing > 0:
    trainLabels = trainLabels * (1 - label_smoothing) + label_smoothing / num_classes
    valLabels = valLabels * (1 - label_smoothing) + label_smoothing / num_classes
    testLabels = testLabels * (1 - label_smoothing) + label_smoothing / num_classes


# Convert to float
trainData = trainData.to(torch.float32)
trainLabels = trainLabels.to(torch.long)
valData = valData.to(torch.float32)
valLabels = valLabels.to(torch.long)
testData = testData.to(torch.float32)
testLabels = testLabels.to(torch.long)

# Create TensorDatasets
trainDataset = TensorDataset(trainData, trainLabels)
valDataset = TensorDataset(valData, valLabels)
testDataset = TensorDataset(testData, testLabels)

# Create DataLoaders
trainLoader = DataLoader(trainDataset, batch_size=64, shuffle=True)
valLoader = DataLoader(valDataset, batch_size=64, shuffle=False)
testLoader = DataLoader(testDataset, batch_size=64, shuffle=False)


In [102]:


train_iter = iter(trainLoader)
images, labels = next(train_iter)

images[0].shape


torch.Size([3, 32, 32])

Move to CUDA 

In [103]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
device = get_default_device()

def to_device(entity, device):
    """Move tensor(s) to chosen device"""
    if isinstance(entity, (list,tuple)):
        return [to_device(x, device) for x in entity]
    return entity.to(device, non_blocking=True)

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, loader, device):
        self.loader = loader
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for entity in self.loader:
            yield to_device(entity, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.loader)
    
trainLoader = DeviceDataLoader(trainLoader, device)
testLoader = DeviceDataLoader(testLoader, device)
valLoader = DeviceDataLoader(valLoader, device)

Model Architecture

In [105]:
## ResNet

def conv_block(in_channels, out_channels, pool = False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size = 3, padding = 1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace = True)]

    if pool:
        layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class ResNet(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()

        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool = True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))

        self.conv3 = conv_block(128, 256, pool = True)
        self.conv4 = conv_block(256, 512, pool = True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))

        self.classifier = nn.Sequential(nn.MaxPool2d(4),
                                        nn.Flatten(),
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out

        #mixed_inputs, mixed_targets = divide_and_mix(out, targets, alpha)

        #out = self.classifier(out)
        return self.classifier(out)


In [113]:
from PreResNet import PreResNet
from PreResNet import BasicBlock
from cnn import CNN

model = CNN(n_outputs=100)

Model training

In [114]:
def accuracy(outputs, labels):
    pred, predClassId = torch.max(outputs, dim = 1)
    return torch.tensor(torch.sum(predClassId == labels).item() / len(predClassId))

from tqdm import tqdm
import time

def train(model, train_dl, val_dl, epochs, max_lr, loss_func, optim, warmup_epochs, mixup_alpha=0.0):
    optimizer = optim(model.parameters(), max_lr)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epochs * len(train_dl),
                                                    steps_per_epoch=len(train_dl))

    results = []
    lrs = []
    for epoch in range(epochs):
        model.train()
        train_losses = []
        lrs = []

        if epoch < warmup_epochs:
            warmup_lr = max_lr * (epoch + 1) / warmup_epochs
            for param_group in optimizer.param_groups:
                param_group['lr'] = warmup_lr

        progress_bar = tqdm(total=len(train_dl), desc=f"Epoch {epoch + 1}/{epochs}")
        start_time = time.time()

        for images, labels in train_dl:
            if mixup_alpha != 0:
                mixed_inputs, mixed_labels = mixup_data(images, labels, mixup_alpha)
                logits = model(mixed_inputs)
                loss = loss_func(logits, mixed_labels)
            else:
                logits = model(images)
                loss = loss_func(logits, labels)

            loss.backward()
            train_losses.append(loss)
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
            lrs.append(optimizer.param_groups[0]['lr'])

            progress_bar.set_postfix({'lr': optimizer.param_groups[0]['lr'], 'loss': loss.item()})
            progress_bar.update()

        epoch_train_loss = torch.stack(train_losses).mean()

        model.eval()
        batch_losses = []
        batch_accs = []
        for images, labels in val_dl:
            with torch.no_grad():
                logits = model(images)
                loss = loss_func(logits, labels)
                batch_losses.append(loss)
                batch_accs.append(accuracy(logits, labels))

        batch_losses = torch.stack(batch_losses)
        batch_accs = torch.stack(batch_accs)

        # Apply small-loss selection
        sorted_indices = torch.argsort(batch_losses)
        small_loss_indices = sorted_indices[:len(val_dl) // 2]  # Choose top half with smallest losses

        small_loss_images = torch.index_select(val_dl.dataset.data, 0, small_loss_indices)
        small_loss_labels = torch.index_select(val_dl.dataset.targets, 0, small_loss_indices)

        train_dl_small_loss = DataLoader(
            TensorDataset(small_loss_images, small_loss_labels),
            batch_size=train_dl.batch_size,
            shuffle=True,
            num_workers=train_dl.num_workers
        )

        # Retrain the model on small-loss samples
        for images, labels in train_dl_small_loss:
            if mixup_alpha != 0:
                mixed_inputs, mixed_labels = mixup_data(images, labels, mixup_alpha)
                logits = model(mixed_inputs)
                loss = loss_func(logits, mixed_labels)
            else:
                logits = model(images)
                loss = loss_func(logits, labels)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()

        end_time = time.time()
        elapsed_time = end_time - start_time
        iterations_per_second = len(train_dl) / elapsed_time
        progress_bar.set_postfix({'lr': optimizer.param_groups[0]['lr'], 'loss': loss.item(),
                                  'it/s': iterations_per_second})
        progress_bar.close()

        results.append({'avg_validation_loss': batch_losses.mean().item(), 'avg_train_loss': epoch_train_loss.item(),
                        'avg_validation_accuracy': batch_accs.mean().item()})

    return results, lrs

In [117]:
# Parameters

model = to_device(model, device)
epochs = 10
max_lr = 1e-2
loss_func = F.cross_entropy
optim = torch.optim.Adam
warmup_epochs = 2 # Number of warmup epochs for the learning rate scheduler
mixup_alpha = 0 # Mixup alpha value, 0 for off

# Add these to an array to print
parameters = [epochs, max_lr, loss_func, optim, warmup_epochs, mixup_alpha]

In [118]:
results, lrs = train(model, trainLoader, valLoader, epochs, max_lr, loss_func, optim, warmup_epochs, mixup_alpha)
print(parameters)
for i, result in enumerate(results):
    print("Epoch {}: {:.4f}".format(i + 1, result["avg_validation_accuracy"]))



[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[10, 0.01, <function cross_entropy at 0x00000270124427A0>, <class 'torch.optim.adam.Adam'>, 2, 0]
Epoch 1: 0.3132
Epoch 2: 0.3112
Epoch 3: 0.3107
Epoch 4: 0.3053
Epoch 5: 0.2882
Epoch 6: 0.2827
Epoch 7: 0.2736
Epoch 8: 0.2755
Epoch 9: 0.2714
Epoch 10: 0.2565





In [110]:
def plot(results, pairs):
    fig, axes = plt.subplots(1, len(pairs), figsize = (len(pairs) * 10, 5))
    for i, pair in enumerate(pairs):
        for title, graphs in pair.items():
            axes[i].title = title
            axes[i].legend = graphs
            for graph in graphs:
                axes[i].plot([result[graph] for result in results])
    
plot(results, [{"Accuracy vs. Epochs": ["avg_validation_accuracy"]}, {"Loss vs. Epochs": ["avg_validation_loss", "avg_train_loss"]}, {"Learning Rate vs. Epochs": ["lrs"]}])

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

Error in callback <function _draw_all_if_interactive at 0x0000027016B441F0> (for post_execute):


AttributeError: 'str' object has no attribute 'sticky_edges'

AttributeError: 'str' object has no attribute 'get_position'

<Figure size 3000x500 with 3 Axes>

Evaluation

Report