<a href="https://colab.research.google.com/github/bhanup6663/COMP691_DL/blob/Niloofar/Untitled16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [131]:
#CELL 1
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Subset, DataLoader
from numpy.random import RandomState
from sklearn.metrics import classification_report
from torch.optim.lr_scheduler import StepLR

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cpu


In [132]:
# CELL 2
from torchvision import models
import torch.nn as nn
import torch.nn.functional as F

class ModifiedResNet18(nn.Module):
    def __init__(self, num_classes=10, dropout_rate=0.5):
        super(ModifiedResNet18, self).__init__()
        self.resnet = models.resnet18(pretrained=False)
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, 256),  # Reduced size
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(256),  # Adjusted to new size
            nn.Dropout(dropout_rate),
            nn.Linear(256, num_classes)  # Adjusted to new size
        )

    def forward(self, x):
        return self.resnet(x)



def init_weights(m): ##new try
    if isinstance(m, nn.Linear):
        torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            m.bias.data.fill_(0.01)



device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ModifiedResNet18(num_classes=10, dropout_rate=0.5).to(device)

In [133]:
# CELL 3

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    normalize,
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

for images, labels in train_loader:
    if torch.isnan(images).any():
        print("NaN values found in training images")
    if torch.isnan(labels).any():
        print("NaN values found in training labels")

for images, labels in test_loader:
    if torch.isnan(images).any():
        print("NaN values found in test images")
    if torch.isnan(labels).any():
        print("NaN values found in test labels")

Files already downloaded and verified
Files already downloaded and verified


In [134]:
#CELL 4
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])
])
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.247, 0.243, 0.261])
])
cifar_train = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
cifar_val = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_val)




Files already downloaded and verified
Files already downloaded and verified


In [135]:
#CELL 5
def create_data_loaders(seed, samples_per_class=25):
    np.random.seed(seed)
    torch.manual_seed(seed)

    selected_classes = np.random.choice(10, size=2, replace=False)
    train_indices = []
    val_indices = []

    for i in selected_classes:
        class_indices = np.where(np.array(cifar_train.targets) == i)[0]
        np.random.shuffle(class_indices)
        train_indices.extend(class_indices[:samples_per_class])
        val_indices.extend(class_indices[samples_per_class:samples_per_class + 100])

    train_indices = [i for i in train_indices if i < len(cifar_train)]
    val_indices = [i for i in val_indices if i < len(cifar_val)]

    train_subset = Subset(cifar_train, train_indices)
    val_subset = Subset(cifar_val, val_indices)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

    return train_loader, val_loader

In [136]:
#CELL 6

fixed_seed = 1
train_loader, val_loader = create_data_loaders(fixed_seed)

train_class_counts = np.zeros(10, dtype=int)
val_class_counts = np.zeros(10, dtype=int)

for _, labels in train_loader:
    labels_numpy = labels.numpy()
    train_class_counts += np.bincount(labels_numpy, minlength=10)

for _, labels in val_loader:
    labels_numpy = labels.numpy()
    val_class_counts += np.bincount(labels_numpy, minlength=10)

#class_weights = 1.0 / torch.tensor(train_class_counts, dtype=torch.float)
class_weights = 1.0 / torch.tensor(train_class_counts, dtype=torch.float)
class_weights[torch.isinf(class_weights)] = 0
#class_weights[torch.isnan(class_weights)] = 0
class_weights = class_weights.to(device)

print("Class weights:", class_weights)
print(f"Training class counts: {train_class_counts}")
print(f"Validation class counts: {val_class_counts}")

Class weights: tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002,
        0.0002])
Training class counts: [5000 5000 5000 5000 5000 5000 5000 5000 5000 5000]
Validation class counts: [2 5 5 6 4 4 1 3 3 4]


In [137]:
#CELL 7

def mixup_data(x, y, alpha=1.0, device='cuda'):
    '''Compute the mixup data. Return mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

In [138]:
#CELL 8
from torch.nn.utils import clip_grad_value_

def train(model, device, train_loader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
       # clip_grad_value_(model.parameters(), 0.5)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5)
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(output, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
    train_loss = running_loss / len(train_loader)
    train_acc = correct / total * 100
    return train_loss, train_acc


In [139]:
#CELL 9
from torch.utils.data import DataLoader, Subset

def test(model, device, test_dataset, criterion, num_random_samples=10):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_targets = []

    random_indices = torch.randperm(len(test_dataset))[:num_random_samples]
    random_subset = Subset(test_dataset, random_indices)
    random_loader = DataLoader(random_subset, batch_size=num_random_samples, shuffle=False)

    with torch.no_grad():
        for data, targets in random_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            loss = criterion(outputs, targets)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
            all_preds.extend(predicted.view(-1).cpu().numpy())
            all_targets.extend(targets.view(-1).cpu().numpy())

    test_loss = running_loss / len(random_loader)
    test_acc = 100. * correct / total
    return test_loss, test_acc, all_targets, all_preds


In [140]:
# CELL 10

def validate(model, device, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, targets in val_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            loss = criterion(outputs, targets)

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Calculate average loss and accuracy
    avg_loss = running_loss / len(val_loader)
    accuracy = 100. * correct / total

    return avg_loss, accuracy


In [141]:
# CELL 11
import torch
from torch import optim, nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import classification_report

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


model = ModifiedResNet18(num_classes=10, dropout_rate=0.5).to(device)
model.apply(init_weights)


# optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
#optimizer = torch.optim.Adam(model.parameters(), lr=0.00001, weight_decay=1e-4)  # Reduced learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

num_epochs = 10
best_val_acc = 0
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, device, train_loader, optimizer, criterion)
    val_loss, val_acc = validate(model, device, val_loader, criterion)
    print(f'Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')


 #   print(f'Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')
    if val_acc > best_val_acc:
        print(f"Saving new best model at epoch {epoch+1} with Val Acc: {val_acc:.2f}%")
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')

    scheduler.step(val_loss)

test_loss, test_acc, _, _ = test(model, device, test_dataset, criterion, num_random_samples=10)
print(f'Final Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%')

Using device: cpu
Epoch 1/10 - Train Loss: 3.3822, Train Acc: 13.29%, Val Loss: 2.4124, Val Acc: 18.92%
Saving new best model at epoch 1 with Val Acc: 18.92%
Epoch 2/10 - Train Loss: 3.0048, Train Acc: 17.74%, Val Loss: 2.2479, Val Acc: 29.73%
Saving new best model at epoch 2 with Val Acc: 29.73%
Epoch 3/10 - Train Loss: 2.7435, Train Acc: 21.09%, Val Loss: 2.0334, Val Acc: 35.14%
Saving new best model at epoch 3 with Val Acc: 35.14%
Epoch 4/10 - Train Loss: 2.5660, Train Acc: 23.25%, Val Loss: 1.7735, Val Acc: 43.24%
Saving new best model at epoch 4 with Val Acc: 43.24%
Epoch 5/10 - Train Loss: 2.4018, Train Acc: 25.71%, Val Loss: 1.8292, Val Acc: 35.14%
Epoch 6/10 - Train Loss: 2.2804, Train Acc: 27.82%, Val Loss: 1.8891, Val Acc: 43.24%


KeyboardInterrupt: 