## Install packages

In [9]:
# import import_ipynb
import os
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR,ReduceLROnPlateau
# from models.resnet import ResNet18, ResNet34, ResNet50
from models.resnet_soyeong import ResNet34, ResNet18, ResNet50
from models.densenet import DenseNet3
import torch
# import dataloader # custom

## Checking and setting CUDA device

In [10]:
device = "cuda:0"
log_file = "./training_logs.txt"
 
def eval_accuracy(model,log_file ,m_name,id_name, loader, test_data_name = " ", device = "cuda"):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    with open(log_file, 'a') as file:
        file.write(f'{test_data_name} Test Accuracy ({m_name,id_name}): {100 * correct / total:.2f}% \n')
    print(f'{test_data_name} Test Accuracy ({m_name,id_name}): {100 * correct / total:.2f}%')


## Get DataLoader

In [11]:
from torch.utils.data import DataLoader, Subset, TensorDataset, RandomSampler
from torchvision import datasets
def get_loader(dataset,batch_size,id_name,train = False):
    '''
    dataset = name of dataset for which loader is required\
    batch_size = batch size of the dataloader
    id = indistribution data.

    output -> 
    train_loader , testloader (RGB)
    '''
    data_root='./data' 
    print(f"loader requested for {dataset}, to be used on model trained for {id_name} ")
    # Transformations
    #tranformations for OOD and ADV samples:
    # Transformations for Id dataset (training dataset)


    '''
    id = cifar 10  tranform = cifar10
    ood= mnist tranform = cifar10


    id = mnist tranform =mnist
    ood= = cifar10 tranform = mnist
    '''
    if id_name == 'svhn':
        input_size = (32, 32) 
        transform_id = transforms.Compose([
            transforms.Resize(input_size), 
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            # transforms.Resize(input_size), 
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Common for SVHN
        ])
    elif id_name == 'cifar10':
        input_size = (32, 32) 
        transform_id  = transforms.Compose([
            transforms.Resize(input_size), 
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),   
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))   
        ])
    elif id_name in [ 'mnist','fmnist','kmnist','qmnist']:
        input_size = (32, 32) 
        transform_id=  transforms.Compose([
            transforms.Resize(input_size), 
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            # transforms.Resize(input_size), 
            transforms.Grayscale(num_output_channels=3), 
            transforms.ToTensor(), 
            transforms.Normalize((0.1307,), (0.3081,))
        ])
    elif id_name == 'cifar100':
        input_size = (32, 32) 
        transform_id  = transforms.Compose([
            transforms.Resize(input_size), 
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),   
            transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))   
        ])
    if train == True:
        datasets_id = {
            'cifar10': (
                datasets.CIFAR10(root=data_root, train=True, download=True, transform=transform_id),
                datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform_id)
            ),
            'cifar100': (
                datasets.CIFAR100(root=data_root, train=True, download=True, transform=transform_id),
                datasets.CIFAR100(root=data_root, train=False, download=True, transform=transform_id)
            ),
            'mnist': (
                datasets.MNIST(root=data_root, train=True, download=True, transform=transform_id),
                datasets.MNIST(root=data_root, train=False, download=True, transform=transform_id)
            ),
            'qmnist': (
                datasets.QMNIST(root=data_root, train=True, download=True, transform=transform_id),
                datasets.QMNIST(root=data_root, train=False, download=True, transform=transform_id)
            ),
            'kmnist': (
                datasets.KMNIST(root=data_root, train=True, download=True, transform=transform_id),
                datasets.KMNIST(root=data_root, train=False, download=True, transform=transform_id)
            ),
            'fmnist': (
                datasets.FashionMNIST(root=data_root, train=True, download=True, transform=transform_id),
                datasets.FashionMNIST(root=data_root, train=False, download=True, transform=transform_id)
            ),
            'svhn': (
                datasets.SVHN(root=data_root, split='train', download=True, transform=transform_id),
                datasets.SVHN(root=data_root, split='test', download=True, transform=transform_id)
            ),
        }
        print(" training dataloaders requested . Tranform = ")
         
        train_dataset, test_dataset = datasets_id[id_name]


        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False,num_workers=4)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,num_workers=4)
        # print(test_loader.dataset.transform)
        # print(" below is the tranform for trainloader")
        # print(train_loader.dataset.transform)
        return train_loader, test_loader

    
    print(" testing loader requested. Transform = ")
    if id_name == 'svhn':
        input_size = (32, 32) 
        transform_rgb = transform_id
        transform_bw =  transforms.Compose([
            transforms.Resize(input_size), 
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Common for SVHN
        ])
    elif id_name == 'cifar10':
        input_size = (32, 32) 
        transform_rgb = transform_id
        transform_bw  = transforms.Compose([
            transforms.Resize(input_size), 
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),   
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))   
        ])
    elif id_name in [ 'mnist','fmnist','kmnist','qmnist']:
        input_size = (32, 32) 
        transform_rgb = transform_id
        transform_bw=  transforms.Compose([
            transforms.Resize(input_size), 
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
    elif id_name == 'cifar100':
        input_size = (32, 32) 
        transform_rgb = transform_id
        transform_bw  = transforms.Compose([
            transforms.Resize(input_size), 
            transforms.Grayscale(num_output_channels=3),
            transforms.ToTensor(),   
            transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))   
        ])

    
    datasets_ood = {
        'cifar10': (
            datasets.CIFAR10(root=data_root, train=False, download=True, transform=transform_rgb)
        ),
        'cifar100': (
            datasets.CIFAR100(root=data_root, train=False, download=True, transform=transform_rgb)
        ),
        'mnist': (          
            datasets.MNIST(root=data_root, train=False, download=True, transform=transform_bw)
        ),
        'qmnist': (           
            datasets.QMNIST(root=data_root, train=False, download=True, transform=transform_bw)
        ),
        'kmnist': (             
            datasets.KMNIST(root=data_root, train=False, download=True, transform=transform_bw)
        ),
        'fmnist': (         
            datasets.FashionMNIST(root=data_root, train=False, download=True, transform=transform_bw)
        ),
        'svhn': (
            datasets.SVHN(root=data_root, split='test', download=True, transform=transform_rgb)
        ),
        'eurosat': (
            datasets.EuroSAT(root=data_root, download=True, transform=transform_rgb)   # same for test (no separate split)
        ),
        'fake_data_set': (
             datasets.FakeData(image_size=(3, 32, 32), num_classes=10, transform=transform_rgb, train=False)
        ),
        'isun': (
              datasets.ImageFolder(root=f"{data_root}/iSUN", transform=transform_rgb)
        ),
        'lsun': (
             datasets.LSUN(root=f"{data_root}/lsun_resize", transform=transform_rgb)
        ),
        'dtd': (
            datasets.DTD(root=data_root, split='train', download=True, transform=transform_rgb),
            datasets.DTD(root=data_root, split='test', download=True, transform=transform_rgb)
        ),
        'places365': (
            datasets.Places365(root=data_root, split='val', download=True, transform=transform_rgb)
        )
        # 'inaturalist': (
        #     datasets.INaturalist(root=data_root, version='2021_train', download=False, transform=transform),  # requires download
        #     datasets.INaturalist(root=data_root, version='2021_val', download=True, transform=transform)
        # )
    }
    
   
    
    test_dataset = datasets_ood[id_name]
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,num_workers=4,)
    print(test_loader.dataset.transform)
    return test_loader




## Target Accuracy for Training

In [12]:
accuracy_dict = {
    "resnet18": {
        "svhn": 96.2,
        "mnist": 99.2,
        "kmnist": 99.2,
        "fmnist": 99.2,
        "qmnist": 99.2,
        "cifar10": 94.5,
        "cifar100": 76.4
    },
    "resnet34": {
        "svhn": 96.4,
        "mnist": 99.3,
        "kmnist": 99.3,
        "fmnist": 99.3,
        "qmnist": 99.3,
        "cifar10": 95.2,
        "cifar100": 77.8
    },
    "resnet50": {
        "svhn": 96.5,
        "mnist": 99.4,
        "kmnist": 99.4,
        "fmnist": 99.4,
        "qmnist": 99.4,
        "cifar10": 95.6,
        "cifar100": 78.5
    },
    "densenet3": {
        "svhn": 96.8,
        "mnist": 99.5,
        "kmnist": 99.5,
        "fmnist": 99.5,
        "qmnist": 99.5,
        "cifar10": 96,
        "cifar100": 79.5
    }
}

## Train with SGD optimizer and Cross Entropy Loss

In [13]:
def model_setup(model_name,num_classes,device):
      # Model setup
    if model_name == 'resnet18':
        model = ResNet18(num_c=num_classes).to(device)
    elif model_name == 'resnet34':
        model = ResNet34(num_c=num_classes).to(device)  
    elif model_name == 'resnet50':
        model = ResNet50(num_classes).to(device)          
    elif model_name == 'densenet3':
        model = DenseNet3(100, num_classes, growth_rate=12).to(device)
    return model

def train_sgd(model_name='', dataset='', num_epochs=200, batch_size=128, w_decay= 5e-4,log= log_file):        
    # Dataset setup
    if model_name == "densenet3":
        num_epochs = 300
    ten_classes = ['cifar10','svhn','mnist','fmnist']
    if dataset == 'cifar10' or dataset in ten_classes:
        num_classes = 10
    elif dataset == 'cifar100':  
        num_classes = 100  
            
    save_path = f'./pretrained/SGD/{model_name}_{dataset}/'
    
    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
    trainloader, test_loader = get_loader( dataset=dataset, batch_size=batch_size,id_name=dataset,train=True)
    

    model = model_setup(model_name,num_classes,device)

    
    if os.path.exists(weights_save_path):
        print(f" found weights for {model_name} {dataset} ")
        model.load_state_dict(torch.load(weights_save_path, weights_only=True))

        # Testing
        correct = 0
        total = 0
        model.eval()
        
        with torch.no_grad():
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)

                # Forward pass
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        correct = correct*100/ total
        diff = abs(correct - accuracy_dict[model_name][dataset])
        print(f" accuracy =  {correct} , with diff = {diff}")
        if diff <1 and dataset == "cifar100":
            print("not training ")
            return
        if diff < 0.5:
            print(" Not Training ")
            return
        
    
    model = model_setup(model_name,num_classes,device)
    model.train()
     
    # Scheduler and loss function setup
    milestones = [int(num_epochs * 0.5), int(num_epochs * 0.75)]
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.1, weight_decay=w_decay)
    scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)

    running_loss = 0
    model.to(device)
    epoch_count = 0
    with open(log_file,'a') as file:
        while epoch_count < num_epochs:
            epoch_count += 1
        # for epoch in range(num_epochs):
            epoch_loss = 0.0  # Accumulated loss for one epoch
           
            model.train()  # Set model to training mode
            print(f" running epoch  =  epoch = {epoch_count}")
            file.write(f" running epoch  =  epoch = {epoch_count} \n")
            for i, data in enumerate(trainloader, 0):
                # Load data and labels
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                # Zero the parameter gradients
                optimizer.zero_grad()
                # Forward pass and loss computation
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                # Backward pass and weight update
                loss.backward()
                optimizer.step()
                # Accumulate epoch loss
                epoch_loss += loss.item()
            # Accumulate running loss at the end of the epoch
            running_loss += epoch_loss
            # Update scheduler at the end of each epoch
            scheduler.step()
            # Print loss and save model every 10 epochs
            if (epoch_count + 1) % 5 == 0:
                # Compute average loss over 10 epochs
                avg_loss = running_loss / (5 * len(trainloader))
                print(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.9f}')
                file.write(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.9f} \n\n')
                running_loss = 0.0  # Reset running loss
                # --- Accuracy Evaluation on Test Set ---
                model.eval()  # Set model to evaluation mode
                correct = 0
                total = 0
                val_loss = 0.0
                with torch.no_grad():  # Disable gradient computation
                    for data in test_loader:  # Assuming you have a testloader
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)

                        outputs = model(images)
                        _, predicted = torch.max(outputs.data, 1)  # Get predicted class
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()

                accuracy = 100 * correct / total
                weights_save_path = save_path + f'{model_name}_{dataset}_epoch_{epoch_count + 1}.pth'
                if accuracy < accuracy_dict[model_name][dataset] and epoch_count + 10 > num_epochs:
                    num_epochs+= 50
                if accuracy  >= accuracy_dict[model_name][dataset] :
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                    if not os.path.exists(save_path):
                        os.makedirs(save_path)
                    # Save the model                
                    torch.save(model.state_dict(), weights_save_path)
                    print(f'Model saved at epoch {epoch_count + 1} in folder {save_path}')
                    file.write(f'Model saved at epoch {epoch_count + 1} in folder {save_path} \n')
                    print('Finished Training')
                    return 

                print(f' Accuracy: {accuracy:.2f}% ({correct}/{total}),  epoch total = {num_epochs} , current epoch ={epoch_count}')
                file.write(f' Accuracy: {accuracy:.2f}% ({correct}/{total}),  epoch total = {num_epochs} , current epoch ={epoch_count} \n')
                # Create 'pretrained' folder if it doesn't exist
                if (epoch_count + 1) % 20 == 0:
                    if not os.path.exists(save_path):
                        os.makedirs(save_path)
                    # Save the model                
                    torch.save(model.state_dict(), weights_save_path)
                    print(f'Model saved at epoch {epoch_count + 1} in folder {save_path}')
                    file.write(f'Model saved at epoch {epoch_count + 1} in folder {save_path} \n')

            

    print('Finished Training')

## Train with ADAm optimizer and Cross Entropy Loss

In [14]:
def train_adam(model_name='', dataset='', num_epochs=200, batch_size=128, w_decay= 5e-4):        
    # Dataset setup
    ten_classes = ['cifar10','svhn','mnist','fmnist']
    if dataset == 'cifar10' or dataset in ten_classes:
        num_classes = 10
    elif dataset == 'cifar100':  
        num_classes = 100  
            
    save_path = f'./pretrained/Adam/{model_name}_{dataset}/'
    weights_save_path = save_path + f'{model_name}_{dataset}.pth'

    if os.path.exists(weights_save_path):
        print(f" found trained model weights for {model_name} , {dataset} ")
        model.load_state_dict(torch.load(weights_save_path, weights_only=True))

        # Testing
        correct = 0
        total = 0
        model.eval()
        
        with torch.no_grad():
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)

                # Forward pass
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        correct = correct*100/ total
        diff = abs(correct - accuracy_dict[model_name][dataset])
        print(f" accuracy =  {correct} , with diff = {diff}")
        if diff <1 and dataset == "cifar100":
            print("not training ")
            return
        if diff < 0.5:
            print(" Not Training ")
            return
    with open(log_file,'a') as file:
        file.write(" adam training \n \n")

    
    trainloader, test_loader = get_loader( dataset=dataset, batch_size=batch_size,id_name=dataset,train=True)

    # Model setup
    if model_name == 'resnet18':
        model = ResNet18(num_c=num_classes).to(device)
    elif model_name == 'resnet34':
        model = ResNet34(num_c=num_classes).to(device)  
    elif model_name == 'resnet50':
        model = ResNet50(num_classes).to(device)          
    elif model_name == 'densenet3':
        model = DenseNet3(100, num_classes, growth_rate=12).to(device)
    
    with open(log_file,'a') as file:
        file.write("model loaded and loader created")
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.to(device)
    epoch_count = 0
    val_loss = 0.0
    running_loss = 0
    with open(log_file,'a') as file:
        while epoch_count < num_epochs:
            epoch_count += 1
        # for epoch in range(num_epochs):
            epoch_loss = 0.0  # Accumulated loss for one epoch
            
            model.train()  # Set model to training mode
            print(f" running epoch  =  epoch = {epoch_count}")
            file.write(f" running epoch  =  epoch = {epoch_count} \n")
            for i, data in enumerate(trainloader, 0):
                # Load data and labels
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass and loss computation
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # Backward pass and weight update
                loss.backward()
                optimizer.step()

                # Accumulate epoch loss
                epoch_loss += loss.item()

            # Accumulate running loss at the end of the epoch
            running_loss += epoch_loss
            
            # Print loss and save model every 10 epochs
            if (epoch_count + 1) % 10 == 0:
                # Compute average loss over 10 epochs
                avg_loss = running_loss / (10 * len(trainloader))
                print(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.8f}')
                file.write(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.8f}\n')
                
                running_loss = 0.0  # Reset running loss
                # --- Accuracy Evaluation on Test Set ---
                model.eval()  # Set model to evaluation mode
                correct = 0
                total = 0
                val_loss = 0.0
                with torch.no_grad():  # Disable gradient computation
                    for data in test_loader:  # Assuming you have a testloader
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)

                        outputs = model(images)
                        _, predicted = torch.max(outputs.data, 1)  # Get predicted class
                        val_loss += criterion(outputs, labels).item()
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()

                accuracy = 100 * correct / total

                weights_save_path = save_path + f'{model_name}_{dataset}_epoch_{epoch_count + 1}.pth'
                if accuracy < accuracy_dict[model_name][dataset] and epoch_count + 10 > num_epochs:
                    num_epochs+= 20
                if accuracy+1 >= accuracy_dict[model_name][dataset] and epoch_count == num_epochs and dataset == "Cifar100":
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                if accuracy >= accuracy_dict[model_name][dataset] and epoch_count == num_epochs:
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                print(f' Accuracy: {accuracy:.2f}% ({correct}/{total})')
                file.write(f' Accuracy: {accuracy:.2f}% ({correct}/{total}),  epoch total = {num_epochs} , current epoch ={epoch_count} \n')

                # Create 'pretrained' folder if it doesn't exist

                if not os.path.exists(save_path):
                    os.makedirs(save_path)

                torch.save(model.state_dict(), weights_save_path)
                print(f'Model saved at epoch {epoch_count + 1} in folder {save_path}')
            
            # scheduler.step()
            

    print('Finished Training')

## train using RMSPROP and ADAMW

In [15]:
import copy
def train_RmsProp(model_name='', dataset='', num_epochs=200, batch_size=128):        
    # Dataset setup
    ten_classes = ['cifar10','svhn','mnist','fmnist']
    if dataset == 'cifar10' or dataset in ten_classes:
        num_classes = 10
    elif dataset == 'cifar100':  
        num_classes = 100  
            
    save_path = f'./pretrained/RMSProp/{model_name}_{dataset}/'
    weights_save_path = save_path + f'{model_name}_{dataset}.pth'

    if os.path.exists(weights_save_path):
        print(f" found trained model weights for {model_name} , {dataset} ")
        model.load_state_dict(torch.load(weights_save_path, weights_only=True))

        # Testing
        correct = 0
        total = 0
        model.eval()
        
        with torch.no_grad():
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)

                # Forward pass
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        correct = correct*100/ total
        diff = abs(correct - accuracy_dict[model_name][dataset])
        print(f" accuracy =  {correct} , with diff = {diff}")
        if diff <1 and dataset == "cifar100":
            print("not training ")
            return
        if diff < 0.5:
            print(" Not Training ")
            return
    with open(log_file,'a') as file:
        file.write(" adam training \n \n")

    
    trainloader, test_loader = get_loader( dataset=dataset, batch_size=batch_size,id_name=dataset,train=True)

    # Model setup
    if model_name == 'resnet18':
        model = ResNet18(num_c=num_classes).to(device)
    elif model_name == 'resnet34':
        model = ResNet34(num_c=num_classes).to(device)  
    elif model_name == 'resnet50':
        model = ResNet50(num_classes).to(device)          
    elif model_name == 'densenet3':
        model = DenseNet3(100, num_classes, growth_rate=12).to(device)
    
    with open(log_file,'a') as file:
        file.write("model loaded and loader created")
    # model.apply(lambda m: m.reset_parameters()) 
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.99)

    model.to(device)
    epoch_count = 0
    val_loss = 0.0
    running_loss = 0
    initial_num_epochs = copy.deepcopy(num_epochs)
    with open(log_file,'a') as file:
        while epoch_count < num_epochs:
            epoch_count += 1
        # for epoch in range(num_epochs):
            epoch_loss = 0.0  # Accumulated loss for one epoch
            
            model.train()  # Set model to training mode
            print(f" running epoch  =  epoch = {epoch_count}")
            file.write(f" running epoch  =  epoch = {epoch_count} \n")
            for i, data in enumerate(trainloader, 0):
                # Load data and labels
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                # Zero the parameter gradients
                optimizer.zero_grad()
                # Forward pass and loss computation
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                # Backward pass and weight update
                loss.backward()
                optimizer.step()
                # Accumulate epoch loss
                epoch_loss += loss.item()
            # Accumulate running loss at the end of the epoch
            running_loss += epoch_loss            
            # Print loss and save model every 10 epochs
            if (epoch_count + 1) % 10 == 0:
                # Compute average loss over 10 epochs
                avg_loss = running_loss / (10 * len(trainloader))
                print(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.8f}')
                file.write(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.8f}\n')                
                running_loss = 0.0  # Reset running loss
                # --- Accuracy Evaluation on Test Set ---
                model.eval()  # Set model to evaluation mode
                correct = 0
                total = 0
                val_loss = 0.0
                with torch.no_grad():  # Disable gradient computation
                    for data in test_loader:  # Assuming you have a testloader
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = model(images)
                        _, predicted = torch.max(outputs.data, 1)  # Get predicted class
                        val_loss += criterion(outputs, labels).item()
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                accuracy = 100 * correct / total

                weights_save_path = save_path + f'{model_name}_{dataset}_epoch_{epoch_count + 1}.pth'
                if accuracy < accuracy_dict[model_name][dataset] and epoch_count + 10 > num_epochs:
                    num_epochs+= 20
                if accuracy+1 >= accuracy_dict[model_name][dataset] and epoch_count == num_epochs and dataset == "Cifar100":
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                if accuracy >= accuracy_dict[model_name][dataset] and epoch_count == num_epochs:
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                if num_epochs-initial_num_epochs >200:
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                print(f' Accuracy: {accuracy:.2f}% ({correct}/{total})')
                file.write(f' Accuracy: {accuracy:.2f}% ({correct}/{total}),  epoch total = {num_epochs} , current epoch ={epoch_count} \n')
                # Create 'pretrained' folder if it doesn't exist
                if not os.path.exists(save_path):
                    os.makedirs(save_path)

                torch.save(model.state_dict(), weights_save_path)
                print(f'Model saved at epoch {epoch_count + 1} in folder {save_path}')
            
            # scheduler.step()
            

    print('Finished Training')







def train_AdamW(model_name='', dataset='', num_epochs=200, batch_size=128):        
    # Dataset setup
    
    ten_classes = ['cifar10','svhn','mnist','fmnist']
    if dataset == 'cifar10' or dataset in ten_classes:
        num_classes = 10
    elif dataset == 'cifar100':  
        num_classes = 100  
            
    save_path = f'./pretrained/AdamW/{model_name}_{dataset}/'
    weights_save_path = save_path + f'{model_name}_{dataset}.pth'

    if os.path.exists(weights_save_path):
        print(f" found trained model weights for {model_name} , {dataset} ")
        model.load_state_dict(torch.load(weights_save_path, weights_only=True))

        # Testing
        correct = 0
        total = 0
        model.eval()
        
        with torch.no_grad():
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)

                # Forward pass
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        correct = correct*100/ total
        diff = abs(correct - accuracy_dict[model_name][dataset])
        print(f" accuracy =  {correct} , with diff = {diff}")
        if diff <1 and dataset == "cifar100":
            print("not training ")
            return
        if diff < 0.5:
            print(" Not Training ")
            return
    with open(log_file,'a') as file:
        file.write(" adam training \n \n")

    
    trainloader, test_loader = get_loader( dataset=dataset, batch_size=batch_size,id_name=dataset,train=True)

    # Model setup
    if model_name == 'resnet18':
        model = ResNet18(num_c=num_classes).to(device)
    elif model_name == 'resnet34':
        model = ResNet34(num_c=num_classes).to(device)  
    elif model_name == 'resnet50':
        model = ResNet50(num_classes).to(device)          
    elif model_name == 'densenet3':
        num_epochs = 300
        model = DenseNet3(100, num_classes, growth_rate=12).to(device)
    
    with open(log_file,'a') as file:
        file.write("model loaded and loader created")
    model.apply(lambda m: m.reset_parameters()) 
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.99, momentum=0.9, weight_decay=1e-5)
    initial_num_epochs = copy.deepcopy(num_epochs)
    model.to(device)
    epoch_count = 0
    val_loss = 0.0
    running_loss = 0
    with open(log_file,'a') as file:
        while epoch_count < num_epochs:
            epoch_count += 1
        # for epoch in range(num_epochs):
            epoch_loss = 0.0  # Accumulated loss for one epoch
            
            model.train()  # Set model to training mode
            print(f" running epoch  =  epoch = {epoch_count}")
            file.write(f" running epoch  =  epoch = {epoch_count} \n")
            for i, data in enumerate(trainloader, 0):
                # Load data and labels
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                # Zero the parameter gradients
                optimizer.zero_grad()
                # Forward pass and loss computation
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                # Backward pass and weight update
                loss.backward()
                optimizer.step()
                # Accumulate epoch loss
                epoch_loss += loss.item()
            # Accumulate running loss at the end of the epoch
            running_loss += epoch_loss            
            # Print loss and save model every 10 epochs
            if (epoch_count + 1) % 10 == 0:
                # Compute average loss over 10 epochs
                avg_loss = running_loss / (10 * len(trainloader))
                print(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.8f}')
                file.write(f'Epoch [{epoch_count + 1}] Average Loss: {avg_loss:.8f}\n')                
                running_loss = 0.0  # Reset running loss
                # --- Accuracy Evaluation on Test Set ---
                model.eval()  # Set model to evaluation mode
                correct = 0
                total = 0
                val_loss = 0.0
                with torch.no_grad():  # Disable gradient computation
                    for data in test_loader:  # Assuming you have a testloader
                        images, labels = data
                        images, labels = images.to(device), labels.to(device)
                        outputs = model(images)
                        _, predicted = torch.max(outputs.data, 1)  # Get predicted class
                        val_loss += criterion(outputs, labels).item()
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                accuracy = 100 * correct / total

                weights_save_path = save_path + f'{model_name}_{dataset}_epoch_{epoch_count + 1}.pth'
                if accuracy < accuracy_dict[model_name][dataset] and epoch_count + 10 > num_epochs:
                    num_epochs+= 20
                if accuracy+1 >= accuracy_dict[model_name][dataset] and epoch_count == num_epochs and dataset == "Cifar100":
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                if accuracy >= accuracy_dict[model_name][dataset] and epoch_count == num_epochs:
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                if num_epochs-initial_num_epochs >200:
                    epoch_count = num_epochs
                    weights_save_path = save_path + f'{model_name}_{dataset}.pth'
                print(f' Accuracy: {accuracy:.2f}% ({correct}/{total})')
                file.write(f' Accuracy: {accuracy:.2f}% ({correct}/{total}),  epoch total = {num_epochs} , current epoch ={epoch_count} \n')
                # Create 'pretrained' folder if it doesn't exist
                if not os.path.exists(save_path):
                    os.makedirs(save_path)

                torch.save(model.state_dict(), weights_save_path)
                print(f'Model saved at epoch {epoch_count + 1} in folder {save_path}')
            
            # scheduler.step()
            

    print('Finished Training')

## Train script

In [None]:
models = ["resnet34","densenet3",'resnet50',"resnet18"]
id_names = ['cifar10',"cifar100","mnist","svhn","fmnist","kmnist","qmnist"]
 
# with open(log_file,"a") as file:
#     for id_name in id_names:
#         for model in models:
#             file.write("-----------------------------------------------\n\n")
                
#             file.write(f" processing {model} and {id_name}\n")
#             train_sgd(model,id_name)
#             file.write("SGD done  ------\n\n")
           
#         file.write("-----------------------------------------------\n\n")
 
# models = ["resnet18","resnet34","densenet3",'resnet50']
# id_names = ['cifar10',"cifar100"]
# pair = [("resnet50","svhn"),("resnet50","mnist")]
# with open(log_file,"a") as file:
#     for id_name in id_names:
#         for model in models:
#             file.write("--------------------ADAM ---------------------------\n\n")
                
#             file.write(f" processing {model} and {id_name}\n")
#             train_adam(model,id_name)
#             file.write("SGD done  ------\n\n")
           
#         file.write("-----------------------------------------------\n\n")

#     generate_adversarial_samples(model, test_loader, dataset,, )

with open(log_file,"a") as file:
    for id_name in id_names:
        for model in models:
            file.write("-----------Generating ADV Samples--------------\n\n")
            
            file.write(f" processing {model} and {id_name}\n")
            train_RmsProp(model,id_name)
            # train_AdamW(model,id_name)
            file.write("SGD done  ------\n\n")
           
        file.write("-----------------------------------------------\n\n")
 

loader requested for cifar10, to be used on model trained for cifar10 
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Using downloaded and verified file: ./data/train_32x32.mat
Using downloaded and verified file: ./data/test_32x32.mat
 training dataloaders requested . Tranform = 
 running epoch  =  epoch = 1
 running epoch  =  epoch = 2
 running epoch  =  epoch = 3


# generate ADV Samples 

In [None]:
# import torch
# from torch.utils.data import DataLoader, TensorDataset
# import torchattacks
# import numpy as np
# import os
# from tqdm import tqdm 
 
# def generate_adversarial_samples(model_name,dataset,training_type):
#     """
#     Generate and save adversarial samples using FGSM, PGD, DeepFool, and AutoAttack.
    
#     Args:
#         model: PyTorch model to attack
#         test_loader: DataLoader with test data
#         m_name: Model name for file naming
#         id_name: Identifier for file naming
#         device: Device to run the model on ('cuda' or 'cpu')
#         training_type = SGD,RMSPRop,Adam,AdamW.
#     """
#     batch_size = 2048
#     _, test_loader = get_loader( dataset=dataset, batch_size=batch_size,id_name=dataset,train=True)

#     ten_classes = ['cifar10','svhn','mnist','fmnist']
#     if dataset == 'cifar10' or dataset in ten_classes:
#         num_classes = 10
#     elif dataset == 'cifar100':  
#         num_classes = 100  

#     # Model setup
#     if model_name == 'resnet18':
#         model = ResNet18(num_c=num_classes).to(device)
#     elif model_name == 'resnet34':
#         model = ResNet34(num_c=num_classes).to(device)  
#     elif model_name == 'resnet50':
#         model = ResNet50(num_classes).to(device)          
#     elif model_name == 'densenet3':
#         model = DenseNet3(100, num_classes, growth_rate=12).to(device)
    
#     with open(log_file,'a') as file:
#         file.write("model loaded and loader created")
    
#     # Initialize attack methods
 
#     attacks = {
#         'fgsm': torchattacks.FGSM(model, eps=0.03),
#         'pgd': torchattacks.PGD(model, eps=0.03, alpha=0.01, steps=10),
#         'deepfool': torchattacks.DeepFool(model, steps=50, overshoot=0.02),
#         'autoattack': torchattacks.AutoAttack(model, eps=0.03,version = 'standard',  norm='Linf'),
#         'cw' :  torchattacks.CW(model, c=1e-4, kappa=0, steps=1000, lr=0.01)  
#     }
    
#     # Ensure output directory exists
#     os.makedirs(f'./adv_samples/{training_type}', exist_ok=True)
    
#     for attack_name, attack in attacks.items():
#         save_path = f'./adv_samples/{training_type}/{training_type}/{model_name}_{dataset}_{attack_name}.pt'
#         if os.path.exists(save_path):
#             print(" samples already exist ")
#         else :
#             with open(log_file, 'a') as file:
#                 file.write(f" Creating {attack_name} samples for  {model_name} {dataset} .. \n")
#             data_list = []
#             label_list = []
        
#             for data, target in tqdm(test_loader, desc=f"Generating {attack_name} samples"):
#                 data, target = data.to(device), target.to(device)
                
#                 # Generate adversarial samples
#                 adv_data = attack(data, target)
                
#                 # Convert to numpy for saving
#                 # data_list.extend(adv_data.cpu().numpy())
#                 # label_list.extend(target.cpu().numpy())
#                 data_list.extend(adv_data.detach().cpu().numpy())
#                 label_list.extend(target.detach().cpu().numpy())
            
#             # Save adversarial samples
#             adv_samples = list(zip(data_list, label_list))
            
#             torch.save(adv_samples, save_path)
#             with open(log_file, 'a') as file:
#                 file.write(f'Saved {attack_name} adversarial samples to {save_path} \n')
#             print(f'Saved {attack_name} adversarial samples to {save_path}')
        
#         # Verify saved data can be loaded
#         loaded_data = torch.load(save_path, weights_only=False)
#         loaded_data_list, loaded_label_list = zip(*loaded_data)
#         inputs_tensor = torch.stack([torch.from_numpy(data).float() for data in loaded_data_list])
#         labels_tensor = torch.tensor(loaded_label_list, dtype=torch.long)
#         adv_dataset = TensorDataset(inputs_tensor, labels_tensor)
#         adv_test_loader = DataLoader(adv_dataset, batch_size=test_loader.batch_size, shuffle=False,num_workers=4)
#         eval_accuracy(model,log_file,model_name,dataset,adv_test_loader,attack_name)
#         print(f'Verified loading for {attack_name} dataset: {len(adv_dataset)} samples')

 



## Test model

In [None]:
# def test(model_name='', id_dataset='', attack='', ood_dataset='', batch_size=0):
#     # Dataset setup
#     if id_dataset == 'cifar10':
#         num_classes = 10
#     elif id_dataset == 'cifar100':
#         num_classes = 100  # Regular CIFAR-100 has 100 fine classes
            
#     mean, std = dataloader.get_mean_std(dataset=id_dataset)
            
#     if attack == '' and ood_dataset == '':                
#         print(f'=========================== Test model for {model_name}_{id_dataset} ==================================')
#         _, testloader = dataloader.get_imageloader(dataset=id_dataset, batch_size=batch_size, mean=mean, std=std)        
#     elif attack != '':
#         print(f'=========================== Test model for {model_name}_{attack} ==================================')
#         _, testloader = dataloader.get_imageloader(model_name=model_name, dataset=f'{id_dataset}_{attack}', batch_size=batch_size, mean=mean, std=std)
#     elif ood_dataset != '':                
#         print(f'=========================== Test model for {model_name}_{ood_dataset} ==================================')
#         _, testloader = dataloader.get_imageloader(dataset=id_dataset, batch_size=batch_size, mean=mean, std=std)
    
#     # Model setup
#     if model_name == 'resnet18':
#         model = ResNet18(num_c=num_classes).to(device)
#     elif model_name == 'resnet34':
#         model = ResNet34(num_c=num_classes).to(device)  
#     elif model_name == 'resnet50':
#         model = ResNet50(num_c=num_classes).to(device)          
#     elif model_name == 'densenet3':
#         model = DenseNet3(100, num_classes, growth_rate=12).to(device)

#     # Load model weights (from the last epoch)
#     model_path = f'./pretrained/{model_name}_{id_dataset}.pth'
#     model.load_state_dict(torch.load(model_path, weights_only=True))

#     # Testing
#     correct = 0
#     total = 0
#     model.eval()
    
#     with torch.no_grad():
#         for data in testloader:
#             images, labels = data
#             images, labels = images.to(device), labels.to(device)

#             # Forward pass
#             outputs = model(images)
#             _, predicted = torch.max(outputs, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()

#     # Print accuracy
#     print(f'Accuracy of the network on the {total} test images: {100 * correct / total:.2f} %')