In [None]:
import sys
import copy
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, Subset, random_split

if torch.cuda.is_available():
    print("CUDA is available. Using GPU.")
else:
    print("CUDA is not available. Using CPU.")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def load_dataset2():
    # Data preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Load the full MNIST training dataset
    full_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

    # 20,000 samples were randomly selected
    subset_indices = torch.randperm(len(full_dataset))[:20000]
    subset_dataset = Subset(full_dataset, subset_indices)

    #  Divide 20,000 samples into 5,000 training sets, 5,000 validation sets, and 10,000 test sets
    train_set, val_set, test_set = random_split(subset_dataset, [5000, 5000, 10000])

    # Scramble the labeling of 2,500 samples in the training set
    rand_indices = torch.randperm(len(train_set))[:2500]
    for idx in rand_indices:
        # A new tag is randomly generated
        new_label = torch.randint(0, 10, (1,)).item()
        train_set.dataset.dataset.targets[subset_indices[train_set.indices[idx]]] = new_label

    # Create a data loader
    trainloader = DataLoader(train_set, batch_size=64, shuffle=True)
    valloader = DataLoader(val_set, batch_size=64, shuffle=True)
    testloader = DataLoader(test_set, batch_size=64, shuffle=True)

    return trainloader, valloader, testloader

def test(net, testloader):
    # Test the network
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            # forecast
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total if total > 0 else 0
    print(f'Accuracy of the network on the 10000 test images: {accuracy} %')
    return accuracy



class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc = nn.Linear(28*28, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.fc(x)
        return x

def train(lower_lr, upper_lr):
    # Define the neural network
    net_old = SimpleNet().to(device)

    # Define the loss function and optimizer
    lower_optimizer = optim.SGD(net_old.parameters(), lr=lower_lr, momentum=0.9)
    upper_optimizer = optim.Adam(net_old.parameters(), lr=upper_lr)

    trainloader, valloader, testloader = load_dataset2()

    def lower_function(output, label, la):
        crossentropy = nn.CrossEntropyLoss()
        loss = crossentropy(output, label) * la
        return loss

    def upper_function(output, label):
        crossentropy = nn.CrossEntropyLoss()
        loss = crossentropy(output, label) + 0.01 * (torch.norm(net_old.fc.weight) + torch.norm(net_old.fc.bias))
        return loss

    def inner_loop(trainloader, net, la):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            lower_optimizer.zero_grad()

            outputs = net(inputs)
            loss = lower_function(outputs, labels, la[i])
            loss.backward()
            lower_optimizer.step()

            running_loss += loss.item()
        return running_loss

    def outer_loop(trainloader, net, la):
        upper_optimizer.zero_grad()

        upper_loss = 0.0
        for i, data in enumerate(valloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = net(inputs)
            loss = upper_function(outputs, labels)

            upper_loss += loss

        upper_loss.backward()
        upper_optimizer.step()

        return upper_loss

    T = 10
    la = torch.rand([5000, 1], requires_grad=True).to(device)
    
    for epoch in range(T):
        lower_loss = inner_loop(trainloader, net_old, la)
        
        if epoch % 10 == 9:
            print(f'[Epoch {epoch + 1}] lower_loss: {lower_loss / 200:.3f}')

        upper_loss = outer_loop(trainloader, net_old, la)
        
    return net_old

if __name__ == "__main__":
    # Hyperparameters for hyperparameter optimization
    lower_lr_candidates = [0.01, 0.001, 0.0001]
    upper_lr_candidates = [0.01, 0.001, 0.0001]

    best_accuracy = 0.0
    best_lower_lr = 0.0
    best_upper_lr = 0.0

    for lower_lr in lower_lr_candidates:
        for upper_lr in upper_lr_candidates:
            print(f"Training with lower_lr={lower_lr}, upper_lr={upper_lr}")
            net_trained = train(lower_lr, upper_lr)
            _, _, testloader = load_dataset2()
            accuracy = test(net_trained, testloader)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_lower_lr = lower_lr
                best_upper_lr = upper_lr

    print(f"Best accuracy: {best_accuracy}%, Best lower_lr: {best_lower_lr}, Best upper_lr: {best_upper_lr}")


## 50 epochs

In [1]:
import sys
import copy
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, Subset, random_split

if torch.cuda.is_available():
    print("CUDA is available. Using GPU.")
else:
    print("CUDA is not available. Using CPU.")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def load_dataset2():
    # Data preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Load the full MNIST training dataset
    full_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

    # 20,000 samples were randomly selected
    subset_indices = torch.randperm(len(full_dataset))[:20000]
    subset_dataset = Subset(full_dataset, subset_indices)

    #  Divide 20,000 samples into 5,000 training sets, 5,000 validation sets, and 10,000 test sets
    train_set, val_set, test_set = random_split(subset_dataset, [5000, 5000, 10000])

    # Scramble the labeling of 2,500 samples in the training set
    rand_indices = torch.randperm(len(train_set))[:2500]
    for idx in rand_indices:
        # A new tag is randomly generated
        new_label = torch.randint(0, 10, (1,)).item()
        train_set.dataset.dataset.targets[subset_indices[train_set.indices[idx]]] = new_label

    # Create a data loader
    trainloader = DataLoader(train_set, batch_size=64, shuffle=True)
    valloader = DataLoader(val_set, batch_size=64, shuffle=True)
    testloader = DataLoader(test_set, batch_size=64, shuffle=True)

    return trainloader, valloader, testloader

def test(net, testloader):
    # Test the network
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            # forecast
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total if total > 0 else 0
    print(f'Accuracy of the network on the 10000 test images: {accuracy} %')
    return accuracy



class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc = nn.Linear(28*28, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.fc(x)
        return x

def train(lower_lr, upper_lr):
    # Define the neural network
    net_old = SimpleNet().to(device)

    # Define the loss function and optimizer
    lower_optimizer = optim.SGD(net_old.parameters(), lr=lower_lr, momentum=0.9)
    upper_optimizer = optim.Adam(net_old.parameters(), lr=upper_lr)

    trainloader, valloader, testloader = load_dataset2()

    def lower_function(output, label, la):
        crossentropy = nn.CrossEntropyLoss()
        loss = crossentropy(output, label) * la
        return loss

    def upper_function(output, label):
        crossentropy = nn.CrossEntropyLoss()
        loss = crossentropy(output, label) + 0.01 * (torch.norm(net_old.fc.weight) + torch.norm(net_old.fc.bias))
        return loss

    def inner_loop(trainloader, net, la):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            lower_optimizer.zero_grad()

            outputs = net(inputs)
            loss = lower_function(outputs, labels, la[i])
            loss.backward()
            lower_optimizer.step()

            running_loss += loss.item()
        return running_loss

    def outer_loop(trainloader, net, la):
        upper_optimizer.zero_grad()

        upper_loss = 0.0
        for i, data in enumerate(valloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = net(inputs)
            loss = upper_function(outputs, labels)

            upper_loss += loss

        upper_loss.backward()
        upper_optimizer.step()

        return upper_loss

    T = 50
    la = torch.rand([5000, 1], requires_grad=True).to(device)
    
    for epoch in range(T):
        lower_loss = inner_loop(trainloader, net_old, la)
        
        if epoch % 10 == 9:
            print(f'[Epoch {epoch + 1}] lower_loss: {lower_loss / 200:.3f}')

        upper_loss = outer_loop(trainloader, net_old, la)
        
    return net_old

if __name__ == "__main__":
    # Hyperparameters for hyperparameter optimization
    lower_lr_candidates = [0.01, 0.001, 0.0001]
    upper_lr_candidates = [0.01, 0.001, 0.0001]

    best_accuracy = 0.0
    best_lower_lr = 0.0
    best_upper_lr = 0.0

    for lower_lr in lower_lr_candidates:
        for upper_lr in upper_lr_candidates:
            print(f"Training with lower_lr={lower_lr}, upper_lr={upper_lr}")
            net_trained = train(lower_lr, upper_lr)
            _, _, testloader = load_dataset2()
            accuracy = test(net_trained, testloader)
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_lower_lr = lower_lr
                best_upper_lr = upper_lr

    print(f"Best accuracy: {best_accuracy}%, Best lower_lr: {best_lower_lr}, Best upper_lr: {best_upper_lr}")


CUDA is not available. Using CPU.
Training with lower_lr=0.01, upper_lr=0.01
[Epoch 10] lower_loss: 0.387
[Epoch 20] lower_loss: 0.376
[Epoch 30] lower_loss: 0.407
[Epoch 40] lower_loss: 0.361
[Epoch 50] lower_loss: 0.361
Accuracy of the network on the 10000 test images: 68.91 %
Training with lower_lr=0.01, upper_lr=0.001
[Epoch 10] lower_loss: 0.391
[Epoch 20] lower_loss: 0.381
[Epoch 30] lower_loss: 0.376
[Epoch 40] lower_loss: 0.355
[Epoch 50] lower_loss: 0.368
Accuracy of the network on the 10000 test images: 65.0 %
Training with lower_lr=0.01, upper_lr=0.0001
[Epoch 10] lower_loss: 0.391
[Epoch 20] lower_loss: 0.374
[Epoch 30] lower_loss: 0.357
[Epoch 40] lower_loss: 0.352
[Epoch 50] lower_loss: 0.345
Accuracy of the network on the 10000 test images: 54.35 %
Training with lower_lr=0.001, upper_lr=0.01
[Epoch 10] lower_loss: 0.475
[Epoch 20] lower_loss: 0.465
[Epoch 30] lower_loss: 0.463
[Epoch 40] lower_loss: 0.457
[Epoch 50] lower_loss: 0.448
Accuracy of the network on the 10000 