<a href="https://colab.research.google.com/github/Cz1544252489/DailyWork/blob/main/jupyter%20notebook/version0.5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import sys
import copy
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset, Subset, random_split

if torch.cuda.is_available():
    print("CUDA is available. Using GPU.")
else:
    print("CUDA is not available. Using CPU.")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def load_dataset2():
    # Data preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Load the full MNIST training dataset
    full_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

    # 20,000 samples were randomly selected
    subset_indices = torch.randperm(len(full_dataset))[:20000]
    subset_dataset = Subset(full_dataset, subset_indices)

    #  Divide 20,000 samples into 5,000 training sets, 5,000 validation sets, and 10,000 test sets
    train_set, val_set, test_set = random_split(subset_dataset, [5000, 5000, 10000])

    # Scramble the labeling of 2,500 samples in the training set
    rand_indices = torch.randperm(len(train_set))[:2500]
    for idx in rand_indices:
        # A new tag is randomly generated
        new_label = torch.randint(0, 10, (1,)).item()
        train_set.dataset.dataset.targets[subset_indices[train_set.indices[idx]]] = new_label

    # Create a data loader
    trainloader = DataLoader(train_set, batch_size=64, shuffle=True)
    valloader = DataLoader(val_set, batch_size=64, shuffle=True)
    testloader = DataLoader(test_set, batch_size=64, shuffle=True)

    return trainloader, valloader, testloader

def test(net, testloader):
    # Test the network
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)

            # forecast
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')


trainloader, valloader, testloader = load_dataset2()

CUDA is not available. Using CPU.
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|███████████████████████████████████████████████████████████████████| 9912422/9912422 [00:03<00:00, 2561193.85it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|████████████████████████████████████████████████████████████████████████████████████| 28881/28881 [00:00<?, ?it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|███████████████████████████████████████████████████████████████████| 1648877/1648877 [00:00<00:00, 3189349.82it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|█████████████████████████████████████████████████████████████████████████| 4542/4542 [00:00<00:00, 5137683.06it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw



In [2]:
# Define neural networks
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc = nn.Linear(28*28, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.fc(x)
        return x

net_old = SimpleNet()

N = 5000
la = torch.rand([N,1],requires_grad=True).to(device)


In [8]:
# Use the same parameters to compare how well and badly optimizations are good
net = copy.deepcopy(net_old).to(device)

# Define the loss function and optimizer
def lower_function(output, label, la):
    crossentropy = nn.CrossEntropyLoss()
    loss = crossentropy(output, label)*la
    return loss

def upper_function(output, label):
    crossentropy = nn.CrossEntropyLoss()
    loss = crossentropy(output, label)+0.01*(torch.norm(net.fc.weight)+torch.norm(net.fc.bias))
    return loss

# SGD is significantly better than Adam's
# optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=0.01)

# Define the inner layer loop
def inner_loop(trainloader, net, la):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = lower_function(outputs, labels, la[i])
        #s = torch.cat((net.fc.weight.data, net.fc.bias.data.view(-1,1)), dim=1)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    return running_loss, net

# Test the network for the first time
test(net, testloader)


T = 100
# Train the network
for epoch in range(T):
    lower_loss, net  = inner_loop(trainloader, net, la)

    s = torch.cat((net.fc.weight.data, net.fc.bias.data.view(-1,1)), dim=1).view(-1)
    s_grad = torch.cat((net.fc.weight.grad.data, net.fc.bias.grad.data.view(-1,1)), dim=1).view(-1)

    if epoch % 10 ==9:
        print(f'[Epoch {epoch + 1}] lower_loss: {lower_loss / 200:.3f}')

    B = la.grad
    A = torch.cat((net.fc.weight.grad.data, net.fc.bias.grad.data.view(-1,1)), dim=1).view(-1)


    upper_loss = 0.0
    for i, data in enumerate(valloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = net(inputs)
        loss = upper_function(outputs, labels)

        upper_loss += loss


print(f'upper_loss: {upper_loss / 200:.3f}')


test(net, testloader)

Accuracy of the network on the 10000 test images: 11.0 %
[Epoch 10] lower_loss: 0.548
[Epoch 20] lower_loss: 0.497
[Epoch 30] lower_loss: 0.554
[Epoch 40] lower_loss: 0.415
[Epoch 50] lower_loss: 0.460
[Epoch 60] lower_loss: 0.428
[Epoch 70] lower_loss: 0.417
[Epoch 80] lower_loss: 0.580
[Epoch 90] lower_loss: 0.521
[Epoch 100] lower_loss: 0.525
upper_loss: 0.713
Accuracy of the network on the 10000 test images: 51.31 %


In [9]:
test(net, testloader)

Accuracy of the network on the 10000 test images: 51.31 %


In [10]:
print(net.fc.weight.grad.shape,net.fc.bias.shape)

torch.Size([10, 784]) torch.Size([10])


In [11]:
B = la.grad
A = net.fc.weight.grad
B = net.fc.bias.grad.view(-1,1)
C = torch.cat((A, B),dim=1)
s = torch.cat((net.fc.weight.data, net.fc.bias.data.view(-1,1)), dim=1).view(-1)
s_grad = torch.cat((net.fc.weight.grad.data, net.fc.bias.grad.data.view(-1,1)), dim=1).view(-1)
print(s.shape)
print(s_grad.shape)

torch.Size([7850])
torch.Size([7850])


In [12]:
print(inputs.device)

cpu
