# CNN evaluation on CIFAR-10


In [None]:
#import adahessian
!pip install torch_optimizer
import torch_optimizer as ada_optim

Collecting torch_optimizer
  Downloading torch_optimizer-0.3.0-py3-none-any.whl (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-ranger>=0.1.1 (from torch_optimizer)
  Downloading pytorch_ranger-0.1.1-py3-none-any.whl (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.5.0->torch_optimizer)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.5.0->torch_optimizer)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.5.0->torch_optimizer)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os

# Device configuration: run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load and pre-process CIFAR-10.
DATA_ROOT = './data'   # download / cache directory for the dataset
BATCH_SIZE = 64        # mini-batch size shared by the train and test loaders
NUM_WORKERS = 2        # background worker processes per DataLoader

# Convert images to tensors, then normalise each of the three channels with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=NUM_WORKERS)

# Test split: kept in a fixed order so evaluation is deterministic.
testset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=NUM_WORKERS)

# Define the model
class SimpleCNN(nn.Module):
    """Small bias-free CNN for 3x32x32 images that returns 10 raw class logits.

    Layout: two 3x3 convolutions (32 and 64 channels) with ReLU, a 2x2 max-pool
    with dropout, a 128-unit fully connected layer with ReLU and dropout, and a
    final 10-way linear layer. No softmax is applied, so the output is suitable
    for ``nn.CrossEntropyLoss``.
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False),
            nn.ReLU()
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.25)
        )

        # padding=1 keeps the 32x32 resolution through both convolutions; the
        # 2x2 pooling halves it to 16x16, hence 64 * 16 * 16 input features.
        self.fc1 = nn.Sequential(
            nn.Linear(64 * 16 * 16, 128, bias=False),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        self.out = nn.Linear(128, 10, bias=False)

        self._initialize_weights()

    def forward(self, x):
        """Return logits of shape ``(batch, 10)`` for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
        x = self.fc1(x)
        output = self.out(x)
        return output

    def _initialize_weights(self):
        """Kaiming-initialise every conv/linear weight; zero any bias present.

        The weights of the linear layers must stay random: filling them with a
        single constant makes every hidden unit and every class logit identical,
        so the untrained network always predicts the same class.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
                # Layers are currently built with bias=False; this keeps the
                # initialiser correct if a bias is enabled later.
                if m.bias is not None:
                    nn.init.zeros_(m.bias)





Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 47417205.88it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Instantiate the model and move it to the selected device
model = SimpleCNN()
model = model.to(device)
print(model)

# Define the loss function and the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Train the model
def train(model, trainloader, testloader, criterion, optimizer, epochs=10):
    """Train ``model`` and evaluate it on ``testloader`` after every epoch.

    Args:
        model: ``nn.Module`` to optimise. Batches are moved to the device of
            its first parameter (CPU when it has no parameters), so the
            function does not rely on a notebook-level ``device`` variable.
        trainloader: iterable yielding ``(inputs, labels)`` training batches.
        testloader: iterable yielding ``(inputs, labels)`` validation batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``;
            it is treated as returning the mean loss of the batch.
        optimizer: optimiser exposing ``zero_grad()`` and ``step()``.
        epochs: number of passes over ``trainloader``.

    Returns:
        ``(train_losses, val_losses)``: two lists holding one per-sample
        average loss per epoch.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # ---- training ----
        model.train()
        loss_sum = 0.0
        seen = 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            # Undo the batch mean so the epoch average is weighted per sample.
            loss_sum += loss.item() * inputs.size(0)
            seen += inputs.size(0)
        # Divide by the samples actually processed; this stays correct with
        # samplers, drop_last, or loaders without a ``dataset`` attribute.
        train_loss = loss_sum / max(seen, 1)
        train_losses.append(train_loss)

        # ---- validation ----
        model.eval()
        loss_sum = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss_sum += loss.item() * inputs.size(0)

                # Predicted class = index of the largest output per row.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = loss_sum / max(total, 1)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / max(total, 1)))

    print("Finished Training/Validation")
    return train_losses, val_losses

#train(model, trainloader, testloader, criterion, optimizer, epochs=10)

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)


In [None]:
# Avaliar o modelo
def test(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')
    acc = correct/total
    return(acc)

# Baseline check: the training call in the cell above is commented out, so this
# reports the accuracy of the model as it was initialised there.
print(test(model=model, testloader=testloader))



Accuracy of the network on the 10000 test images: 10.0 %
0.1


In [None]:
# Metrics of every optimiser run on the CNN, keyed by optimiser name.
results={}


# ADAM
# Instantiate the model
model = SimpleCNN().to(device)
print(model)

# Define the loss function and the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

model_perform={}

# Train the model for 50 epochs, then record its final test accuracy
model_perform['train_losses'], model_perform['validation_losses'] = train(model, trainloader, testloader, criterion, optimizer, epochs=50)
model_perform['accuracy'] = test(model, testloader)
results['adam'] = model_perform
# Checkpoint name follows the "<optimiser>_cnn_cifar10" pattern of the other CNN
# runs (it was previously misspelled "cifa10").
torch.save(model.state_dict(),'adam_cnn_cifar10')

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)
Epoch [1/50], Train Loss: 1.5902, Val Loss: 1.2271, Val Acc: 56.81%
Epoch [2/50], Train Loss: 1.2520, Val Loss: 1.0465, Val Acc: 63.11%
Epoch [3/50], Train Loss: 1.1086, Val Loss: 0.9864, Val Acc: 66.12%
Epoch [4/50], Train Loss: 1.0127, Val Loss: 0.9676, Val Acc: 66.24%
Epoch [5/50], Train Loss: 0.9359, Val Loss: 0.9148, Val Acc: 68.32%
Epoch [6/50], Train Loss: 0.8741, Val Loss: 0.8994, Val A

In [None]:
# SGD

# Instantiate the model and move it to the selected device
model = SimpleCNN()
model = model.to(device)
print(model)

# Define the loss function and the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001)

# Train the model for 50 epochs and collect the per-epoch losses
model_perform = dict(zip(
    ('train_losses', 'validation_losses'),
    train(model, trainloader, testloader, criterion, optimizer, epochs=50),
))
# Final accuracy on the test set, then store the run and checkpoint the weights
model_perform.update(accuracy=test(model, testloader))
results.update(sgd=model_perform)
torch.save(model.state_dict(), 'sgd_cnn_cifar10')

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)
Epoch [1/50], Train Loss: 2.3001, Val Loss: 2.2985, Val Acc: 12.29%
Epoch [2/50], Train Loss: 2.2870, Val Loss: 2.2510, Val Acc: 15.89%
Epoch [3/50], Train Loss: 2.1694, Val Loss: 2.0897, Val Acc: 21.98%
Epoch [4/50], Train Loss: 2.0550, Val Loss: 1.9961, Val Acc: 26.85%
Epoch [5/50], Train Loss: 1.9803, Val Loss: 1.9258, Val Acc: 30.16%
Epoch [6/50], Train Loss: 1.9183, Val Loss: 1.8657, Val A

In [None]:
# Train the model with an optimiser that needs second-order information
def train_hess(model, trainloader, testloader, criterion, optimizer, epochs=10):
    """Train ``model`` keeping the autograd graph of the gradients.

    Same loop as a regular training function, except that the backward pass
    uses ``create_graph=True`` so the optimiser can differentiate the
    gradients again (this notebook uses it with Adahessian).

    Args:
        model: ``nn.Module`` to optimise. Batches are moved to the device of
            its first parameter (CPU when it has no parameters).
        trainloader: iterable yielding ``(inputs, labels)`` training batches.
        testloader: iterable yielding ``(inputs, labels)`` validation batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``;
            it is treated as returning the mean loss of the batch.
        optimizer: optimiser exposing ``zero_grad()`` and ``step()``.
        epochs: number of passes over ``trainloader``.

    Returns:
        ``(train_losses, val_losses)``: two lists holding one per-sample
        average loss per epoch.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # ---- training ----
        model.train()
        loss_sum = 0.0
        seen = 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward(create_graph=True)
            optimizer.step()

            batch_loss = loss.item()
            # Gradients built with create_graph=True reference the graph, which
            # references the parameters again. Dropping them right after the
            # step breaks that cycle so the graph is freed before the next
            # batch and before validation.
            for param in model.parameters():
                param.grad = None
            del loss

            # Undo the batch mean so the epoch average is weighted per sample.
            loss_sum += batch_loss * inputs.size(0)
            seen += inputs.size(0)
        # Divide by the samples actually processed; this stays correct with
        # samplers, drop_last, or loaders without a ``dataset`` attribute.
        train_loss = loss_sum / max(seen, 1)
        train_losses.append(train_loss)

        # ---- validation ----
        model.eval()
        loss_sum = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss_sum += loss.item() * inputs.size(0)

                # Predicted class = index of the largest output per row.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = loss_sum / max(total, 1)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / max(total, 1)))

    print("Finished Training/Validation")
    return train_losses, val_losses


# ADAHESSIAN

# Instantiate the model and move it to the selected device
model = SimpleCNN()
model = model.to(device)
print(model)

# Define the loss function and the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = ada_optim.Adahessian(model.parameters(), lr=0.001)

# Train the model for 50 epochs with the graph-retaining loop and collect the losses
model_perform = dict(zip(
    ('train_losses', 'validation_losses'),
    train_hess(model, trainloader, testloader, criterion, optimizer, epochs=50),
))
# Final accuracy on the test set, then store the run and checkpoint the weights
model_perform.update(accuracy=test(model, testloader))
results.update(Adahessian=model_perform)
torch.save(model.state_dict(), 'adahessian_cnn_cifar10')

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [1/50], Train Loss: 2.3003, Val Loss: 2.2981, Val Acc: 10.00%
Epoch [2/50], Train Loss: 2.2901, Val Loss: 2.2673, Val Acc: 15.79%
Epoch [3/50], Train Loss: 2.1889, Val Loss: 2.0961, Val Acc: 20.89%
Epoch [4/50], Train Loss: 2.0516, Val Loss: 1.9843, Val Acc: 26.24%
Epoch [5/50], Train Loss: 1.9670, Val Loss: 1.9058, Val Acc: 31.75%
Epoch [6/50], Train Loss: 1.9025, Val Loss: 1.8402, Val Acc: 34.61%
Epoch [7/50], Train Loss: 1.8449, Val Loss: 1.7798, Val Acc: 36.58%
Epoch [8/50], Train Loss: 1.7925, Val Loss: 1.7251, Val Acc: 38.61%
Epoch [9/50], Train Loss: 1.7433, Val Loss: 1.6740, Val Acc: 40.11%
Epoch [10/50], Train Loss: 1.6972, Val Loss: 1.6275, Val Acc: 41.68%
Epoch [11/50], Train Loss: 1.6578, Val Loss: 1.5875, Val Acc: 42.97%
Epoch [12/50], Train Loss: 1.6197, Val Loss: 1.5555, Val Acc: 44.40%
Epoch [13/50], Train Loss: 1.5902, Val Loss: 1.5254, Val Acc: 45.29%
Epoch [14/50], Train Loss: 1.5609, Val Loss: 1.4962, Val Acc: 46.00%
Epoch [15/50], Train Loss: 1.5355, Val Loss

In [None]:
import json

# Persist the collected CNN metrics as JSON, then echo them as the cell output.
with open('results.json', mode='w') as f:
    f.write(json.dumps(results))
results

{'adam': {'train_losses': [1.590164584465027,
   1.2519942048645019,
   1.1086417794799805,
   1.0127390199279784,
   0.9358503803253174,
   0.874054369392395,
   0.8110197282791137,
   0.7698344395637512,
   0.7366180632781982,
   0.6949183436584473,
   0.6649042266654969,
   0.6373139685630799,
   0.605095704870224,
   0.5887161101341247,
   0.579220671710968,
   0.5535266841793061,
   0.5390034580230713,
   0.5216813322257996,
   0.5074019215869904,
   0.4936124308204651,
   0.4878903732442856,
   0.4752003056716919,
   0.4720305403518677,
   0.4585906790828705,
   0.45204107931137083,
   0.4461482344055176,
   0.441104617767334,
   0.4303910920333862,
   0.42905445552825927,
   0.4222751952934265,
   0.41356560861587527,
   0.4084376046562195,
   0.40346481088638303,
   0.4079769277381897,
   0.3955028570461273,
   0.3938916843032837,
   0.38916474439620974,
   0.3842560176563263,
   0.377643616733551,
   0.37661827850341795,
   0.37761816915512086,
   0.36651902200222014,
   0.362

# MLP evaluation on CIFAR-10

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os

# Device configuration: run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load and pre-process CIFAR-10.
DATA_ROOT = './data'   # download / cache directory for the dataset
BATCH_SIZE = 64        # mini-batch size shared by the train and test loaders
NUM_WORKERS = 2        # background worker processes per DataLoader

# Convert images to tensors, then normalise each of the three channels with
# mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=NUM_WORKERS)

# Test split: kept in a fixed order so evaluation is deterministic.
testset = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=NUM_WORKERS)

# Define the model

class SimpleMLP(nn.Module):
    """One-hidden-layer perceptron over flattened images; returns raw logits.

    The network deliberately ends without a softmax: ``nn.CrossEntropyLoss``
    applies log-softmax itself, so normalising inside the model would squash
    the scores twice and flatten the gradients. ``argmax`` over the logits
    yields the same predicted class a softmax output would; call
    ``torch.softmax(logits, dim=1)`` when probabilities are needed.

    Args:
        input_size: number of features per sample after flattening.
        num_classes: number of output classes.
    """

    def __init__(self, input_size=3*32*32, num_classes=10):
        super(SimpleMLP, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Dropout(0.25)
        )
        # Kept as an nn.Sequential so the parameter names remain "out.0.weight"
        # and "out.0.bias", matching checkpoints saved by the earlier version.
        self.out = nn.Sequential(
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for the batch ``x``."""
        x = self.flatten(x)
        x = self.fc1(x)
        output = self.out(x)
        return output

# Train the model
def train(model, trainloader, testloader, criterion, optimizer, epochs=10):
    """Train ``model`` and evaluate it on ``testloader`` after every epoch.

    Args:
        model: ``nn.Module`` to optimise. Batches are moved to the device of
            its first parameter (CPU when it has no parameters), so the
            function does not rely on a notebook-level ``device`` variable.
        trainloader: iterable yielding ``(inputs, labels)`` training batches.
        testloader: iterable yielding ``(inputs, labels)`` validation batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``;
            it is treated as returning the mean loss of the batch.
        optimizer: optimiser exposing ``zero_grad()`` and ``step()``.
        epochs: number of passes over ``trainloader``.

    Returns:
        ``(train_losses, val_losses)``: two lists holding one per-sample
        average loss per epoch.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # ---- training ----
        model.train()
        loss_sum = 0.0
        seen = 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            # Undo the batch mean so the epoch average is weighted per sample.
            loss_sum += loss.item() * inputs.size(0)
            seen += inputs.size(0)
        # Divide by the samples actually processed; this stays correct with
        # samplers, drop_last, or loaders without a ``dataset`` attribute.
        train_loss = loss_sum / max(seen, 1)
        train_losses.append(train_loss)

        # ---- validation ----
        model.eval()
        loss_sum = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss_sum += loss.item() * inputs.size(0)

                # Predicted class = index of the largest output per row.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = loss_sum / max(total, 1)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / max(total, 1)))

    print("Finished Training/Validation")
    return train_losses, val_losses

# Avaliar o modelo
def test(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')
    acc = correct/total
    return(acc)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Metrics of every optimiser run on the MLP, keyed by optimiser name.
results_mlp = dict()

# ADAM
# Instantiate the model and move it to the selected device
model = SimpleMLP()
model = model.to(device)
print(model)

# Define the loss function and the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

# Train the model for 50 epochs and collect the per-epoch losses
model_perform = dict(zip(
    ('train_losses', 'validation_losses'),
    train(model, trainloader, testloader, criterion, optimizer, epochs=50),
))
# Final accuracy on the test set, then store the run and checkpoint the weights
model_perform.update(accuracy=test(model, testloader))
results_mlp.update(adam=model_perform)
torch.save(model.state_dict(), 'adam_mlp_cifar10')

SimpleMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Sequential(
    (0): Linear(in_features=3072, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
  )
  (out): Sequential(
    (0): Linear(in_features=512, out_features=10, bias=True)
    (1): Softmax(dim=1)
  )
)
Epoch [1/50], Train Loss: 2.1062, Val Loss: 2.0791, Val Acc: 37.93%
Epoch [2/50], Train Loss: 2.0769, Val Loss: 2.0557, Val Acc: 40.17%
Epoch [3/50], Train Loss: 2.0678, Val Loss: 2.0644, Val Acc: 39.44%
Epoch [4/50], Train Loss: 2.0602, Val Loss: 2.0644, Val Acc: 39.49%
Epoch [5/50], Train Loss: 2.0588, Val Loss: 2.0395, Val Acc: 42.02%
Epoch [6/50], Train Loss: 2.0534, Val Loss: 2.0416, Val Acc: 41.83%
Epoch [7/50], Train Loss: 2.0487, Val Loss: 2.0481, Val Acc: 41.11%
Epoch [8/50], Train Loss: 2.0513, Val Loss: 2.0466, Val Acc: 41.36%
Epoch [9/50], Train Loss: 2.0487, Val Loss: 2.0455, Val Acc: 41.44%
Epoch [10/50], Train Loss: 2.0472, Val Loss: 2.0469, Val Acc: 41.29%
Epoch 

In [None]:
# SGD

# Instantiate the model and move it to the selected device
model = SimpleMLP()
model = model.to(device)
print(model)

# Define the loss function and the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

# Train the model for 50 epochs and collect the per-epoch losses
model_perform = dict(zip(
    ('train_losses', 'validation_losses'),
    train(model, trainloader, testloader, criterion, optimizer, epochs=50),
))
# Final accuracy on the test set, then store the run and checkpoint the weights
model_perform.update(accuracy=test(model, testloader))
results_mlp.update(sgd=model_perform)
torch.save(model.state_dict(), 'sgd_mlp_cifar10')



SimpleMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Sequential(
    (0): Linear(in_features=3072, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
  )
  (out): Sequential(
    (0): Linear(in_features=512, out_features=10, bias=True)
    (1): Softmax(dim=1)
  )
)
Epoch [1/50], Train Loss: 2.2993, Val Loss: 2.2965, Val Acc: 20.49%
Epoch [2/50], Train Loss: 2.2937, Val Loss: 2.2901, Val Acc: 22.61%
Epoch [3/50], Train Loss: 2.2869, Val Loss: 2.2823, Val Acc: 23.55%
Epoch [4/50], Train Loss: 2.2788, Val Loss: 2.2732, Val Acc: 23.41%
Epoch [5/50], Train Loss: 2.2698, Val Loss: 2.2635, Val Acc: 23.33%
Epoch [6/50], Train Loss: 2.2604, Val Loss: 2.2537, Val Acc: 23.43%
Epoch [7/50], Train Loss: 2.2509, Val Loss: 2.2443, Val Acc: 23.49%
Epoch [8/50], Train Loss: 2.2421, Val Loss: 2.2359, Val Acc: 23.80%
Epoch [9/50], Train Loss: 2.2346, Val Loss: 2.2287, Val Acc: 24.01%
Epoch [10/50], Train Loss: 2.2282, Val Loss: 2.2228, Val Acc: 24.31%
Epoch 

In [None]:
def train_hess(model, trainloader, testloader, criterion, optimizer, epochs=10):
    """Train ``model`` keeping the autograd graph of the gradients.

    Same loop as a regular training function, except that the backward pass
    uses ``create_graph=True`` so the optimiser can differentiate the
    gradients again (this notebook uses it with Adahessian).

    Args:
        model: ``nn.Module`` to optimise. Batches are moved to the device of
            its first parameter (CPU when it has no parameters).
        trainloader: iterable yielding ``(inputs, labels)`` training batches.
        testloader: iterable yielding ``(inputs, labels)`` validation batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``;
            it is treated as returning the mean loss of the batch.
        optimizer: optimiser exposing ``zero_grad()`` and ``step()``.
        epochs: number of passes over ``trainloader``.

    Returns:
        ``(train_losses, val_losses)``: two lists holding one per-sample
        average loss per epoch.
    """
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        # ---- training ----
        model.train()
        loss_sum = 0.0
        seen = 0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward(create_graph=True)
            optimizer.step()

            batch_loss = loss.item()
            # Gradients built with create_graph=True reference the graph, which
            # references the parameters again. Dropping them right after the
            # step breaks that cycle so the graph is freed before the next
            # batch and before validation.
            for param in model.parameters():
                param.grad = None
            del loss

            # Undo the batch mean so the epoch average is weighted per sample.
            loss_sum += batch_loss * inputs.size(0)
            seen += inputs.size(0)
        # Divide by the samples actually processed; this stays correct with
        # samplers, drop_last, or loaders without a ``dataset`` attribute.
        train_loss = loss_sum / max(seen, 1)
        train_losses.append(train_loss)

        # ---- validation ----
        model.eval()
        loss_sum = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss_sum += loss.item() * inputs.size(0)

                # Predicted class = index of the largest output per row.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = loss_sum / max(total, 1)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / max(total, 1)))

    print("Finished Training/Validation")
    return train_losses, val_losses



# ADAHESSIAN

# Instantiate the model and move it to the selected device
model = SimpleMLP()
model = model.to(device)
print(model)

# Define the loss function and the optimiser
criterion = nn.CrossEntropyLoss()
optimizer = ada_optim.Adahessian(model.parameters(), lr=0.01)

# Train the model for 50 epochs with the graph-retaining loop and collect the losses
model_perform = dict(zip(
    ('train_losses', 'validation_losses'),
    train_hess(model, trainloader, testloader, criterion, optimizer, epochs=50),
))
# Final accuracy on the test set, then store the run and checkpoint the weights
model_perform.update(accuracy=test(model, testloader))
results_mlp.update(Adahessian=model_perform)
torch.save(model.state_dict(), 'adahessian_mlp_cifar10')

SimpleMLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Sequential(
    (0): Linear(in_features=3072, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
  )
  (out): Sequential(
    (0): Linear(in_features=512, out_features=10, bias=True)
    (1): Softmax(dim=1)
  )
)


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [1/50], Train Loss: 2.2687, Val Loss: 2.2330, Val Acc: 24.45%
Epoch [2/50], Train Loss: 2.2157, Val Loss: 2.1972, Val Acc: 28.11%
Epoch [3/50], Train Loss: 2.1875, Val Loss: 2.1729, Val Acc: 30.65%
Epoch [4/50], Train Loss: 2.1662, Val Loss: 2.1537, Val Acc: 32.57%
Epoch [5/50], Train Loss: 2.1487, Val Loss: 2.1379, Val Acc: 34.20%
Epoch [6/50], Train Loss: 2.1344, Val Loss: 2.1265, Val Acc: 34.97%
Epoch [7/50], Train Loss: 2.1249, Val Loss: 2.1181, Val Acc: 35.47%
Epoch [8/50], Train Loss: 2.1172, Val Loss: 2.1113, Val Acc: 35.97%
Epoch [9/50], Train Loss: 2.1112, Val Loss: 2.1054, Val Acc: 36.64%
Epoch [10/50], Train Loss: 2.1053, Val Loss: 2.1000, Val Acc: 37.20%
Epoch [11/50], Train Loss: 2.1002, Val Loss: 2.0953, Val Acc: 37.47%
Epoch [12/50], Train Loss: 2.0954, Val Loss: 2.0904, Val Acc: 37.95%
Epoch [13/50], Train Loss: 2.0906, Val Loss: 2.0860, Val Acc: 38.37%
Epoch [14/50], Train Loss: 2.0867, Val Loss: 2.0816, Val Acc: 38.80%
Epoch [15/50], Train Loss: 2.0819, Val Loss

In [None]:
import json

# Persist the collected MLP metrics as JSON, then echo them as the cell output.
with open('results_mlp.json', mode='w') as f:
    f.write(json.dumps(results_mlp))
results_mlp

{'adam': {'train_losses': [2.1062370693588255,
   2.076851094207764,
   2.0678223641967772,
   2.0602485543060305,
   2.0587779837799074,
   2.0534436991119387,
   2.048690745010376,
   2.051316252593994,
   2.0486506270980835,
   2.0472399550628664,
   2.0459978631591795,
   2.0459018352508545,
   2.041879363288879,
   2.0415551443862916,
   2.043610722885132,
   2.0331813047790526,
   2.037590685119629,
   2.0437876927947998,
   2.0348826806640625,
   2.034775110015869,
   2.0367631427764894,
   2.035511237182617,
   2.0313923596191406,
   2.0278783959960935,
   2.0293517457580568,
   2.020473405380249,
   2.025565743789673,
   2.0230034784698487,
   2.0239455332946776,
   2.024515530090332,
   2.0291555434417723,
   2.0310216804504395,
   2.0307760675811766,
   2.024975893936157,
   2.020272402267456,
   2.029899219207764,
   2.023634532852173,
   2.0178457112121584,
   2.019310655899048,
   2.025860979309082,
   2.020700854034424,
   2.0217905261993407,
   2.017151405105591,
   2.0