# CNN evaluation for cifar10


In [None]:
#import adahessian
!pip install torch_optimizer
import torch_optimizer as ada_optim

Collecting torch_optimizer
  Downloading torch_optimizer-0.3.0-py3-none-any.whl (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m51.2/61.9 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-ranger>=0.1.1 (from torch_optimizer)
  Downloading pytorch_ranger-0.1.1-py3-none-any.whl (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.5.0->torch_optimizer)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.5.0->torch_optimizer)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.5.0->torch_optimizer)
  

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

# Carregar e Pré-processar o CIFAR-10
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)

# Definir o Modelo
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False),
            nn.ReLU()
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(0.25)
        )

        self.fc1 = nn.Sequential(
            nn.Linear(64 * 16 * 16, 128, bias=False),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        self.out = nn.Linear(128, 10, bias=False)

        self._initialize_weights()

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # Flatten the output
        x = self.fc1(x)
        output = self.out(x)
        return output

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
            if isinstance(m, nn.Linear):
                nn.init.constant_(m.weight, 1e-4)  # Regularization term





Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 47417205.88it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
# Instanciar o modelo
model = SimpleCNN().to(device)
print(model)

# Definir a função de perda
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Treinar o modelo
def train(model, trainloader, testloader, criterion, optimizer, epochs=10):
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        #training
        model.train()
        train_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)


            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


            train_loss += loss.item()*inputs.size(0)
        train_loss = train_loss/len(trainloader.dataset)
        train_losses.append(train_loss)

        #validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)

                # Calculate accuracy
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(testloader.dataset)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / total))





    print("Finished Training/Validation")
    return train_losses.copy(), val_losses.copy()

#train(model, trainloader, testloader, criterion, optimizer, epochs=10)

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)


In [None]:
# Avaliar o modelo
def test(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total} %')
    acc = correct/total
    return(acc)

print(test(model, testloader))



Accuracy of the network on the 10000 test images: 10.0 %
0.1


In [None]:
results={}


#ADAM
# Instanciar o modelo
model = SimpleCNN().to(device)
print(model)

# Definir a função de perda
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)




model_perform={}

#treina o modelo com 50 epocas
model_perform['train_losses'], model_perform['validation_losses'] = train(model, trainloader, testloader, criterion, optimizer, epochs=50)
model_perform['accuracy'] = test(model, testloader)
results['adam'] = model_perform
torch.save(model.state_dict(),'adam_cnn_cifa10')

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)
Epoch [1/50], Train Loss: 1.5902, Val Loss: 1.2271, Val Acc: 56.81%
Epoch [2/50], Train Loss: 1.2520, Val Loss: 1.0465, Val Acc: 63.11%
Epoch [3/50], Train Loss: 1.1086, Val Loss: 0.9864, Val Acc: 66.12%
Epoch [4/50], Train Loss: 1.0127, Val Loss: 0.9676, Val Acc: 66.24%
Epoch [5/50], Train Loss: 0.9359, Val Loss: 0.9148, Val Acc: 68.32%
Epoch [6/50], Train Loss: 0.8741, Val Loss: 0.8994, Val A

In [None]:
#SGD

# Instanciar o modelo
model = SimpleCNN().to(device)
print(model)

# Definir a função de perda
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)



model_perform={}
#treina o modelo com 30 epocas
model_perform['train_losses'], model_perform['validation_losses'] = train(model, trainloader, testloader, criterion, optimizer, epochs=50)
model_perform['accuracy'] = test(model, testloader)
results['sgd'] = model_perform
torch.save(model.state_dict(),'sgd_cnn_cifar10')

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)
Epoch [1/50], Train Loss: 2.3001, Val Loss: 2.2985, Val Acc: 12.29%
Epoch [2/50], Train Loss: 2.2870, Val Loss: 2.2510, Val Acc: 15.89%
Epoch [3/50], Train Loss: 2.1694, Val Loss: 2.0897, Val Acc: 21.98%
Epoch [4/50], Train Loss: 2.0550, Val Loss: 1.9961, Val Acc: 26.85%
Epoch [5/50], Train Loss: 1.9803, Val Loss: 1.9258, Val Acc: 30.16%
Epoch [6/50], Train Loss: 1.9183, Val Loss: 1.8657, Val A

In [None]:
# Treinar o modelo
def train_hess(model, trainloader, testloader, criterion, optimizer, epochs=10):
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        #training
        model.train()
        train_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)


            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward(create_graph=True)
            optimizer.step()


            train_loss += loss.item()*inputs.size(0)
        train_loss = train_loss/len(trainloader.dataset)
        train_losses.append(train_loss)

        #validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)

                # Calculate accuracy
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(testloader.dataset)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / total))





    print("Finished Training/Validation")
    return train_losses.copy(), val_losses.copy()


#ADAHESSIAN

# Instanciar o modelo
model = SimpleCNN().to(device)
print(model)

# Definir a função de perda
criterion = nn.CrossEntropyLoss()
optimizer = ada_optim.Adahessian(model.parameters(), lr = 0.001)



model_perform={}
#treina o modelo com 30 epocas
model_perform['train_losses'], model_perform['validation_losses'] = train_hess(model, trainloader, testloader, criterion, optimizer, epochs=50)
model_perform['accuracy'] = test(model, testloader)
results['Adahessian'] = model_perform
torch.save(model.state_dict(),'adahessian_cnn_cifar10')

SimpleCNN(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=16384, out_features=128, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=128, out_features=10, bias=False)
)


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [1/50], Train Loss: 2.3003, Val Loss: 2.2981, Val Acc: 10.00%
Epoch [2/50], Train Loss: 2.2901, Val Loss: 2.2673, Val Acc: 15.79%
Epoch [3/50], Train Loss: 2.1889, Val Loss: 2.0961, Val Acc: 20.89%
Epoch [4/50], Train Loss: 2.0516, Val Loss: 1.9843, Val Acc: 26.24%
Epoch [5/50], Train Loss: 1.9670, Val Loss: 1.9058, Val Acc: 31.75%
Epoch [6/50], Train Loss: 1.9025, Val Loss: 1.8402, Val Acc: 34.61%
Epoch [7/50], Train Loss: 1.8449, Val Loss: 1.7798, Val Acc: 36.58%
Epoch [8/50], Train Loss: 1.7925, Val Loss: 1.7251, Val Acc: 38.61%
Epoch [9/50], Train Loss: 1.7433, Val Loss: 1.6740, Val Acc: 40.11%
Epoch [10/50], Train Loss: 1.6972, Val Loss: 1.6275, Val Acc: 41.68%
Epoch [11/50], Train Loss: 1.6578, Val Loss: 1.5875, Val Acc: 42.97%
Epoch [12/50], Train Loss: 1.6197, Val Loss: 1.5555, Val Acc: 44.40%
Epoch [13/50], Train Loss: 1.5902, Val Loss: 1.5254, Val Acc: 45.29%
Epoch [14/50], Train Loss: 1.5609, Val Loss: 1.4962, Val Acc: 46.00%
Epoch [15/50], Train Loss: 1.5355, Val Loss

In [None]:
import json
with open('results.json', 'w') as f:
    json.dump(results, f)
results

{'adam': {'train_losses': [1.590164584465027,
   1.2519942048645019,
   1.1086417794799805,
   1.0127390199279784,
   0.9358503803253174,
   0.874054369392395,
   0.8110197282791137,
   0.7698344395637512,
   0.7366180632781982,
   0.6949183436584473,
   0.6649042266654969,
   0.6373139685630799,
   0.605095704870224,
   0.5887161101341247,
   0.579220671710968,
   0.5535266841793061,
   0.5390034580230713,
   0.5216813322257996,
   0.5074019215869904,
   0.4936124308204651,
   0.4878903732442856,
   0.4752003056716919,
   0.4720305403518677,
   0.4585906790828705,
   0.45204107931137083,
   0.4461482344055176,
   0.441104617767334,
   0.4303910920333862,
   0.42905445552825927,
   0.4222751952934265,
   0.41356560861587527,
   0.4084376046562195,
   0.40346481088638303,
   0.4079769277381897,
   0.3955028570461273,
   0.3938916843032837,
   0.38916474439620974,
   0.3842560176563263,
   0.377643616733551,
   0.37661827850341795,
   0.37761816915512086,
   0.36651902200222014,
   0.362

# MLP evaluation for cifar10

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import os

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

# Carregar e Pré-processar o CIFAR-10
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64,
                                         shuffle=False, num_workers=2)

# Definir o Modelo

class SimpleMLP(nn.Module):
    def __init__(self, input_size=3*32*32, num_classes=10):
        super(SimpleMLP, self).__init__()
        self.fc1 = nn.Sequential(
            nn.Linear(input_size, 512, bias=False),
            nn.ReLU(),
            nn.Dropout(0.25)
        )

        self.fc2 = nn.Sequential(
            nn.Linear(512, 256, bias=False),
            nn.ReLU(),
            nn.Dropout(0.5)
        )

        self.out = nn.Linear(256, num_classes, bias=False)

        self._initialize_weights()

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten the input
        x = self.fc1(x)
        x = self.fc2(x)
        output = self.out(x)
        return output

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                nn.init.constant_(m.weight, 1e-4)  # Regularization term

# Treinar o modelo
def train(model, trainloader, testloader, criterion, optimizer, epochs=10):
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        #training
        model.train()
        train_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)


            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


            train_loss += loss.item()*inputs.size(0)
        train_loss = train_loss/len(trainloader.dataset)
        train_losses.append(train_loss)

        #validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)

                # Calculate accuracy
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(testloader.dataset)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / total))





    print("Finished Training/Validation")
    return train_losses.copy(), val_losses.copy()




Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 48553287.82it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
results_mlp={}


#ADAM
# Instanciar o modelo
model = SimpleMLP().to(device)
print(model)

# Definir a função de perda
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)




model_perform={}

#treina o modelo com 30 epocas
model_perform['train_losses'], model_perform['validation_losses'] = train(model, trainloader, testloader, criterion, optimizer, epochs=50)
model_perform['accuracy'] = test(model, testloader)
results_mlp['adam'] = model_perform
torch.save(model.state_dict(),'adam_mlp_cifar10')

SimpleMLP(
  (fc1): Sequential(
    (0): Linear(in_features=3072, out_features=512, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
  )
  (fc2): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=False)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (out): Linear(in_features=256, out_features=10, bias=False)
)
Epoch [1/50], Train Loss: 1.8298, Val Loss: 1.5908, Val Acc: 43.00%
Epoch [2/50], Train Loss: 1.6631, Val Loss: 1.5325, Val Acc: 45.51%
Epoch [3/50], Train Loss: 1.6046, Val Loss: 1.4764, Val Acc: 48.36%
Epoch [4/50], Train Loss: 1.5674, Val Loss: 1.4703, Val Acc: 47.78%
Epoch [5/50], Train Loss: 1.5311, Val Loss: 1.4589, Val Acc: 48.42%
Epoch [6/50], Train Loss: 1.5053, Val Loss: 1.4299, Val Acc: 49.81%
Epoch [7/50], Train Loss: 1.4745, Val Loss: 1.4553, Val Acc: 49.53%
Epoch [8/50], Train Loss: 1.4552, Val Loss: 1.4105, Val Acc: 50.88%
Epoch [9/50], Train Loss: 1.4316, Val Loss: 1.4173, Val Acc: 51.40%
Epoch [10/50], Train Loss

NameError: name 'test' is not defined

In [None]:
#SGD

# Instanciar o modelo
model = SimpleMLP().to(device)
print(model)

# Definir a função de perda
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)



model_perform={}
#treina o modelo com 30 epocas
model_perform['train_losses'], model_perform['validation_losses'] = train(model, trainloader, testloader, criterion, optimizer, epochs=50)
model_perform['accuracy'] = test(model, testloader)
results_mlp['sgd'] = model_perform
torch.save(model.state_dict(),'sgd_mlp_cifar10')


# #SGD

# # Instanciar o modelo
# model = SimpleCNN().to(device)
# print(model)

# # Definir a função de perda
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001)



# model_perform={}
# #treina o modelo com 30 epocas
# model_perform['train_losses'], model_perform['validation_losses'] = train(model, trainloader, testloader, criterion, optimizer, epochs=50)
# model_perform['accuracy'] = test(model, testloader)
# results['sgd'] = model_perform
# torch.save(model.state_dict(),'sgd_cnn_cifar10')

In [None]:
def train_hess(model, trainloader, testloader, criterion, optimizer, epochs=10):
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        #training
        model.train()
        train_loss = 0.0
        for inputs, labels in trainloader:
            inputs, labels = inputs.to(device), labels.to(device)


            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward(create_graph=True)
            optimizer.step()


            train_loss += loss.item()*inputs.size(0)
        train_loss = train_loss/len(trainloader.dataset)
        train_losses.append(train_loss)

        #validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)

                # Calculate accuracy
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(testloader.dataset)
        val_losses.append(val_loss)
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.2f}%'
              .format(epoch+1, epochs, train_loss, val_loss, 100 * correct / total))





    print("Finished Training/Validation")
    return train_losses.copy(), val_losses.copy()



#ADAHESSIAN

# Instanciar o modelo
model = SimpleMLP().to(device)
print(model)

# Definir a função de perda
criterion = nn.CrossEntropyLoss()
optimizer = ada_optim.Adahessian(model.parameters(), lr = 0.001)



model_perform={}
#treina o modelo com 30 epocas
model_perform['train_losses'], model_perform['validation_losses'] = train_hess(model, trainloader, testloader, criterion, optimizer, epochs=50)
model_perform['accuracy'] = test(model, testloader)
results_mlp['Adahessian'] = model_perform
torch.save(model.state_dict(),'adahessian_mlp_cifar10')

In [None]:
import json
with open('results_mlp.json', 'w') as f:
    json.dump(results_mlp, f)
results_mlp