In [None]:
# Install (or upgrade) the Weights & Biases client quietly before it is imported below.
# NOTE(review): consider `%pip` instead of `!pip` so the install targets the kernel's
# environment, and pin a wandb version so re-runs are reproducible.
!pip install -Uq wandb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m243.9/243.9 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pathtools (setup.py) ... [?25l[?25hdone


In [None]:
# Authenticate this session with Weights & Biases (wandb) before any run is created.
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Importo las librerías necesarias.
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch.utils.data
import numpy as np
import wandb

# Flag that controls whether the model is evaluated on the test set at the end.
EVAL_TEST = True

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Single seed shared by every source of randomness, for reproducibility.
SEED = 181988
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)

# Hyperparameters and experiment identification.
batch_size = 128
initial_learning_rate = 0.06801699813974062
epochs = 90
dropout_rate = .2
experiment_name = 'rn50_elu_adadelta_dropout.2'
project_name = 'TP3'

# Training pipeline: random horizontal flip as augmentation, then scale each
# channel from [0, 1] to [-1, 1].
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation pipeline (validation and test): same normalization, no augmentation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Download CIFAR-10. The official training split is wrapped twice so that the
# training subset gets the flip augmentation while the validation subset does not.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
valset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# Stratified 80/20 split of the training indices. `random_state` is fixed so the
# partition is the same on every run; without it the split is drawn from NumPy's
# unseeded global RNG and changes between runs despite the torch seed above.
targets_ = trainset.targets
train_idx, val_idx = train_test_split(
    np.arange(len(targets_)),
    test_size=0.2,
    stratify=targets_,
    random_state=SEED,
)
train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)

trainloader = torch.utils.data.DataLoader(trainset, sampler=train_sampler, batch_size=batch_size, num_workers=2)
valloader = torch.utils.data.DataLoader(valset, sampler=val_sampler, batch_size=batch_size, num_workers=2)

# Official CIFAR-10 test split, read in a fixed order and without augmentation.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Human-readable class names, indexed by CIFAR-10 label id.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# CNN definition.
class NetConv(nn.Module):
    """ResNet-50 backbone with a custom fully connected head that emits 10 logits.

    The backbone is initialised from torchvision's ImageNet weights and its
    original `fc` layer is replaced by a stack of Linear -> ELU (-> Dropout)
    layers narrowing 1024 -> 256 -> 64 -> 16 -> 10. The dropout probability is
    read from the module-level `dropout_rate` when the model is constructed.
    """

    def __init__(self):
        super().__init__()

        # Request the checkpoint through the explicit weights enum. Passing a bare
        # boolean (`weights=True`) is the deprecated legacy form and only resolves
        # to these same IMAGENET1K_V1 weights after emitting a deprecation warning.
        self.resnet50 = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

        # Swap the ImageNet classifier for a head sized for this task.
        num_features = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Sequential(
            nn.Linear(num_features, 1024),
            nn.ELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(1024, 256),
            nn.ELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 64),
            nn.ELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 16),
            nn.ELU(),
            nn.Linear(16, 10)
        )

    def forward(self, x):
        """Run `x` through the backbone and the custom head, returning raw logits.

        `x` is presumably an image batch shaped (N, 3, H, W) -- confirm against
        the data loaders that feed this model.
        """
        x = self.resnet50(x)
        return x

# Build the network and place it on the selected device.
net = NetConv().to(device)

# Loss function and optimizer (no learning-rate scheduler is configured).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(net.parameters(), lr=initial_learning_rate)

# Start the wandb run and record the hyperparameters alongside it.
wandb.init(
    project=project_name,
    name=experiment_name,
    config=dict(
        learning_rate=initial_learning_rate,
        dropout_rate=dropout_rate,
        batch_size=batch_size,
        epochs=epochs,
    ),
)

# Best validation accuracy seen so far and the epoch that produced it.
best_accuracy, best_epoch = 0, -1

# Train the CNN: one optimisation pass and one validation pass per epoch,
# keeping a snapshot of the weights that achieved the best validation accuracy.
best_model_state_dict = None

for epoch in range(epochs):
    net.train()
    running_loss = 0.0  # sum of per-sample training losses seen so far this epoch
    train_correct = 0
    total = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()
        outputs = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `criterion` returns the mean loss of the batch, so weight it by the
        # batch size; dividing by `total` then yields a true per-sample mean
        # even when the last batch is smaller than the rest.
        n_samples = labels.size(0)
        running_loss += loss.item() * n_samples
        total += n_samples
        train_correct += (outputs.argmax(dim=1) == labels).sum().item()

        # Report the running mean training loss every 200 mini-batches.
        if i % 200 == 199:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / total:.3f}')

    # Epoch-level training statistics.
    train_accuracy = 100 * train_correct / total
    running_loss = running_loss / total

    # Validation pass.
    val_correct = 0
    total = 0
    val_loss = 0

    net.eval()
    with torch.no_grad():
        for data in valloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
            # Same batch-size weighting as for the training loss.
            val_loss += criterion(outputs, labels).item() * labels.size(0)

    # Statistics reported to wandb.
    val_accuracy = 100 * val_correct / total
    val_loss = val_loss / total

    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_epoch = epoch
        # `state_dict()` hands back references to the live parameters, which keep
        # changing as training continues. Clone them (onto the CPU) so the
        # snapshot really holds this epoch's weights.
        best_model_state_dict = {
            name: tensor.detach().cpu().clone()
            for name, tensor in net.state_dict().items()
        }

    wandb.log({ "train_accuracy": train_accuracy, "val_accuracy": val_accuracy, "train_loss": running_loss, "val_loss": val_loss})

# Announce on the console that training has finished.
print("Entrenamiento finalizado, accuracy en validation de la mejor epoch: "+ str(best_accuracy))

# If no epoch ever improved on the initial accuracy, fall back to the current weights.
if best_model_state_dict is None:
    best_model_state_dict = net.state_dict()

# Save the best model to disk.
PATH = './cifar_net.pth'
torch.save(best_model_state_dict, PATH)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29493781.66it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 90.8MB/s]
[34m[1mwandb[0m: Currently logged in as: [33mfgiorgi[0m ([33mcarrostp3[0m). Use [1m`wandb login --relogin`[0m to force relogin


[1,   200] loss: 0.161
[2,   200] loss: 0.064
[3,   200] loss: 0.044
[4,   200] loss: 0.034
[5,   200] loss: 0.027
[6,   200] loss: 0.021
[7,   200] loss: 0.018
[8,   200] loss: 0.015
[9,   200] loss: 0.012
[10,   200] loss: 0.010
[11,   200] loss: 0.009
[12,   200] loss: 0.008
[13,   200] loss: 0.007
[14,   200] loss: 0.007
[15,   200] loss: 0.006
[16,   200] loss: 0.005
[17,   200] loss: 0.005
[18,   200] loss: 0.004
[19,   200] loss: 0.004
[20,   200] loss: 0.004
[21,   200] loss: 0.004
[22,   200] loss: 0.003
[23,   200] loss: 0.003
[24,   200] loss: 0.003
[25,   200] loss: 0.003
[26,   200] loss: 0.003
[27,   200] loss: 0.002
[28,   200] loss: 0.002
[29,   200] loss: 0.002
[30,   200] loss: 0.002
[31,   200] loss: 0.002
[32,   200] loss: 0.002
[33,   200] loss: 0.002
[34,   200] loss: 0.002
[35,   200] loss: 0.001
[36,   200] loss: 0.002
[37,   200] loss: 0.002
[38,   200] loss: 0.002
[39,   200] loss: 0.001
[40,   200] loss: 0.002
[41,   200] loss: 0.001
[42,   200] loss: 0.001
[

In [None]:
# End the current wandb run now that training has finished.
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.012 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.099232…

0,1
train_accuracy,▁▆▇▇████████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▆▆▇▇▇▇▇▇▇▇▇█▇▇█▇▇▇▇▇▇████▇██████████▇█
val_loss,▅▁▁▁▂▂▃▃▃▃▄▄▅▄▄▅▄▅▅▅▆▇▆▆▄▅▆▆▅▆▅▅▆▆▇▇▇▆█▇

0,1
train_accuracy,99.855
train_loss,4e-05
val_accuracy,87.48
val_loss,0.00654


In [None]:
if EVAL_TEST:
    # Rebuild the architecture and restore the checkpoint written after training.
    # `map_location` lets the file load regardless of the device it was saved from.
    net = NetConv()
    net.load_state_dict(torch.load(PATH, map_location=device))
    net.to(device)
    # Inference mode: disables dropout and makes BatchNorm use its running
    # statistics. Without this the test pass would run with training behaviour.
    net.eval()

    correct = 0
    total = 0
    # Since we're not training, gradients are not needed for the outputs.
    with torch.no_grad():
        for data in testloader:
            # Each batch must live on the same device as the model.
            images, labels = data[0].to(device), data[1].to(device)
            # Compute the outputs by running the images through the network.
            outputs = net(images)
            # The class with the highest logit is the prediction.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # True division so the reported accuracy is not truncated to an integer.
    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f} %')

Accuracy of the network on the 10000 test images: 86 %


In [None]:
if EVAL_TEST:
    # Per-class counters of correct and total predictions.
    correct_pred = {classname: 0 for classname in classes}
    total_pred = {classname: 0 for classname in classes}

    # Make sure the model is on the right device and in inference mode
    # (dropout off, BatchNorm using running statistics) before measuring.
    net.to(device)
    net.eval()

    # Again, no gradients are needed.
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            predictions = outputs.argmax(dim=1)
            # Convert to plain Python ints once per batch so they can index
            # `classes` directly, then tally the correct predictions per class.
            for label, prediction in zip(labels.tolist(), predictions.tolist()):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1

    # Print the accuracy for each class.
    for classname, correct_count in correct_pred.items():
        accuracy = 100 * float(correct_count) / total_pred[classname]
        print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: plane is 83.8 %
Accuracy for class: car   is 93.5 %
Accuracy for class: bird  is 84.4 %
Accuracy for class: cat   is 73.4 %
Accuracy for class: deer  is 86.9 %
Accuracy for class: dog   is 78.2 %
Accuracy for class: frog  is 90.4 %
Accuracy for class: horse is 87.5 %
Accuracy for class: ship  is 92.6 %
Accuracy for class: truck is 90.2 %
