In [2]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import random_split
from torch.utils.data import DataLoader

from functools import partial

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.air.checkpoint import Checkpoint

import torchvision
from torchvision import datasets
from torchvision import transforms

from configs import Inputs
import utils.augmentations
from utils.data import FullRadiographSexDataset

import numpy as np
from PIL import Image, ImageFile

In [3]:
DEVICE = torch.device("cpu" if torch.cuda.device_count() < 1 else "cuda:0")
batch_size = 4
CLASSES = 2
DIR = os.getcwd()
EPOCHS = 30
N_TRAIN_EXAMPLES = batch_size * 30
N_VALID_EXAMPLES = batch_size * 10
gpus_per_trial = 1
cpus_per_trial = 2

print(f"Device: {DEVICE}\nBatch size: {batch_size}\nClasses: {CLASSES}\n\
Dir: {DIR}\nEpochs: {EPOCHS}\n\
Number of training examples: {N_TRAIN_EXAMPLES}\n\
Number of validation examples: {N_VALID_EXAMPLES}")

Device: cuda:0
Batch size: 4
Classes: 2
Dir: /home/david/Documents/iVision/patch-1
Epochs: 30
Number of training examples: 120
Number of validation examples: 40


In [4]:
model = torchvision.models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.IMAGENET1K_V1)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

Downloading: "https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth" to /home/david/.cache/torch/hub/checkpoints/efficientnet_v2_s-dd5fe13b.pth
100%|██████████| 82.7M/82.7M [00:04<00:00, 19.3MB/s]


In [5]:
from torchvision import transforms as T
img_size = 224

transform = T.Compose([
                T.Resize((img_size,img_size)),
                T.ToTensor(),
                T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

In [6]:
from torch.utils.data import DataLoader
from configs import Inputs
from utils.augmentations import get_transforms
from utils.data import FullRadiographSexDataset

val_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                       fold_nums=Inputs().val_folds,
                                       transforms=get_transforms(Inputs(), subset=["val"]))

train_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                         fold_nums=Inputs().train_folds,
                                         transforms=get_transforms(Inputs(), subset=["train"]))

train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=0)
val_dataloader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)

Using only horizontal flip augmentation.
Using only horizontal flip augmentation.


In [7]:
config = {
    "lr": tune.choice([1e-2, 1e-3, 1e-4, 1e-5]),
    "optimizer_name": tune.choice(["Adam", "AdamW", "SGD"])
}

In [8]:
def compute_metrics(outputs, labels):
    # convert outputs to the predicted classes
    _, pred = torch.max(outputs, 1)

    # compare predictions to true label
    total = len(labels)
    true_positives = (pred & labels.data.view_as(pred)).sum().item()
    true_negatives = ((1 - pred) & (1 - labels).data.view_as(pred)).sum().item()
    false_positives = (pred & (1 - labels).data.view_as(pred)).sum().item()
    false_negatives = ((1 - pred) & labels.data.view_as(pred)).sum().item()

    return {
        'tp': true_positives,
        'tn': true_negatives,
        'fp': false_positives,
        'fn': false_negatives,
        'total': total,
    }

In [9]:
def objective(config):

    # Gerar o modelo
    model = torchvision.models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.IMAGENET1K_V1)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
    model.to(DEVICE)

    # Gerar optimizer
    optimizer_name = config['optimizer_name']
    lr = config["lr"]
    
    print("opt_name:", optimizer_name, "\nlr:", lr)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, valid_loader = train_dataloader, val_dataloader

    criterion = nn.CrossEntropyLoss()
    
    running_loss, total = 0, 0
    tp, tn, fp, fn = 0, 0, 0, 0
    
    for epoch in range(5):  # loop over the dataset multiple times
        for i, data in enumerate(train_dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # sum up the loss
            running_loss += loss.item() * len(inputs)

            metrics = compute_metrics(outputs, labels)
            tp += metrics['tp']
            tn += metrics['tn']
            fp += metrics['fp']
            fn += metrics['fn']
            total += metrics['total']
            
        if total != 0:
            accuracy = (tp + tn) / total
        else:
            accuracy = 0
        
        if (tp + fp) != 0:
            precision = tp / (tp + fp)
        else:
            precision = 0
        
        if (tp+fn) != 0:
            recall = tp / (tp + fn)
        else:
            recall = 0
        
        if (2 * tp + fp + fn) != 0:
            f1 = 2 * tp / (2 * tp + fp + fn)
        else:
            f1 = 0

        print(f'Train precision: {precision:.4f}')
        print(f'Train recall: {recall:.4f}')
        print(f'Train F1: {f1:.4f}')
        print(f'Training loss: {running_loss / len(train_dataloader):.5f}')
        print(f'Training accuracy: {100*accuracy:.2f} (%)')
        
        # Validation loss
        with torch.no_grad():
            
            running_loss, total = 0, 0
            tp, tn, fp, fn = 0, 0, 0, 0 
            
            for i, data in enumerate(val_dataloader, 0):
                model.eval()
                inputs, labels = data
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                # sum up the loss
                running_loss += loss.item() * len(inputs)

                metrics_dict = compute_metrics(outputs, labels)
                tp += metrics_dict['tp']
                tn += metrics_dict['tn']
                fp += metrics_dict['fp']
                fn += metrics_dict['fn']
                total += metrics_dict['total']

            if total != 0:
                accuracy = (tp + tn) / total
            else:
                accuracy = 0
                
            if (tp+fp) != 0:
                precision = tp / (tp + fp)
            else:
                precision = 0
                
            if (tp+fn) != 0:
                recall = tp / (tp + fn)
            else:
                recall = 0
                
            if (2 * tp + fp + fn) != 0:
                f1 = 2 * tp / (2 * tp + fp + fn)
            else:
                f1 = 0

            val_loss=running_loss / len(val_dataloader)
            print(f'Validation loss: {val_loss:.5f}')
            print(f'Validation accuracy: {100*accuracy:.2f} (%)') 
            print(f'Validation precision: {precision:.4f}')
            print(f'Validation recall: {recall:.4f}')
            print(f'Validation F1: {f1:.4f}')
            
            tune.report(loss=val_loss, accuracy=accuracy)

    return accuracy

In [10]:
scheduler = ASHAScheduler(
        max_t=5,
        grace_period=1,
        reduction_factor=2
)

reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"]
)

tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(objective),
            resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial}
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=N_TRAIN_EXAMPLES,
        ),
        param_space=config,
    )
results = tuner.fit()


2023-01-19 15:11:36,389	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Current time:,2023-01-20 17:44:11
Running for:,"1 days, 02:32:33.33"
Memory:,5.4/11.6 GiB

Trial name,status,loc,lr,optimizer_name,iter,total time (s),loss,accuracy
objective_b7164_00000,TERMINATED,192.168.1.20:16854,0.0001,AdamW,5,1723.44,2.75228,0.564411
objective_b7164_00001,TERMINATED,192.168.1.20:16854,1e-05,AdamW,1,350.487,429.624,0.735867
objective_b7164_00002,TERMINATED,192.168.1.20:16854,0.0001,SGD,5,1731.0,,0.564411
objective_b7164_00003,TERMINATED,192.168.1.20:16854,1e-05,SGD,4,1322.69,6.72623,0.566265
objective_b7164_00004,TERMINATED,192.168.1.20:16854,0.0001,AdamW,2,649.156,2.76762,0.564411
objective_b7164_00005,TERMINATED,192.168.1.20:16854,0.01,Adam,2,646.967,215076000000000.0,0.406858
objective_b7164_00006,TERMINATED,192.168.1.20:16854,0.0001,Adam,5,1613.17,2.74862,0.564411
objective_b7164_00007,TERMINATED,192.168.1.20:16854,0.0001,Adam,2,646.784,2.7772,0.435589
objective_b7164_00008,TERMINATED,192.168.1.20:16854,0.01,SGD,5,1553.77,,0.564411
objective_b7164_00009,TERMINATED,192.168.1.20:16854,0.01,AdamW,1,332.028,3.72543,0.486562


[2m[36m(objective pid=16854)[0m opt_name: AdamW 
[2m[36m(objective pid=16854)[0m lr: 0.0001
[2m[36m(objective pid=16854)[0m Train precision: 0.6849
[2m[36m(objective pid=16854)[0m Train recall: 0.5706
[2m[36m(objective pid=16854)[0m Train F1: 0.6225
[2m[36m(objective pid=16854)[0m Training loss: 2.23725
[2m[36m(objective pid=16854)[0m Training accuracy: 71.78 (%)


Trial name,accuracy,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_b7164_00000,0.564411,2023-01-19_15-40-24,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,5,2.75228,192.168.1.20,16854,1723.44,319.877,1723.44,1674153624,0,,5,b7164_00000,0.00373626
objective_b7164_00001,0.735867,2023-01-19_15-46-15,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,1,429.624,192.168.1.20,16854,350.487,350.487,350.487,1674153975,0,,1,b7164_00001,0.00373626
objective_b7164_00002,0.564411,2023-01-19_16-15-06,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,5,,192.168.1.20,16854,1731.0,356.393,1731.0,1674155706,0,,5,b7164_00002,0.00373626
objective_b7164_00003,0.566265,2023-01-19_16-37-08,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,4,6.72623,192.168.1.20,16854,1322.69,308.909,1322.69,1674157028,0,,4,b7164_00003,0.00373626
objective_b7164_00004,0.564411,2023-01-19_16-47-58,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,2,2.76762,192.168.1.20,16854,649.156,318.627,649.156,1674157678,0,,2,b7164_00004,0.00373626
objective_b7164_00005,0.406858,2023-01-19_16-58-45,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,2,215076000000000.0,192.168.1.20,16854,646.967,317.773,646.967,1674158325,0,,2,b7164_00005,0.00373626
objective_b7164_00006,0.564411,2023-01-19_17-25-38,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,5,2.74862,192.168.1.20,16854,1613.17,321.129,1613.17,1674159938,0,,5,b7164_00006,0.00373626
objective_b7164_00007,0.435589,2023-01-19_17-36-25,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,2,2.7772,192.168.1.20,16854,646.784,317.39,646.784,1674160585,0,,2,b7164_00007,0.00373626
objective_b7164_00008,0.564411,2023-01-19_18-02-18,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,5,,192.168.1.20,16854,1553.77,308.222,1553.77,1674162138,0,,5,b7164_00008,0.00373626
objective_b7164_00009,0.486562,2023-01-19_18-07-50,True,,7d2ad19f81164c6890102a30025605d1,arch-pc,1,3.72543,192.168.1.20,16854,332.028,332.028,332.028,1674162470,0,,1,b7164_00009,0.00373626


[2m[36m(objective pid=16854)[0m Validation loss: 1.76994
[2m[36m(objective pid=16854)[0m Validation accuracy: 80.35 (%)
[2m[36m(objective pid=16854)[0m Validation precision: 0.7510
[2m[36m(objective pid=16854)[0m Validation recall: 0.8213
[2m[36m(objective pid=16854)[0m Validation F1: 0.7846
[2m[36m(objective pid=16854)[0m Train precision: 0.6600
[2m[36m(objective pid=16854)[0m Train recall: 0.4074
[2m[36m(objective pid=16854)[0m Train F1: 0.5038
[2m[36m(objective pid=16854)[0m Training loss: 8226.73241
[2m[36m(objective pid=16854)[0m Training accuracy: 66.38 (%)
[2m[36m(objective pid=16854)[0m Validation loss: 2.76255
[2m[36m(objective pid=16854)[0m Validation accuracy: 56.44 (%)
[2m[36m(objective pid=16854)[0m Validation precision: 0.0000
[2m[36m(objective pid=16854)[0m Validation recall: 0.0000
[2m[36m(objective pid=16854)[0m Validation F1: 0.0000
[2m[36m(objective pid=16854)[0m Train precision: 0.5000
[2m[36m(objective pid=16854)[0

2023-01-20 17:44:11,172	INFO tune.py:762 -- Total run time: 95553.56 seconds (95553.32 seconds for the tuning loop).


In [20]:
best_result = results.get_best_result("loss", "min", "last")
print("Best trial config: {}".format(best_result.config))
print("Best trial final validation loss: {}".format(best_result.metrics["loss"]))
print("Best trial final validation accuracy: {}".format(best_result.metrics["accuracy"]))

Best trial config: {'lr': 1e-05, 'optimizer_name': 'AdamW'}
Best trial final validation loss: 1.76038169266863
Best trial final validation accuracy: 0.8294717330861909


In [21]:
resultado = results.dataframe()
resultado.rename(columns={'config/optimizer_name': 'opt', 'config/lr': 'lr'}, inplace = True)
df = resultado[['loss', 'accuracy', 'lr', 'opt', 'training_iteration']]
df

AttributeError: 'ResultGrid' object has no attribute 'dataframe'