In [4]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import random_split
from torch.utils.data import DataLoader

from functools import partial

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.air.checkpoint import Checkpoint

import torchvision
from torchvision import datasets
from torchvision import transforms

from configs import Inputs
import utils.augmentations
from utils.data import FullRadiographSexDataset

import numpy as np
from PIL import Image, ImageFile

In [5]:
DEVICE = torch.device("cpu" if torch.cuda.device_count() < 1 else "cuda:0")
batch_size = 4
CLASSES = 2
DIR = os.getcwd()
EPOCHS = 30
N_TRAIN_EXAMPLES = batch_size * 30
N_VALID_EXAMPLES = batch_size * 10
gpus_per_trial = 1
cpus_per_trial = 2

print(f"Device: {DEVICE}\nBatch size: {batch_size}\nClasses: {CLASSES}\n\
Dir: {DIR}\nEpochs: {EPOCHS}\n\
Number of training examples: {N_TRAIN_EXAMPLES}\n\
Number of validation examples: {N_VALID_EXAMPLES}")

Device: cuda:0
Batch size: 4
Classes: 2
Dir: /home/david/Documents/iVision/patch-1
Epochs: 30
Number of training examples: 120
Number of validation examples: 40


In [6]:
model = torchvision.models.efficientnet_b0(weights=torchvision.models.EfficientNet_B0_Weights.IMAGENET1K_V1)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

In [7]:
from torchvision import transforms as T
img_size = 224

transform = T.Compose([
                T.Resize((img_size,img_size)),
                T.ToTensor(),
                T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

In [8]:
from torch.utils.data import DataLoader
from configs import Inputs
from utils.augmentations import get_transforms
from utils.data import FullRadiographSexDataset

val_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                       fold_nums=Inputs().val_folds,
                                       transforms=get_transforms(Inputs(), subset=["val"]))

train_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                         fold_nums=Inputs().train_folds,
                                         transforms=get_transforms(Inputs(), subset=["train"]))

train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=0)
val_dataloader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)

Using only horizontal flip augmentation.
Using only horizontal flip augmentation.


In [9]:
config = {
    "lr": tune.choice([1e-2, 1e-3, 1e-4, 1e-5]),
    "optimizer_name": tune.choice(["Adam", "AdamW", "SGD"])
}

In [10]:
def compute_metrics(outputs, labels):
    # convert outputs to the predicted classes
    _, pred = torch.max(outputs, 1)

    # compare predictions to true label
    total = len(labels)
    true_positives = (pred & labels.data.view_as(pred)).sum().item()
    true_negatives = ((1 - pred) & (1 - labels).data.view_as(pred)).sum().item()
    false_positives = (pred & (1 - labels).data.view_as(pred)).sum().item()
    false_negatives = ((1 - pred) & labels.data.view_as(pred)).sum().item()

    return {
        'tp': true_positives,
        'tn': true_negatives,
        'fp': false_positives,
        'fn': false_negatives,
        'total': total,
    }

In [11]:
def objective(config):

    # Gerar o modelo
    model = torchvision.models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.IMAGENET1K_V1)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
    model.to(DEVICE)

    # Gerar optimizer
    optimizer_name = config['optimizer_name']
    lr = config["lr"]
    
    print("opt_name:", optimizer_name, "\nlr:", lr)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, valid_loader = train_dataloader, val_dataloader

    criterion = nn.CrossEntropyLoss()
    
    running_loss, total = 0, 0
    tp, tn, fp, fn = 0, 0, 0, 0
    
    for epoch in range(5):  # loop over the dataset multiple times
        for i, data in enumerate(train_dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # sum up the loss
            running_loss += loss.item() * len(inputs)

            metrics = compute_metrics(outputs, labels)
            tp += metrics['tp']
            tn += metrics['tn']
            fp += metrics['fp']
            fn += metrics['fn']
            total += metrics['total']
            
        if total != 0:
            accuracy = (tp + tn) / total
        else:
            accuracy = 0
        
        if (tp + fp) != 0:
            precision = tp / (tp + fp)
        else:
            precision = 0
        
        if (tp+fn) != 0:
            recall = tp / (tp + fn)
        else:
            recall = 0
        
        if (2 * tp + fp + fn) != 0:
            f1 = 2 * tp / (2 * tp + fp + fn)
        else:
            f1 = 0

        print(f'Train precision: {precision:.4f}')
        print(f'Train recall: {recall:.4f}')
        print(f'Train F1: {f1:.4f}')
        print(f'Training loss: {running_loss / len(train_dataloader):.5f}')
        print(f'Training accuracy: {100*accuracy:.2f} (%)')
        
        # Validation loss
        with torch.no_grad():
            
            running_loss, total = 0, 0
            tp, tn, fp, fn = 0, 0, 0, 0 
            
            for i, data in enumerate(val_dataloader, 0):
                model.eval()
                inputs, labels = data
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                # sum up the loss
                running_loss += loss.item() * len(inputs)

                metrics_dict = compute_metrics(outputs, labels)
                tp += metrics_dict['tp']
                tn += metrics_dict['tn']
                fp += metrics_dict['fp']
                fn += metrics_dict['fn']
                total += metrics_dict['total']

            if total != 0:
                accuracy = (tp + tn) / total
            else:
                accuracy = 0
                
            if (tp+fp) != 0:
                precision = tp / (tp + fp)
            else:
                precision = 0
                
            if (tp+fn) != 0:
                recall = tp / (tp + fn)
            else:
                recall = 0
                
            if (2 * tp + fp + fn) != 0:
                f1 = 2 * tp / (2 * tp + fp + fn)
            else:
                f1 = 0

            val_loss=running_loss / len(val_dataloader)
            print(f'Validation loss: {val_loss:.5f}')
            print(f'Validation accuracy: {100*accuracy:.2f} (%)') 
            print(f'Validation precision: {precision:.4f}')
            print(f'Validation recall: {recall:.4f}')
            print(f'Validation F1: {f1:.4f}')
            
            tune.report(loss=val_loss, accuracy=accuracy)

    return accuracy

In [12]:
scheduler = ASHAScheduler(
        max_t=5,
        grace_period=1,
        reduction_factor=2
)

reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"]
)

tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(objective),
            resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial}
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=N_TRAIN_EXAMPLES,
        ),
        param_space=config,
    )
results = tuner.fit()


2023-01-21 13:00:18,014	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Current time:,2023-01-22 14:53:46
Running for:,"1 days, 01:53:27.35"
Memory:,5.9/11.6 GiB

Trial name,status,loc,lr,optimizer_name,iter,total time (s),loss,accuracy
objective_b426b_00000,TERMINATED,192.168.1.20:86030,0.001,AdamW,5,1731.72,2.74037,0.564411
objective_b426b_00001,TERMINATED,192.168.1.20:86030,1e-05,SGD,5,1633.26,,0.564411
objective_b426b_00002,TERMINATED,192.168.1.20:86030,0.01,Adam,2,644.148,1.90116e+17,0.539388
objective_b426b_00003,TERMINATED,192.168.1.20:86030,0.0001,SGD,5,1553.74,2.38334,0.734013
objective_b426b_00004,TERMINATED,192.168.1.20:86030,1e-05,Adam,1,327.898,11.9024,0.688601
objective_b426b_00005,TERMINATED,192.168.1.20:86030,0.0001,Adam,2,644.926,2.76916,0.564411
objective_b426b_00006,TERMINATED,192.168.1.20:86030,1e-05,Adam,2,644.698,38.253,0.796108
objective_b426b_00007,TERMINATED,192.168.1.20:86030,1e-05,AdamW,5,1605.47,2.71086,0.791474
objective_b426b_00008,TERMINATED,192.168.1.20:86030,0.0001,AdamW,2,645.764,2.76802,0.564411
objective_b426b_00009,TERMINATED,192.168.1.20:86030,0.0001,Adam,4,1272.42,2.75434,0.564411


[2m[36m(objective pid=86030)[0m opt_name: AdamW 
[2m[36m(objective pid=86030)[0m lr: 0.001
[2m[36m(objective pid=86030)[0m Train precision: 0.6202
[2m[36m(objective pid=86030)[0m Train recall: 0.4856
[2m[36m(objective pid=86030)[0m Train F1: 0.5447
[2m[36m(objective pid=86030)[0m Training loss: 2.54237
[2m[36m(objective pid=86030)[0m Training accuracy: 66.89 (%)


Trial name,accuracy,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_b426b_00000,0.564411,2023-01-21_13-29-14,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,5,2.74037,192.168.1.20,86030,1731.72,344.134,1731.72,1674318554,0,,5,b426b_00000,0.00342393
objective_b426b_00001,0.564411,2023-01-21_13-56-28,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,5,,192.168.1.20,86030,1633.26,306.827,1633.26,1674320188,0,,5,b426b_00001,0.00342393
objective_b426b_00002,0.539388,2023-01-21_14-07-12,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,2,1.90116e+17,192.168.1.20,86030,644.148,316.762,644.148,1674320832,0,,2,b426b_00002,0.00342393
objective_b426b_00003,0.734013,2023-01-21_14-33-05,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,5,2.38334,192.168.1.20,86030,1553.74,308.731,1553.74,1674322385,0,,5,b426b_00003,0.00342393
objective_b426b_00004,0.688601,2023-01-21_14-38-33,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,1,11.9024,192.168.1.20,86030,327.898,327.898,327.898,1674322713,0,,1,b426b_00004,0.00342393
objective_b426b_00005,0.564411,2023-01-21_14-49-18,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,2,2.76916,192.168.1.20,86030,644.926,316.899,644.926,1674323358,0,,2,b426b_00005,0.00342393
objective_b426b_00006,0.796108,2023-01-21_15-00-03,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,2,38.253,192.168.1.20,86030,644.698,316.86,644.698,1674324003,0,,2,b426b_00006,0.00342393
objective_b426b_00007,0.791474,2023-01-21_15-26-49,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,5,2.71086,192.168.1.20,86030,1605.47,318.918,1605.47,1674325609,0,,5,b426b_00007,0.00342393
objective_b426b_00008,0.564411,2023-01-21_15-37-34,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,2,2.76802,192.168.1.20,86030,645.764,317.51,645.764,1674326254,0,,2,b426b_00008,0.00342393
objective_b426b_00009,0.564411,2023-01-21_15-58-47,True,,69e6eed4af5c47e1a0e81902469decef,arch-pc,4,2.75434,192.168.1.20,86030,1272.42,314.947,1272.42,1674327527,0,,4,b426b_00009,0.00342393


[2m[36m(objective pid=86030)[0m Validation loss: 8.11541
[2m[36m(objective pid=86030)[0m Validation accuracy: 57.46 (%)
[2m[36m(objective pid=86030)[0m Validation precision: 1.0000
[2m[36m(objective pid=86030)[0m Validation recall: 0.0234
[2m[36m(objective pid=86030)[0m Validation F1: 0.0457
[2m[36m(objective pid=86030)[0m Train precision: 0.4429
[2m[36m(objective pid=86030)[0m Train recall: 0.0859
[2m[36m(objective pid=86030)[0m Train F1: 0.1439
[2m[36m(objective pid=86030)[0m Training loss: 4672.67300
[2m[36m(objective pid=86030)[0m Training accuracy: 57.18 (%)
[2m[36m(objective pid=86030)[0m Validation loss: 2.74018
[2m[36m(objective pid=86030)[0m Validation accuracy: 56.44 (%)
[2m[36m(objective pid=86030)[0m Validation precision: 0.0000
[2m[36m(objective pid=86030)[0m Validation recall: 0.0000
[2m[36m(objective pid=86030)[0m Validation F1: 0.0000
[2m[36m(objective pid=86030)[0m Train precision: 0.0000
[2m[36m(objective pid=86030)[0

2023-01-22 14:53:46,973	INFO tune.py:762 -- Total run time: 93207.54 seconds (93207.33 seconds for the tuning loop).


In [13]:
best_result = results.get_best_result("loss", "min", "last")
print("Best trial config: {}".format(best_result.config))
print("Best trial final validation loss: {}".format(best_result.metrics["loss"]))
print("Best trial final validation accuracy: {}".format(best_result.metrics["accuracy"]))

Best trial config: {'lr': 1e-05, 'optimizer_name': 'AdamW'}
Best trial final validation loss: 1.4980830340749687
Best trial final validation accuracy: 0.8517145505097312


In [14]:
resultado = results.get_dataframe()
resultado.rename(columns={'config/optimizer_name': 'opt', 'config/lr': 'lr'}, inplace = True)
df = resultado[['loss', 'accuracy', 'lr', 'opt', 'training_iteration']]
df

Unnamed: 0,loss,accuracy,lr,opt,training_iteration
0,2.740372e+00,0.564411,0.00100,AdamW,5
1,,0.564411,0.00001,SGD,5
2,1.901164e+17,0.539388,0.01000,Adam,2
3,2.383336e+00,0.734013,0.00010,SGD,5
4,1.190245e+01,0.688601,0.00001,Adam,1
...,...,...,...,...,...
115,2.762800e+00,0.511585,0.00001,SGD,1
116,2.753374e+00,0.564411,0.00010,AdamW,2
117,5.000067e+01,0.434662,0.00010,Adam,2
118,1.309080e+01,0.555144,0.00010,SGD,1
