In [1]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import random_split
from torch.utils.data import DataLoader

from functools import partial

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.air.checkpoint import Checkpoint

import torchvision
from torchvision import datasets
from torchvision import transforms

from configs import Inputs
import utils.augmentations
from utils.data import FullRadiographSexDataset

import numpy as np
from PIL import Image, ImageFile

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DEVICE = torch.device("cpu" if torch.cuda.device_count() < 1 else "cuda:0")
batch_size = 4
CLASSES = 2
DIR = os.getcwd()
EPOCHS = 30
N_TRAIN_EXAMPLES = batch_size * 30
N_VALID_EXAMPLES = batch_size * 10
gpus_per_trial = 1
cpus_per_trial = 2

print(f"Device: {DEVICE}\nBatch size: {batch_size}\nClasses: {CLASSES}\n\
Dir: {DIR}\nEpochs: {EPOCHS}\n\
Number of training examples: {N_TRAIN_EXAMPLES}\n\
Number of validation examples: {N_VALID_EXAMPLES}")

Device: cuda:0
Batch size: 4
Classes: 2
Dir: /home/david/Documents/iVision/patch-1
Epochs: 30
Number of training examples: 120
Number of validation examples: 40


In [3]:
model = torchvision.models.efficientnet_v2_m(weights=torchvision.models.EfficientNet_V2_M_Weights.IMAGENET1K_V1)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

In [4]:
from torchvision import transforms as T
img_size = 224

transform = T.Compose([
                T.Resize((img_size,img_size)),
                T.ToTensor(),
                T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

In [5]:
from torch.utils.data import DataLoader
from configs import Inputs
from utils.augmentations import get_transforms
from utils.data import FullRadiographSexDataset

val_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                       fold_nums=Inputs().val_folds,
                                       transforms=get_transforms(Inputs(), subset=["val"]))

train_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                         fold_nums=Inputs().train_folds,
                                         transforms=get_transforms(Inputs(), subset=["train"]))

train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=0)
val_dataloader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)

Using only horizontal flip augmentation.
Using only horizontal flip augmentation.


In [6]:
config = {
    "lr": tune.choice([1e-2, 1e-3, 1e-4, 1e-5]),
    "optimizer_name": tune.choice(["Adam", "AdamW", "SGD"])
}

In [7]:
def compute_metrics(outputs, labels):
    # convert outputs to the predicted classes
    _, pred = torch.max(outputs, 1)

    # compare predictions to true label
    total = len(labels)
    true_positives = (pred & labels.data.view_as(pred)).sum().item()
    true_negatives = ((1 - pred) & (1 - labels).data.view_as(pred)).sum().item()
    false_positives = (pred & (1 - labels).data.view_as(pred)).sum().item()
    false_negatives = ((1 - pred) & labels.data.view_as(pred)).sum().item()

    return {
        'tp': true_positives,
        'tn': true_negatives,
        'fp': false_positives,
        'fn': false_negatives,
        'total': total,
    }

In [8]:
def objective(config):

    # Gerar o modelo
    model = torchvision.models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.IMAGENET1K_V1)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
    model.to(DEVICE)

    # Gerar optimizer
    optimizer_name = config['optimizer_name']
    lr = config["lr"]
    
    print("opt_name:", optimizer_name, "\nlr:", lr)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, valid_loader = train_dataloader, val_dataloader

    criterion = nn.CrossEntropyLoss()
    
    running_loss, total = 0, 0
    tp, tn, fp, fn = 0, 0, 0, 0
    
    for epoch in range(5):  # loop over the dataset multiple times
        for i, data in enumerate(train_dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # sum up the loss
            running_loss += loss.item() * len(inputs)

            metrics = compute_metrics(outputs, labels)
            tp += metrics['tp']
            tn += metrics['tn']
            fp += metrics['fp']
            fn += metrics['fn']
            total += metrics['total']
            
        if total != 0:
            accuracy = (tp + tn) / total
        else:
            accuracy = 0
        
        if (tp + fp) != 0:
            precision = tp / (tp + fp)
        else:
            precision = 0
        
        if (tp+fn) != 0:
            recall = tp / (tp + fn)
        else:
            recall = 0
        
        if (2 * tp + fp + fn) != 0:
            f1 = 2 * tp / (2 * tp + fp + fn)
        else:
            f1 = 0

        print(f'Train precision: {precision:.4f}')
        print(f'Train recall: {recall:.4f}')
        print(f'Train F1: {f1:.4f}')
        print(f'Training loss: {running_loss / len(train_dataloader):.5f}')
        print(f'Training accuracy: {100*accuracy:.2f} (%)')
        
        # Validation loss
        with torch.no_grad():
            
            running_loss, total = 0, 0
            tp, tn, fp, fn = 0, 0, 0, 0 
            
            for i, data in enumerate(val_dataloader, 0):
                model.eval()
                inputs, labels = data
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                # sum up the loss
                running_loss += loss.item() * len(inputs)

                metrics_dict = compute_metrics(outputs, labels)
                tp += metrics_dict['tp']
                tn += metrics_dict['tn']
                fp += metrics_dict['fp']
                fn += metrics_dict['fn']
                total += metrics_dict['total']

            if total != 0:
                accuracy = (tp + tn) / total
            else:
                accuracy = 0
                
            if (tp+fp) != 0:
                precision = tp / (tp + fp)
            else:
                precision = 0
                
            if (tp+fn) != 0:
                recall = tp / (tp + fn)
            else:
                recall = 0
                
            if (2 * tp + fp + fn) != 0:
                f1 = 2 * tp / (2 * tp + fp + fn)
            else:
                f1 = 0

            val_loss=running_loss / len(val_dataloader)
            print(f'Validation loss: {val_loss:.5f}')
            print(f'Validation accuracy: {100*accuracy:.2f} (%)') 
            print(f'Validation precision: {precision:.4f}')
            print(f'Validation recall: {recall:.4f}')
            print(f'Validation F1: {f1:.4f}')
            
            tune.report(loss=val_loss, accuracy=accuracy)

    return accuracy

In [9]:
scheduler = ASHAScheduler(
        max_t=5,
        grace_period=1,
        reduction_factor=2
)

reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"]
)

tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(objective),
            resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial}
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=N_TRAIN_EXAMPLES,
        ),
        param_space=config,
    )
results = tuner.fit()


2023-01-23 16:34:15,322	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Current time:,2023-01-24 18:35:51
Running for:,"1 days, 02:01:34.60"
Memory:,6.9/11.6 GiB

Trial name,status,loc,lr,optimizer_name,iter,total time (s),loss,accuracy
objective_eca83_00000,TERMINATED,192.168.1.20:171987,1e-05,Adam,5,1748.73,1.81527,0.81279
objective_eca83_00001,TERMINATED,192.168.1.20:171987,0.01,Adam,1,375.156,2.33344,0.718258
objective_eca83_00002,TERMINATED,192.168.1.20:171987,0.001,Adam,1,376.828,5.35117,0.445783
objective_eca83_00003,TERMINATED,192.168.1.20:171987,0.0001,SGD,1,365.999,2.75973,0.529194
objective_eca83_00004,TERMINATED,192.168.1.20:171987,0.001,SGD,1,373.836,42.348,0.608897
objective_eca83_00005,TERMINATED,192.168.1.20:171987,0.01,AdamW,1,401.806,6.31346,0.435589
objective_eca83_00006,TERMINATED,192.168.1.20:171987,0.001,AdamW,1,393.944,4.30497,0.489342
objective_eca83_00007,TERMINATED,192.168.1.20:171987,0.0001,Adam,2,721.817,2.76562,0.564411
objective_eca83_00008,TERMINATED,192.168.1.20:171987,0.0001,AdamW,2,652.535,2.76453,0.564411
objective_eca83_00009,TERMINATED,192.168.1.20:171987,1e-05,AdamW,1,331.543,15.6348,0.726599


[2m[36m(objective pid=171987)[0m opt_name: Adam 
[2m[36m(objective pid=171987)[0m lr: 1e-05
[2m[36m(objective pid=171987)[0m Train precision: 0.5536
[2m[36m(objective pid=171987)[0m Train recall: 0.2352
[2m[36m(objective pid=171987)[0m Train F1: 0.3301
[2m[36m(objective pid=171987)[0m Training loss: 2.61128
[2m[36m(objective pid=171987)[0m Training accuracy: 61.08 (%)


Trial name,accuracy,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_eca83_00000,0.81279,2023-01-23_17-03-29,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,5,1.81527,192.168.1.20,171987,1748.73,354.104,1748.73,1674504209,0,,5,eca83_00000,0.00399852
objective_eca83_00001,0.718258,2023-01-23_17-09-44,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,1,2.33344,192.168.1.20,171987,375.156,375.156,375.156,1674504584,0,,1,eca83_00001,0.00399852
objective_eca83_00002,0.445783,2023-01-23_17-16-01,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,1,5.35117,192.168.1.20,171987,376.828,376.828,376.828,1674504961,0,,1,eca83_00002,0.00399852
objective_eca83_00003,0.529194,2023-01-23_17-22-07,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,1,2.75973,192.168.1.20,171987,365.999,365.999,365.999,1674505327,0,,1,eca83_00003,0.00399852
objective_eca83_00004,0.608897,2023-01-23_17-28-21,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,1,42.348,192.168.1.20,171987,373.836,373.836,373.836,1674505701,0,,1,eca83_00004,0.00399852
objective_eca83_00005,0.435589,2023-01-23_17-35-03,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,1,6.31346,192.168.1.20,171987,401.806,401.806,401.806,1674506103,0,,1,eca83_00005,0.00399852
objective_eca83_00006,0.489342,2023-01-23_17-41-37,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,1,4.30497,192.168.1.20,171987,393.944,393.944,393.944,1674506497,0,,1,eca83_00006,0.00399852
objective_eca83_00007,0.564411,2023-01-23_17-53-38,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,2,2.76562,192.168.1.20,171987,721.817,348.903,721.817,1674507218,0,,2,eca83_00007,0.00399852
objective_eca83_00008,0.564411,2023-01-23_18-04-31,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,2,2.76453,192.168.1.20,171987,652.535,322.187,652.535,1674507871,0,,2,eca83_00008,0.00399852
objective_eca83_00009,0.726599,2023-01-23_18-10-02,True,,5f2370b31d3e4ec98f07f827d49fb6a0,arch-pc,1,15.6348,192.168.1.20,171987,331.543,331.543,331.543,1674508202,0,,1,eca83_00009,0.00399852


[2m[36m(objective pid=171987)[0m Validation loss: 2.25195
[2m[36m(objective pid=171987)[0m Validation accuracy: 74.24 (%)
[2m[36m(objective pid=171987)[0m Validation precision: 0.8380
[2m[36m(objective pid=171987)[0m Validation recall: 0.5064
[2m[36m(objective pid=171987)[0m Validation F1: 0.6313
[2m[36m(objective pid=171987)[0m Train precision: 0.7497
[2m[36m(objective pid=171987)[0m Train recall: 0.4827
[2m[36m(objective pid=171987)[0m Train F1: 0.5873
[2m[36m(objective pid=171987)[0m Training loss: 3.91801
[2m[36m(objective pid=171987)[0m Training accuracy: 71.58 (%)
[2m[36m(objective pid=171987)[0m Validation loss: 2.08655
[2m[36m(objective pid=171987)[0m Validation accuracy: 76.74 (%)
[2m[36m(objective pid=171987)[0m Validation precision: 0.7365
[2m[36m(objective pid=171987)[0m Validation recall: 0.7255
[2m[36m(objective pid=171987)[0m Validation F1: 0.7310
[2m[36m(objective pid=171987)[0m Train precision: 0.7548
[2m[36m(objective 

2023-01-24 18:35:51,625	INFO tune.py:762 -- Total run time: 93694.81 seconds (93694.58 seconds for the tuning loop).


In [10]:
best_result = results.get_best_result("loss", "min", "last")
print("Best trial config: {}".format(best_result.config))
print("Best trial final validation loss: {}".format(best_result.metrics["loss"]))
print("Best trial final validation accuracy: {}".format(best_result.metrics["accuracy"]))

Best trial config: {'lr': 1e-05, 'optimizer_name': 'Adam'}
Best trial final validation loss: 1.8152655581288315
Best trial final validation accuracy: 0.8127896200185357


In [11]:
resultado = results.get_dataframe()
resultado.rename(columns={'config/optimizer_name': 'opt', 'config/lr': 'lr'}, inplace = True)
df = resultado[['loss', 'accuracy', 'lr', 'opt', 'training_iteration']]
df

Unnamed: 0,loss,accuracy,lr,opt,training_iteration
0,1.815266,0.812790,0.00001,Adam,5
1,2.333441,0.718258,0.01000,Adam,1
2,5.351167,0.445783,0.00100,Adam,1
3,2.759729,0.529194,0.00010,SGD,1
4,42.348009,0.608897,0.00100,SGD,1
...,...,...,...,...,...
115,2.322307,0.787766,0.00001,AdamW,5
116,2.771061,0.435589,0.00010,AdamW,2
117,4.577206,0.696015,0.00100,AdamW,1
118,2.278255,0.718258,0.00001,Adam,4
