In [11]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import random_split
from torch.utils.data import DataLoader

from functools import partial

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.air.checkpoint import Checkpoint

import torchvision
from torchvision import datasets
from torchvision import transforms

from configs import Inputs
import utils.augmentations
from utils.data import FullRadiographSexDataset

import numpy as np
from PIL import Image, ImageFile

In [12]:
DEVICE = torch.device("cpu" if torch.cuda.device_count() < 1 else "cuda:0")
batch_size = 4
CLASSES = 2
DIR = os.getcwd()
EPOCHS = 30
N_TRAIN_EXAMPLES = batch_size * 30
N_VALID_EXAMPLES = batch_size * 10
gpus_per_trial = 1
cpus_per_trial = 2

print(f"Device: {DEVICE}\nBatch size: {batch_size}\nClasses: {CLASSES}\n\
Dir: {DIR}\nEpochs: {EPOCHS}\n\
Number of training examples: {N_TRAIN_EXAMPLES}\n\
Number of validation examples: {N_VALID_EXAMPLES}")

Device: cuda:0
Batch size: 4
Classes: 2
Dir: /home/david/Documents/iVision/patch-1
Epochs: 30
Number of training examples: 120
Number of validation examples: 40


In [13]:
model = torchvision.models.efficientnet_b7(weights=torchvision.models.EfficientNet_B7_Weights.IMAGENET1K_V1)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

In [14]:
from torchvision import transforms as T
img_size = 224

transform = T.Compose([
                T.Resize((img_size,img_size)),
                T.ToTensor(),
                T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

In [15]:
from torch.utils.data import DataLoader
from configs import Inputs
from utils.augmentations import get_transforms
from utils.data import FullRadiographSexDataset

val_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                       fold_nums=Inputs().val_folds,
                                       transforms=get_transforms(Inputs(), subset=["val"]))

train_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                         fold_nums=Inputs().train_folds,
                                         transforms=get_transforms(Inputs(), subset=["train"]))

train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=0)
val_dataloader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)

Using only horizontal flip augmentation.
Using only horizontal flip augmentation.


In [16]:
config = {
    "lr": tune.choice([1e-2, 1e-3, 1e-4, 1e-5]),
    "optimizer_name": tune.choice(["Adam", "AdamW", "SGD"])
}

In [17]:
def compute_metrics(outputs, labels):
    # convert outputs to the predicted classes
    _, pred = torch.max(outputs, 1)

    # compare predictions to true label
    total = len(labels)
    true_positives = (pred & labels.data.view_as(pred)).sum().item()
    true_negatives = ((1 - pred) & (1 - labels).data.view_as(pred)).sum().item()
    false_positives = (pred & (1 - labels).data.view_as(pred)).sum().item()
    false_negatives = ((1 - pred) & labels.data.view_as(pred)).sum().item()

    return {
        'tp': true_positives,
        'tn': true_negatives,
        'fp': false_positives,
        'fn': false_negatives,
        'total': total,
    }

In [18]:
def objective(config):

    # Gerar o modelo
    model = torchvision.models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.IMAGENET1K_V1)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
    model.to(DEVICE)

    # Gerar optimizer
    optimizer_name = config['optimizer_name']
    lr = config["lr"]
    
    print("opt_name:", optimizer_name, "\nlr:", lr)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, valid_loader = train_dataloader, val_dataloader

    criterion = nn.CrossEntropyLoss()
    
    running_loss, total = 0, 0
    tp, tn, fp, fn = 0, 0, 0, 0
    
    for epoch in range(5):  # loop over the dataset multiple times
        for i, data in enumerate(train_dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # sum up the loss
            running_loss += loss.item() * len(inputs)

            metrics = compute_metrics(outputs, labels)
            tp += metrics['tp']
            tn += metrics['tn']
            fp += metrics['fp']
            fn += metrics['fn']
            total += metrics['total']
            
        if total != 0:
            accuracy = (tp + tn) / total
        else:
            accuracy = 0
        
        if (tp + fp) != 0:
            precision = tp / (tp + fp)
        else:
            precision = 0
        
        if (tp+fn) != 0:
            recall = tp / (tp + fn)
        else:
            recall = 0
        
        if (2 * tp + fp + fn) != 0:
            f1 = 2 * tp / (2 * tp + fp + fn)
        else:
            f1 = 0

        print(f'Train precision: {precision:.4f}')
        print(f'Train recall: {recall:.4f}')
        print(f'Train F1: {f1:.4f}')
        print(f'Training loss: {running_loss / len(train_dataloader):.5f}')
        print(f'Training accuracy: {100*accuracy:.2f} (%)')
        
        # Validation loss
        with torch.no_grad():
            
            running_loss, total = 0, 0
            tp, tn, fp, fn = 0, 0, 0, 0 
            
            for i, data in enumerate(val_dataloader, 0):
                model.eval()
                inputs, labels = data
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                # sum up the loss
                running_loss += loss.item() * len(inputs)

                metrics_dict = compute_metrics(outputs, labels)
                tp += metrics_dict['tp']
                tn += metrics_dict['tn']
                fp += metrics_dict['fp']
                fn += metrics_dict['fn']
                total += metrics_dict['total']

            if total != 0:
                accuracy = (tp + tn) / total
            else:
                accuracy = 0
                
            if (tp+fp) != 0:
                precision = tp / (tp + fp)
            else:
                precision = 0
                
            if (tp+fn) != 0:
                recall = tp / (tp + fn)
            else:
                recall = 0
                
            if (2 * tp + fp + fn) != 0:
                f1 = 2 * tp / (2 * tp + fp + fn)
            else:
                f1 = 0

            val_loss=running_loss / len(val_dataloader)
            print(f'Validation loss: {val_loss:.5f}')
            print(f'Validation accuracy: {100*accuracy:.2f} (%)') 
            print(f'Validation precision: {precision:.4f}')
            print(f'Validation recall: {recall:.4f}')
            print(f'Validation F1: {f1:.4f}')
            
            tune.report(loss=val_loss, accuracy=accuracy)

    return accuracy

In [19]:
scheduler = ASHAScheduler(
        max_t=5,
        grace_period=1,
        reduction_factor=2
)

reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"]
)

tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(objective),
            resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial}
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=N_TRAIN_EXAMPLES,
        ),
        param_space=config,
    )
results = tuner.fit()


0,1
Current time:,2023-01-23 16:01:49
Running for:,"1 days, 00:04:43.15"
Memory:,6.7/11.6 GiB

Trial name,status,loc,lr,optimizer_name,iter,total time (s),loss,accuracy
objective_90bf0_00000,TERMINATED,192.168.1.20:128471,0.001,Adam,5,1691.03,2.74056,0.564411
objective_90bf0_00001,TERMINATED,192.168.1.20:128471,0.001,Adam,4,1298.73,2.7393,0.564411
objective_90bf0_00002,TERMINATED,192.168.1.20:128471,0.0001,Adam,2,655.053,2.76378,0.564411
objective_90bf0_00003,TERMINATED,192.168.1.20:128471,0.0001,AdamW,2,658.071,2.76269,0.564411
objective_90bf0_00004,TERMINATED,192.168.1.20:128471,1e-05,AdamW,1,334.965,2.29474,0.748842
objective_90bf0_00005,TERMINATED,192.168.1.20:128471,0.001,SGD,1,324.426,2.64523,0.598703
objective_90bf0_00006,TERMINATED,192.168.1.20:128471,0.0001,Adam,2,655.062,2.76747,0.564411
objective_90bf0_00007,TERMINATED,192.168.1.20:128471,1e-05,AdamW,4,1308.52,3.94017,0.673772
objective_90bf0_00008,TERMINATED,192.168.1.20:128471,0.01,Adam,1,336.359,8.69234,0.585728
objective_90bf0_00009,TERMINATED,192.168.1.20:128471,0.001,Adam,1,333.279,2.32481,0.696942


[2m[36m(objective pid=128471)[0m opt_name: Adam 
[2m[36m(objective pid=128471)[0m lr: 0.001
[2m[36m(objective pid=128471)[0m Train precision: 0.5871
[2m[36m(objective pid=128471)[0m Train recall: 0.4552
[2m[36m(objective pid=128471)[0m Train F1: 0.5128
[2m[36m(objective pid=128471)[0m Training loss: 2.54799
[2m[36m(objective pid=128471)[0m Training accuracy: 64.73 (%)


Trial name,accuracy,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_90bf0_00000,0.564411,2023-01-22_16-25-24,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,5,2.74056,192.168.1.20,128471,1691.03,321.764,1691.03,1674415524,0,,5,90bf0_00000,0.00397849
objective_90bf0_00001,0.564411,2023-01-22_16-47-02,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,4,2.7393,192.168.1.20,128471,1298.73,321.802,1298.73,1674416822,0,,4,90bf0_00001,0.00397849
objective_90bf0_00002,0.564411,2023-01-22_16-57-58,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,2,2.76378,192.168.1.20,128471,655.053,322.708,655.053,1674417478,0,,2,90bf0_00002,0.00397849
objective_90bf0_00003,0.564411,2023-01-22_17-08-56,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,2,2.76269,192.168.1.20,128471,658.071,323.408,658.071,1674418136,0,,2,90bf0_00003,0.00397849
objective_90bf0_00004,0.748842,2023-01-22_17-14-31,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,1,2.29474,192.168.1.20,128471,334.965,334.965,334.965,1674418471,0,,1,90bf0_00004,0.00397849
objective_90bf0_00005,0.598703,2023-01-22_17-19-55,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,1,2.64523,192.168.1.20,128471,324.426,324.426,324.426,1674418795,0,,1,90bf0_00005,0.00397849
objective_90bf0_00006,0.564411,2023-01-22_17-30-50,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,2,2.76747,192.168.1.20,128471,655.062,321.989,655.062,1674419450,0,,2,90bf0_00006,0.00397849
objective_90bf0_00007,0.673772,2023-01-22_17-52-39,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,4,3.94017,192.168.1.20,128471,1308.52,324.045,1308.52,1674420759,0,,4,90bf0_00007,0.00397849
objective_90bf0_00008,0.585728,2023-01-22_17-58-15,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,1,8.69234,192.168.1.20,128471,336.359,336.359,336.359,1674421095,0,,1,90bf0_00008,0.00397849
objective_90bf0_00009,0.696942,2023-01-22_18-03-48,True,,c0961cff7e2149b9a97bad138b4b8cc9,arch-pc,1,2.32481,192.168.1.20,128471,333.279,333.279,333.279,1674421428,0,,1,90bf0_00009,0.00397849


[2m[36m(objective pid=128471)[0m Validation loss: 3.62980
[2m[36m(objective pid=128471)[0m Validation accuracy: 43.74 (%)
[2m[36m(objective pid=128471)[0m Validation precision: 0.4363
[2m[36m(objective pid=128471)[0m Validation recall: 0.9979
[2m[36m(objective pid=128471)[0m Validation F1: 0.6071
[2m[36m(objective pid=128471)[0m Train precision: 0.4226
[2m[36m(objective pid=128471)[0m Train recall: 0.4907
[2m[36m(objective pid=128471)[0m Train F1: 0.4541
[2m[36m(objective pid=128471)[0m Training loss: 103.15014
[2m[36m(objective pid=128471)[0m Training accuracy: 50.58 (%)
[2m[36m(objective pid=128471)[0m Validation loss: 2.74079
[2m[36m(objective pid=128471)[0m Validation accuracy: 56.44 (%)
[2m[36m(objective pid=128471)[0m Validation precision: 0.0000
[2m[36m(objective pid=128471)[0m Validation recall: 0.0000
[2m[36m(objective pid=128471)[0m Validation F1: 0.0000
[2m[36m(objective pid=128471)[0m Train precision: 0.0000
[2m[36m(objectiv

2023-01-23 16:01:49,590	INFO tune.py:762 -- Total run time: 86683.30 seconds (86683.14 seconds for the tuning loop).


In [22]:
best_result = results.get_best_result("loss", "min", "last")
print("Best trial config: {}".format(best_result.config))
print("Best trial final validation loss: {}".format(best_result.metrics["loss"]))
print("Best trial final validation accuracy: {}".format(best_result.metrics["accuracy"]))

Best trial config: {'lr': 1e-05, 'optimizer_name': 'AdamW'}
Best trial final validation loss: 1.8144028914333494
Best trial final validation accuracy: 0.7979610750695088


In [21]:
resultado = results.get_dataframe()
resultado.rename(columns={'config/optimizer_name': 'opt', 'config/lr': 'lr'}, inplace = True)
df = resultado[['loss', 'accuracy', 'lr', 'opt', 'training_iteration']]
df

Unnamed: 0,loss,accuracy,lr,opt,training_iteration
0,2.740561,0.564411,0.00100,Adam,5
1,2.739303,0.564411,0.00100,Adam,4
2,2.763781,0.564411,0.00010,Adam,2
3,2.762687,0.564411,0.00010,AdamW,2
4,2.294743,0.748842,0.00001,AdamW,1
...,...,...,...,...,...
115,2.737732,0.564411,0.00100,Adam,4
116,4.203471,0.710843,0.00001,AdamW,5
117,2.767753,0.640408,0.00001,Adam,2
118,20.363025,0.577386,0.00100,SGD,1
