In [6]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import random_split
from torch.utils.data import DataLoader

from functools import partial

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.air.checkpoint import Checkpoint

import torchvision
from torchvision import datasets
from torchvision import transforms

from configs import Inputs
import utils.augmentations
from utils.data import FullRadiographSexDataset

import numpy as np
from PIL import Image, ImageFile

In [7]:
DEVICE = torch.device("cpu" if torch.cuda.device_count() < 1 else "cuda:0")
batch_size = 4
CLASSES = 2
DIR = os.getcwd()
EPOCHS = 30
N_TRAIN_EXAMPLES = batch_size * 30
N_VALID_EXAMPLES = batch_size * 10
gpus_per_trial = 1
cpus_per_trial = 2

print(f"Device: {DEVICE}\nBatch size: {batch_size}\nClasses: {CLASSES}\n\
Dir: {DIR}\nEpochs: {EPOCHS}\n\
Number of training examples: {N_TRAIN_EXAMPLES}\n\
Number of validation examples: {N_VALID_EXAMPLES}")

Device: cuda:0
Batch size: 4
Classes: 2
Dir: /home/david/Documents/iVision/patch-1
Epochs: 30
Number of training examples: 120
Number of validation examples: 40


In [8]:
model = torchvision.models.efficientnet_v2_l(weights=torchvision.models.EfficientNet_V2_L_Weights.IMAGENET1K_V1)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

In [9]:
from torchvision import transforms as T
img_size = 224

transform = T.Compose([
                T.Resize((img_size,img_size)),
                T.ToTensor(),
                T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

In [10]:
from torch.utils.data import DataLoader
from configs import Inputs
from utils.augmentations import get_transforms
from utils.data import FullRadiographSexDataset

val_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                       fold_nums=Inputs().val_folds,
                                       transforms=get_transforms(Inputs(), subset=["val"]))

train_dataset = FullRadiographSexDataset(root_dir=Inputs.DATASET_DIR,
                                         fold_nums=Inputs().train_folds,
                                         transforms=get_transforms(Inputs(), subset=["train"]))

train_dataloader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=0)
val_dataloader = DataLoader(val_dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)

Using only horizontal flip augmentation.
Using only horizontal flip augmentation.


In [11]:
config = {
    "lr": tune.choice([1e-2, 1e-3, 1e-4, 1e-5]),
    "optimizer_name": tune.choice(["Adam", "AdamW", "SGD"])
}

In [12]:
def compute_metrics(outputs, labels):
    # convert outputs to the predicted classes
    _, pred = torch.max(outputs, 1)

    # compare predictions to true label
    total = len(labels)
    true_positives = (pred & labels.data.view_as(pred)).sum().item()
    true_negatives = ((1 - pred) & (1 - labels).data.view_as(pred)).sum().item()
    false_positives = (pred & (1 - labels).data.view_as(pred)).sum().item()
    false_negatives = ((1 - pred) & labels.data.view_as(pred)).sum().item()

    return {
        'tp': true_positives,
        'tn': true_negatives,
        'fp': false_positives,
        'fn': false_negatives,
        'total': total,
    }

In [13]:
def objective(config):

    # Gerar o modelo
    model = torchvision.models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.IMAGENET1K_V1)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
    model.to(DEVICE)

    # Gerar optimizer
    optimizer_name = config['optimizer_name']
    lr = config["lr"]
    
    print("opt_name:", optimizer_name, "\nlr:", lr)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    train_loader, valid_loader = train_dataloader, val_dataloader

    criterion = nn.CrossEntropyLoss()
    
    running_loss, total = 0, 0
    tp, tn, fp, fn = 0, 0, 0, 0
    
    for epoch in range(5):  # loop over the dataset multiple times
        for i, data in enumerate(train_dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # sum up the loss
            running_loss += loss.item() * len(inputs)

            metrics = compute_metrics(outputs, labels)
            tp += metrics['tp']
            tn += metrics['tn']
            fp += metrics['fp']
            fn += metrics['fn']
            total += metrics['total']
            
        if total != 0:
            accuracy = (tp + tn) / total
        else:
            accuracy = 0
        
        if (tp + fp) != 0:
            precision = tp / (tp + fp)
        else:
            precision = 0
        
        if (tp+fn) != 0:
            recall = tp / (tp + fn)
        else:
            recall = 0
        
        if (2 * tp + fp + fn) != 0:
            f1 = 2 * tp / (2 * tp + fp + fn)
        else:
            f1 = 0

        print(f'Train precision: {precision:.4f}')
        print(f'Train recall: {recall:.4f}')
        print(f'Train F1: {f1:.4f}')
        print(f'Training loss: {running_loss / len(train_dataloader):.5f}')
        print(f'Training accuracy: {100*accuracy:.2f} (%)')
        
        # Validation loss
        with torch.no_grad():
            
            running_loss, total = 0, 0
            tp, tn, fp, fn = 0, 0, 0, 0 
            
            for i, data in enumerate(val_dataloader, 0):
                model.eval()
                inputs, labels = data
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                # sum up the loss
                running_loss += loss.item() * len(inputs)

                metrics_dict = compute_metrics(outputs, labels)
                tp += metrics_dict['tp']
                tn += metrics_dict['tn']
                fp += metrics_dict['fp']
                fn += metrics_dict['fn']
                total += metrics_dict['total']

            if total != 0:
                accuracy = (tp + tn) / total
            else:
                accuracy = 0
                
            if (tp+fp) != 0:
                precision = tp / (tp + fp)
            else:
                precision = 0
                
            if (tp+fn) != 0:
                recall = tp / (tp + fn)
            else:
                recall = 0
                
            if (2 * tp + fp + fn) != 0:
                f1 = 2 * tp / (2 * tp + fp + fn)
            else:
                f1 = 0

            val_loss=running_loss / len(val_dataloader)
            print(f'Validation loss: {val_loss:.5f}')
            print(f'Validation accuracy: {100*accuracy:.2f} (%)') 
            print(f'Validation precision: {precision:.4f}')
            print(f'Validation recall: {recall:.4f}')
            print(f'Validation F1: {f1:.4f}')
            
            tune.report(loss=val_loss, accuracy=accuracy)

    return accuracy

In [14]:
scheduler = ASHAScheduler(
        max_t=5,
        grace_period=1,
        reduction_factor=2
)

reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"]
)

tuner = tune.Tuner(
        tune.with_resources(
            tune.with_parameters(objective),
            resources={"cpu": cpus_per_trial, "gpu": gpus_per_trial}
        ),
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
            scheduler=scheduler,
            num_samples=N_TRAIN_EXAMPLES,
        ),
        param_space=config,
    )
results = tuner.fit()


2023-01-24 19:16:33,215	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8266 [39m[22m


0,1
Current time:,2023-01-25 07:47:04
Running for:,12:30:30.01
Memory:,8.6/11.6 GiB

Trial name,status,loc,lr,optimizer_name,iter,total time (s),loss,accuracy
objective_c35df_00064,RUNNING,192.168.1.20:223797,0.0001,SGD,,,,
objective_c35df_00065,PENDING,,0.001,Adam,,,,
objective_c35df_00066,PENDING,,0.01,AdamW,,,,
objective_c35df_00067,PENDING,,0.0001,Adam,,,,
objective_c35df_00068,PENDING,,0.001,SGD,,,,
objective_c35df_00069,PENDING,,0.001,AdamW,,,,
objective_c35df_00070,PENDING,,1e-05,Adam,,,,
objective_c35df_00071,PENDING,,0.01,SGD,,,,
objective_c35df_00072,PENDING,,1e-05,Adam,,,,
objective_c35df_00073,PENDING,,0.0001,AdamW,,,,


[2m[36m(objective pid=223797)[0m opt_name: SGD 
[2m[36m(objective pid=223797)[0m lr: 0.001
[2m[36m(objective pid=223797)[0m Train precision: 0.5128
[2m[36m(objective pid=223797)[0m Train recall: 0.0303
[2m[36m(objective pid=223797)[0m Train F1: 0.0573
[2m[36m(objective pid=223797)[0m Training loss: 2.65884
[2m[36m(objective pid=223797)[0m Training accuracy: 59.28 (%)


Trial name,accuracy,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
objective_c35df_00000,0.759963,2023-01-24_19-47-52,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,5,2.19724,192.168.1.20,223797,1872.29,370.525,1872.29,1674600472,0,,5,c35df_00000,0.00429296
objective_c35df_00001,0.564411,2023-01-24_20-00-21,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,2,2.75789,192.168.1.20,223797,748.581,362.27,748.581,1674601221,0,,2,c35df_00001,0.00429296
objective_c35df_00002,0.704356,2023-01-24_20-07-03,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,1,2.44784,192.168.1.20,223797,402.225,402.225,402.225,1674601623,0,,1,c35df_00002,0.00429296
objective_c35df_00003,0.555144,2023-01-24_20-13-30,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,1,3.03404,192.168.1.20,223797,386.819,386.819,386.819,1674602010,0,,1,c35df_00003,0.00429296
objective_c35df_00004,0.58202,2023-01-24_20-19-53,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,1,2.63342,192.168.1.20,223797,383.233,383.233,383.233,1674602393,0,,1,c35df_00004,0.00429296
objective_c35df_00005,0.564411,2023-01-24_20-32-03,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,2,2.76591,192.168.1.20,223797,729.617,359.796,729.617,1674603123,0,,2,c35df_00005,0.00429296
objective_c35df_00006,0.728452,2023-01-24_21-02-22,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,5,4.02738,192.168.1.20,223797,1819.45,368.399,1819.45,1674604942,0,,5,c35df_00006,0.00429296
objective_c35df_00007,0.655236,2023-01-24_21-25-57,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,4,3.07339,192.168.1.20,223797,1415.01,343.237,1415.01,1674606357,0,,4,c35df_00007,0.00429296
objective_c35df_00008,0.643188,2023-01-24_21-53-33,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,5,4.57257,192.168.1.20,223797,1656.39,327.86,1656.39,1674608013,0,,5,c35df_00008,0.00429296
objective_c35df_00009,0.820204,2023-01-24_22-22-56,True,,d7430ed690df44048c8f0dba157d7fb7,arch-pc,5,2.22379,192.168.1.20,223797,1762.45,359.587,1762.45,1674609776,0,,5,c35df_00009,0.00429296


[2m[36m(objective pid=223797)[0m Validation loss: 2.61083
[2m[36m(objective pid=223797)[0m Validation accuracy: 63.21 (%)
[2m[36m(objective pid=223797)[0m Validation precision: 0.8230
[2m[36m(objective pid=223797)[0m Validation recall: 0.1979
[2m[36m(objective pid=223797)[0m Validation F1: 0.3190
[2m[36m(objective pid=223797)[0m Train precision: 0.6982
[2m[36m(objective pid=223797)[0m Train recall: 0.2808
[2m[36m(objective pid=223797)[0m Train F1: 0.4005
[2m[36m(objective pid=223797)[0m Training loss: 4.26040
[2m[36m(objective pid=223797)[0m Training accuracy: 64.79 (%)
[2m[36m(objective pid=223797)[0m Validation loss: 2.16126
[2m[36m(objective pid=223797)[0m Validation accuracy: 75.63 (%)
[2m[36m(objective pid=223797)[0m Validation precision: 0.8245
[2m[36m(objective pid=223797)[0m Validation recall: 0.5596
[2m[36m(objective pid=223797)[0m Validation F1: 0.6667
[2m[36m(objective pid=223797)[0m Train precision: 0.7325
[2m[36m(objective 

In [None]:
best_result = results.get_best_result("loss", "min", "last")
print("Best trial config: {}".format(best_result.config))
print("Best trial final validation loss: {}".format(best_result.metrics["loss"]))
print("Best trial final validation accuracy: {}".format(best_result.metrics["accuracy"]))

In [None]:
resultado = results.get_dataframe()
resultado.rename(columns={'config/optimizer_name': 'opt', 'config/lr': 'lr'}, inplace = True)
df = resultado[['loss', 'accuracy', 'lr', 'opt', 'training_iteration']]
df