In [1]:
from IPython.display import clear_output

In [2]:
import os

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import train, tune
from ray.tune.schedulers import ASHAScheduler

In [3]:
curr_path = os.path.abspath(os.getcwd())

In [4]:
def get_transfer_learning_model(classifier_dropout):
    model_weights = models.VGG19_Weights.DEFAULT
    model_ft = models.vgg19(weights=model_weights)

    for param in model_ft.parameters():
        # param.requires_grad = False
        param.requires_grad = True

    for param in model_ft.classifier.parameters():
        param.requires_grad = True

    model_ft.classifier[-1] = nn.Linear(model_ft.classifier[-1].in_features, 2)

    for layer in model_ft.classifier:
        if isinstance(layer, nn.Dropout):
            layer.p = classifier_dropout
    
    return model_ft

In [5]:
import torch.utils

target_transforms = transforms.Compose([
    lambda x:torch.tensor(x), # or just torch.tensor
    lambda x:F.one_hot(x,2)
])

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(degrees=(0, 360)),
        transforms.RandomResizedCrop(256, scale=(0.5, 1), interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.AutoAugment(policy=transforms.autoaugment.AutoAugmentPolicy.IMAGENET),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=(0.3, 1)),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)),
        transforms.RandomEqualize(),
        transforms.RandomGrayscale(p=0.2),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.RandomRotation(degrees=(0, 360)),
        transforms.RandomResizedCrop(256, scale=(0.8, 1), interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.AutoAugment(policy=transforms.autoaugment.AutoAugmentPolicy.IMAGENET),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=(0.3, 1)),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)),
        transforms.RandomEqualize(),
        transforms.RandomGrayscale(p=0.2),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256, interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = os.path.join(curr_path, "data")
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x], target_transform=target_transforms)
                  for x in ['train', 'val']}
class_names = image_datasets['train'].classes

# image_datasets['train'], image_datasets['val'] = torch.utils.data.random_split(image_datasets['train'], [30, 10])

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=50,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class_names, device

(['cleaned', 'dirty'], device(type='cuda', index=0))

In [6]:
def train_func(model, optimizer, exp_lr_scheduler, clip_value):
    total = 0
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    running_loss = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(dataloaders['train']):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target.float())

        total += output.size(0)
        running_loss += loss.item() * output.size(0)

        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        optimizer.step()
        # accuracy
        _, predicted = torch.max(output.data, 1)
        _, correct_class = torch.max(target.data, 1)
        
        correct += (predicted == correct_class).sum().item()
    
    exp_lr_scheduler.step()
    
    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

def test_func(model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    running_loss = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(dataloaders['val']):
            
            data, target = data.to(device), target.to(device)
            outputs = model(data)

            # accuracy
            _, predicted = torch.max(outputs.data, 1)
            _, correct_class = torch.max(target.data, 1)
            total += target.size(0)
            correct += (predicted == correct_class).sum().item()

            # loss
            running_loss += F.cross_entropy(outputs, target.float()).item() * outputs.size(0)
    
    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

In [7]:
import os
import tempfile

from ray.train import Checkpoint

def train_dishs(config, max_epochs=30, tunning=True):
    # Data Setup

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = get_transfer_learning_model(config['classifier_dropout'])
    model.to(device)
    

    optimizer = optim.SGD(
        model.parameters(), lr=config["lr"], momentum=config["momentum"], weight_decay=config['weight_decay'])
    
    
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=config['lr_scheduler_gamma'])
    for i in range(max_epochs):
        train_log = train_func(model, optimizer, exp_lr_scheduler, config['clip_value'])
        val_log = test_func(model)

        if tunning:
            with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
                checkpoint = None
                if (i + 1) % max_epochs == 0 and (val_log["mean_loss"] < 0.4):
                    # This saves the model to the trial directory
                    torch.save(
                        model.state_dict(),
                        os.path.join(temp_checkpoint_dir, "model.pth")
                    )
                    checkpoint = Checkpoint.from_directory(temp_checkpoint_dir)

                # Send the current training result back to Tune
                train.report(
                    {
                        "train_mean_loss": train_log["mean_loss"],
                        "train_mean_accuracy": train_log["mean_accuracy"],
                        "val_mean_loss": val_log["mean_loss"],
                        "val_mean_accuracy": val_log["mean_accuracy"],
                    },
                    checkpoint=checkpoint
                )
        else:
            print("-"*10, f"epoch: {i+1}/{max_epochs}","-"*10)
            print(f"train: {train_log}\nval: {val_log}")
    if not tunning:
        return {
            "model": model,
            "log": {
                "train": train_log,
                "val": val_log,
            },
        }

In [8]:
"""config = {
    "lr":0.1,
    "momentum":0.5,
}

train_dishs(config)"""

'config = {\n    "lr":0.1,\n    "momentum":0.5,\n}\n\ntrain_dishs(config)'

In [9]:
from hyperopt import hp
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

space = {
    "lr": hp.loguniform("lr", -10, -1),
    "momentum": hp.uniform("momentum", 0.1, 0.9),
    "classifier_dropout": hp.uniform("classifier_dropout", 0.5, 0.95),
    "weight_decay": hp.loguniform("weight_decay", -6, -2),
    "clip_value": hp.uniform("clip_value", 0.1, 5.0),
    "lr_scheduler_gamma": hp.uniform("lr_scheduler_gamma", 0.1, 1.0)
}

metric = "val_mean_loss"
mode = "min"

hyperopt_search = HyperOptSearch(space, metric=metric, mode=mode)

asas_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric=metric,
    mode=mode,
    max_t=30,
    grace_period=5,
    reduction_factor=3,
    brackets=2
)

trainable_with_resources = tune.with_resources(train_dishs, {"gpu": 1})

tuner = tune.Tuner(
    trainable_with_resources,
    tune_config=tune.TuneConfig(
        num_samples=100,
        search_alg=hyperopt_search,
        scheduler=asas_scheduler
    ),
)
results = tuner.fit()

0,1
Current time:,2024-06-16 13:17:19
Running for:,00:24:52.49
Memory:,5.3/15.6 GiB

Trial name,status,loc,classifier_dropout,clip_value,lr,lr_scheduler_gamma,momentum,weight_decay,iter,total time (s),train_mean_loss,train_mean_accuracy,val_mean_loss
train_dishs_0a618ba3,TERMINATED,172.18.58.174:430477,0.610754,3.03858,0.0400419,0.344358,0.169421,0.0368933,30,28.1,0.56375,0.7,0.73383
train_dishs_39cc7195,TERMINATED,172.18.58.174:434990,0.708632,2.11023,0.00177131,0.439682,0.600105,0.0703307,30,26.8313,0.602891,0.7,0.664904
train_dishs_ef26f777,TERMINATED,172.18.58.174:439460,0.634981,3.42104,0.000801587,0.503764,0.279204,0.0173132,15,14.9884,0.592465,0.666667,0.741146
train_dishs_5a0b4891,TERMINATED,172.18.58.174:441744,0.57455,3.3726,0.00112201,0.251087,0.486433,0.0030721,5,5.66461,0.761804,0.466667,0.819612
train_dishs_fa676826,TERMINATED,172.18.58.174:442560,0.757085,0.609918,0.353489,0.337306,0.504216,0.00309979,30,27.1306,0.601357,0.633333,0.558982
train_dishs_793a20ed,TERMINATED,172.18.58.174:447026,0.510867,1.83341,0.00132643,0.907356,0.153858,0.0187906,5,5.5938,0.746226,0.566667,0.676006
train_dishs_38932dab,TERMINATED,172.18.58.174:447842,0.637915,3.73282,0.00482558,0.334725,0.765828,0.0123677,30,27.9494,0.600832,0.666667,0.69539
train_dishs_415f99a7,TERMINATED,172.18.58.174:452318,0.852037,0.156849,0.000199262,0.558342,0.349643,0.00461157,5,5.90891,0.963969,0.4,0.68416
train_dishs_466f55ee,TERMINATED,172.18.58.174:453134,0.796855,4.24551,0.0221512,0.508549,0.106718,0.124756,5,5.49636,0.761196,0.533333,0.650362
train_dishs_72b22ca1,TERMINATED,172.18.58.174:453945,0.875653,1.1963,0.0830586,0.175389,0.877918,0.00399038,5,5.96057,0.93825,0.533333,0.671105


2024-06-16 13:17:19,884	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/glucas11/ray_results/train_dishs_2024-06-16_12-52-23' in 0.0165s.
2024-06-16 13:17:19,914	INFO tune.py:1041 -- Total run time: 1492.54 seconds (1492.47 seconds for the tuning loop).


In [10]:
best_result = results.get_best_result("val_mean_loss", mode="min")
best_result.metrics

{'train_mean_loss': 0.14902611076831818,
 'train_mean_accuracy': 0.9666666666666667,
 'val_mean_loss': 0.42100945115089417,
 'val_mean_accuracy': 0.7,
 'timestamp': 1718557585,
 'checkpoint_dir_name': None,
 'done': True,
 'training_iteration': 30,
 'trial_id': '4119257a',
 'date': '2024-06-16_13-06-25',
 'time_this_iter_s': 0.8523390293121338,
 'time_total_s': 28.620545625686646,
 'pid': 531893,
 'hostname': 'DESKTOP-GF0BL1G',
 'node_ip': '172.18.58.174',
 'config': {'classifier_dropout': 0.5202022268098999,
  'clip_value': 4.128483916178099,
  'lr': 0.01086929626316441,
  'lr_scheduler_gamma': 0.8284659450969173,
  'momentum': 0.7454226356092859,
  'weight_decay': 0.020572528271027585},
 'time_since_restore': 28.620545625686646,
 'iterations_since_restore': 30,
 'experiment_tag': '57_classifier_dropout=0.5202,clip_value=4.1285,lr=0.0109,lr_scheduler_gamma=0.8285,momentum=0.7454,weight_decay=0.0206'}

In [11]:
import json

with open("best_result.json", 'w') as f:
    json.dump(best_result.config, f, default=str)

In [15]:
best_result = results.get_best_result("val_mean_loss", mode="min")
with best_result.checkpoint.as_directory() as checkpoint_dir:
    state_dict = torch.load(os.path.join(checkpoint_dir, "model.pth"))

model = get_transfer_learning_model(best_result.config['classifier_dropout']).to(device)
model.load_state_dict(state_dict)
model.classifier

AttributeError: 'NoneType' object has no attribute 'as_directory'

In [20]:
from PIL import Image
import pandas as pd

def to_csv(model, batch_size=10):
    model.eval()
    PATH_TEST = os.path.join(curr_path, "data/test/")
    test_file_names = os.listdir(PATH_TEST)
    test_file_names.sort()

    submission_csv = {
        "id": [],
        "label": []
    }

    for file_name in test_file_names:
        id = file_name.split(".")[0]
        test_input = Image.open(os.path.join(PATH_TEST, file_name))
        test_input = data_transforms['test'](test_input).to(device).unsqueeze(0)
        with torch.no_grad():
            pred_test_label = model(test_input).max(1).indices.item()
            pred_test_label = class_names[pred_test_label]
        submission_csv['id'].append(id)
        submission_csv['label'].append(pred_test_label)

    submission_csv = pd.DataFrame(submission_csv).set_index("id")
    submission_csv.to_csv("submission.csv")

In [None]:
to_csv(model)

In [16]:
best_config_train_model = train_dishs(best_result.config, max_epochs=30, tunning=False)

---------- epoch: 1/30 ----------
train: {'mean_loss': 0.8248375654220581, 'mean_accuracy': 0.43333333333333335}
val: {'mean_loss': 0.8100653886795044, 'mean_accuracy': 0.2}
---------- epoch: 2/30 ----------
train: {'mean_loss': 0.6775388717651367, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.7240206003189087, 'mean_accuracy': 0.5}
---------- epoch: 3/30 ----------
train: {'mean_loss': 0.6607518792152405, 'mean_accuracy': 0.5333333333333333}
val: {'mean_loss': 0.8362024426460266, 'mean_accuracy': 0.5}
---------- epoch: 4/30 ----------
train: {'mean_loss': 0.5340369939804077, 'mean_accuracy': 0.7666666666666667}
val: {'mean_loss': 0.7759462594985962, 'mean_accuracy': 0.4}
---------- epoch: 5/30 ----------
train: {'mean_loss': 0.5765109062194824, 'mean_accuracy': 0.7333333333333333}
val: {'mean_loss': 0.730734646320343, 'mean_accuracy': 0.5}
---------- epoch: 6/30 ----------
train: {'mean_loss': 0.6099836230278015, 'mean_accuracy': 0.7333333333333333}
val: {'mean_loss': 0.7877816557884216,

In [17]:
new_model = best_config_train_model['model']

In [18]:
new_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [21]:
to_csv(new_model)