In [1]:
from IPython.display import clear_output

In [2]:
import os

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import train, tune
from ray.tune.schedulers import ASHAScheduler

In [3]:
curr_path = os.path.abspath(os.getcwd())

In [4]:
def get_transfer_learning_model(classifier_dropout):
    model_weights = models.VGG19_Weights.DEFAULT
    model_ft = models.vgg19(weights=model_weights)

    for param in model_ft.parameters():
        # param.requires_grad = False
        param.requires_grad = True

    for param in model_ft.classifier.parameters():
        param.requires_grad = True

    model_ft.classifier[-1] = nn.Linear(model_ft.classifier[-1].in_features, 2)

    for layer in model_ft.classifier:
        if isinstance(layer, nn.Dropout):
            layer.p = classifier_dropout
    
    return model_ft

In [5]:
import torch.utils

target_transforms = transforms.Compose([
    lambda x:torch.tensor(x), # or just torch.tensor
    lambda x:F.one_hot(x,2)
])

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(degrees=(0, 360)),
        transforms.RandomResizedCrop(256, scale=(0.5, 1), interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.AutoAugment(policy=transforms.autoaugment.AutoAugmentPolicy.IMAGENET),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=(0.3, 1)),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)),
        transforms.RandomEqualize(),
        transforms.RandomGrayscale(p=0.2),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.RandomRotation(degrees=(0, 360)),
        transforms.RandomResizedCrop(256, scale=(0.8, 1), interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.AutoAugment(policy=transforms.autoaugment.AutoAugmentPolicy.IMAGENET),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=(0.3, 1)),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)),
        transforms.RandomEqualize(),
        transforms.RandomGrayscale(p=0.2),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256, interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = os.path.join(curr_path, "data")
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x], target_transform=target_transforms)
                  for x in ['train', 'val']}
class_names = image_datasets['train'].classes

# image_datasets['train'], image_datasets['val'] = torch.utils.data.random_split(image_datasets['train'], [30, 10])

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=50,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class_names, device

(['cleaned', 'dirty'], device(type='cuda', index=0))

In [6]:
def train_func(model, optimizer, exp_lr_scheduler, clip_value):
    total = 0
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()
    running_loss = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(dataloaders['train']):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target.float())

        total += output.size(0)
        running_loss += loss.item() * output.size(0)

        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        optimizer.step()
        # accuracy
        _, predicted = torch.max(output.data, 1)
        _, correct_class = torch.max(target.data, 1)
        
        correct += (predicted == correct_class).sum().item()
    
    exp_lr_scheduler.step()
    
    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

def test_func(model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    running_loss = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(dataloaders['val']):
            
            data, target = data.to(device), target.to(device)
            outputs = model(data)

            # accuracy
            _, predicted = torch.max(outputs.data, 1)
            _, correct_class = torch.max(target.data, 1)
            total += target.size(0)
            correct += (predicted == correct_class).sum().item()

            # loss
            running_loss += F.cross_entropy(outputs, target.float()).item() * outputs.size(0)
    
    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

In [7]:
import os
import tempfile

from ray.train import Checkpoint

def train_dishs(config, max_epochs=30, tunning=True):
    # Data Setup

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = get_transfer_learning_model(config['classifier_dropout'])
    model.to(device)
    

    optimizer = optim.SGD(
        model.parameters(), lr=config["lr"], momentum=config["momentum"], weight_decay=config['weight_decay'])
    
    
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=config['lr_scheduler_gamma'])
    for i in range(max_epochs):
        train_log = train_func(model, optimizer, exp_lr_scheduler, config['clip_value'])
        val_log = test_func(model)

        if tunning:
            with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
                checkpoint = None
                if (i + 1) % max_epochs == 0 and (val_log["mean_loss"] < 0.4):
                    # This saves the model to the trial directory
                    torch.save(
                        model.state_dict(),
                        os.path.join(temp_checkpoint_dir, "model.pth")
                    )
                    checkpoint = Checkpoint.from_directory(temp_checkpoint_dir)

                # Send the current training result back to Tune
                train.report(
                    {
                        "train_mean_loss": train_log["mean_loss"],
                        "train_mean_accuracy": train_log["mean_accuracy"],
                        "val_mean_loss": val_log["mean_loss"],
                        "val_mean_accuracy": val_log["mean_accuracy"],
                    },
                    checkpoint=checkpoint
                )
        else:
            print("-"*10, f"epoch: {i+1}/{max_epochs}","-"*10)
            print(f"train: {train_log}\nval: {val_log}")
    if not tunning:
        return {
            "model": model,
            "log": {
                "train": train_log,
                "val": val_log,
            },
        }

In [8]:
"""config = {
    "lr":0.1,
    "momentum":0.5,
}

train_dishs(config)"""

'config = {\n    "lr":0.1,\n    "momentum":0.5,\n}\n\ntrain_dishs(config)'

In [9]:
from hyperopt import hp
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

space = {
    "lr": hp.loguniform("lr", -10, -1),
    "momentum": hp.uniform("momentum", 0.1, 0.9),
    "classifier_dropout": hp.uniform("classifier_dropout", 0.5, 0.95),
    "weight_decay": hp.loguniform("weight_decay", -6, -2),
    "clip_value": hp.uniform("clip_value", 0.1, 5.0),
    "lr_scheduler_gamma": hp.uniform("lr_scheduler_gamma", 0.1, 1.0)
}

metric = "val_mean_loss"
mode = "min"

hyperopt_search = HyperOptSearch(space, metric=metric, mode=mode)

asas_scheduler = ASHAScheduler(
    time_attr='training_iteration',
    metric=metric,
    mode=mode,
    max_t=30,
    grace_period=5,
    reduction_factor=3,
    brackets=2
)

trainable_with_resources = tune.with_resources(train_dishs, {"gpu": 1})

tuner = tune.Tuner(
    trainable_with_resources,
    tune_config=tune.TuneConfig(
        num_samples=100,
        search_alg=hyperopt_search,
        scheduler=asas_scheduler
    ),
)
results = tuner.fit()

0,1
Current time:,2024-06-16 12:09:49
Running for:,00:31:11.39
Memory:,5.2/15.6 GiB

Trial name,status,loc,classifier_dropout,clip_value,lr,lr_scheduler_gamma,lr_scheduler_step_si ze,momentum,weight_decay,iter,total time (s),train_mean_loss,train_mean_accuracy,val_mean_loss
train_dishs_08791e88,TERMINATED,172.18.58.174:146315,0.599411,3.34403,0.116271,0.873587,5,0.106317,0.0225062,30,28.5548,0.362827,0.866667,0.41653
train_dishs_247004eb,TERMINATED,172.18.58.174:150843,0.611516,4.98023,0.09973,0.232888,6,0.619891,0.121735,15,15.0209,0.714439,0.5,0.693985
train_dishs_d9ce28e2,TERMINATED,172.18.58.174:153123,0.622817,0.188684,0.0316458,0.568515,7,0.842926,0.0219668,5,5.86283,0.71733,0.466667,0.76062
train_dishs_a537abf6,TERMINATED,172.18.58.174:153949,0.614482,4.75415,0.000280212,0.134025,2,0.598328,0.0667786,5,6.15127,0.868719,0.4,0.723398
train_dishs_a8e4eabb,TERMINATED,172.18.58.174:154770,0.547162,1.12229,6.2852e-05,0.547116,4,0.126817,0.0183266,5,6.10333,0.81127,0.433333,0.850122
train_dishs_89cd069f,TERMINATED,172.18.58.174:155590,0.576256,1.53008,8.56614e-05,0.128508,8,0.244136,0.00328314,30,27.4873,0.812319,0.433333,0.725877
train_dishs_d969dd5a,TERMINATED,172.18.58.174:160059,0.908738,2.09225,0.00392158,0.158889,8,0.60199,0.0187254,30,28.2455,1.30834,0.433333,0.714617
train_dishs_9c9a5b38,TERMINATED,172.18.58.174:164531,0.502233,1.53418,0.000161951,0.141105,2,0.872043,0.00549554,30,28.8969,0.673769,0.566667,0.592272
train_dishs_f3b8d6a2,TERMINATED,172.18.58.174:169010,0.52851,3.16139,0.0102291,0.464736,2,0.758566,0.0206246,5,6.00155,0.627945,0.666667,0.735213
train_dishs_96e8fd48,TERMINATED,172.18.58.174:169836,0.857215,3.23927,0.00549121,0.79025,5,0.175624,0.0222438,5,5.99002,0.838314,0.566667,0.766054


[36m(train_dishs pid=228991)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/glucas11/ray_results/train_dishs_2024-06-16_11-38-34/train_dishs_8576733b_34_classifier_dropout=0.5984,clip_value=3.6233,lr=0.0768,lr_scheduler_gamma=0.2496,lr_scheduler_step_size=6,m_2024-06-16_11-48-37/checkpoint_000000)
[36m(train_dishs pid=382478)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/glucas11/ray_results/train_dishs_2024-06-16_11-38-34/train_dishs_96fa4296_96_classifier_dropout=0.5399,clip_value=4.7109,lr=0.0370,lr_scheduler_gamma=0.5304,lr_scheduler_step_size=2,m_2024-06-16_12-07-39/checkpoint_000000)
2024-06-16 12:09:49,094	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/glucas11/ray_results/train_dishs_2024-06-16_11-38-34' in 0.0171s.
2024-06-16 12:09:49,114	INFO tune.py:1041 -- Total run time: 1871.44 seconds (1871.37 seconds for the tuning loop).


In [27]:
best_result = results.get_best_result("val_mean_loss", mode="min")
best_result.metrics

{'train_mean_loss': 0.3723740875720978,
 'train_mean_accuracy': 0.8333333333333334,
 'val_mean_loss': 0.3327544629573822,
 'val_mean_accuracy': 0.9,
 'timestamp': 1718554104,
 'checkpoint_dir_name': 'checkpoint_000000',
 'should_checkpoint': True,
 'done': True,
 'training_iteration': 30,
 'trial_id': '96fa4296',
 'date': '2024-06-16_12-08-24',
 'time_this_iter_s': 1.602409839630127,
 'time_total_s': 27.714566469192505,
 'pid': 382478,
 'hostname': 'DESKTOP-GF0BL1G',
 'node_ip': '172.18.58.174',
 'config': {'classifier_dropout': 0.5398794928572948,
  'clip_value': 4.7109321899908,
  'lr': 0.0369913974888341,
  'lr_scheduler_gamma': 0.5304473772254119,
  'lr_scheduler_step_size': 2,
  'momentum': 0.8174143593243359,
  'weight_decay': 0.0024889657239629062},
 'time_since_restore': 27.714566469192505,
 'iterations_since_restore': 30,
 'experiment_tag': '96_classifier_dropout=0.5399,clip_value=4.7109,lr=0.0370,lr_scheduler_gamma=0.5304,lr_scheduler_step_size=2,momentum=0.8174,weight_decay=

In [26]:
import json

with open("best_result.json", 'w') as f:
    json.dump(best_result.config, f, default=str)

In [28]:
best_result = results.get_best_result("val_mean_accuracy", mode="max")
with best_result.checkpoint.as_directory() as checkpoint_dir:
    state_dict = torch.load(os.path.join(checkpoint_dir, "model.pth"))

model = get_transfer_learning_model(best_result.config['classifier_dropout']).to(device)
model.load_state_dict(state_dict)
model.classifier

AttributeError: 'NoneType' object has no attribute 'as_directory'

In [29]:
from PIL import Image
import pandas as pd

def to_csv(model, batch_size=10):
    model.eval()
    PATH_TEST = os.path.join(curr_path, "data/test/")
    test_file_names = os.listdir(PATH_TEST)
    test_file_names.sort()

    submission_csv = {
        "id": [],
        "label": []
    }

    for file_name in test_file_names:
        id = file_name.split(".")[0]
        test_input = Image.open(os.path.join(PATH_TEST, file_name))
        test_input = data_transforms['test'](test_input).to(device).unsqueeze(0)
        with torch.no_grad():
            pred_test_label = model(test_input).max(1).indices.item()
            pred_test_label = class_names[pred_test_label]
        submission_csv['id'].append(id)
        submission_csv['label'].append(pred_test_label)

    submission_csv = pd.DataFrame(submission_csv).set_index("id")
    submission_csv.to_csv("submission.csv")

In [18]:
to_csv(model)

In [31]:
best_config_train_model = train_dishs(best_result.config, max_epochs=30, tunning=False)

---------- epoch: 1/30 ----------
train: {'mean_loss': 0.7096114158630371, 'mean_accuracy': 0.5333333333333333}
val: {'mean_loss': 0.6962162256240845, 'mean_accuracy': 0.5}
---------- epoch: 2/30 ----------
train: {'mean_loss': 0.6759913563728333, 'mean_accuracy': 0.5666666666666667}
val: {'mean_loss': 0.6861826777458191, 'mean_accuracy': 0.5}
---------- epoch: 3/30 ----------
train: {'mean_loss': 0.6411834955215454, 'mean_accuracy': 0.6666666666666666}
val: {'mean_loss': 0.7168253064155579, 'mean_accuracy': 0.2}
---------- epoch: 4/30 ----------
train: {'mean_loss': 0.6949120759963989, 'mean_accuracy': 0.5666666666666667}
val: {'mean_loss': 0.639633297920227, 'mean_accuracy': 0.8}
---------- epoch: 5/30 ----------
train: {'mean_loss': 0.7651114463806152, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.6176844835281372, 'mean_accuracy': 0.8}
---------- epoch: 6/30 ----------
train: {'mean_loss': 0.6043627858161926, 'mean_accuracy': 0.7333333333333333}
val: {'mean_loss': 0.5955191850662231, 

In [15]:
new_model = best_config_train_model['model']

In [16]:
new_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [17]:
to_csv(new_model)