In [1]:
from IPython.display import clear_output

In [2]:
import os

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import train, tune
from ray.tune.schedulers import ASHAScheduler

In [3]:
# Absolute path of the directory the notebook is running in.
curr_path = os.path.abspath(os.curdir)

In [4]:
def get_transfer_learning_model(classifier_dropout, num_classes=2, freeze_features=False):
    """Build an ImageNet-pretrained VGG19 adapted for fine-tuning.

    The last classifier layer is replaced by a fresh ``nn.Linear`` with
    ``num_classes`` outputs, and every ``nn.Dropout`` in the classifier gets
    its probability set to ``classifier_dropout``.

    Args:
        classifier_dropout: Dropout probability for the classifier head;
            must lie in ``[0, 1]``.
        num_classes: Number of output logits of the new final layer.
        freeze_features: When True, the convolutional ``features`` stack is
            excluded from gradient updates. The default (False) keeps every
            parameter trainable.

    Returns:
        The modified ``torchvision`` VGG19 module (not yet moved to a device).

    Raises:
        ValueError: If ``classifier_dropout`` is outside ``[0, 1]``.
    """
    # Assigning ``layer.p`` directly bypasses nn.Dropout's own constructor
    # check, so validate here instead of failing later in the forward pass.
    if not 0.0 <= classifier_dropout <= 1.0:
        raise ValueError(
            f"classifier_dropout must be in [0, 1], got {classifier_dropout}"
        )

    model_ft = models.vgg19(weights=models.VGG19_Weights.DEFAULT)

    # Fine-tune the whole network unless the caller asks to freeze the backbone.
    model_ft.requires_grad_(True)
    if freeze_features:
        model_ft.features.requires_grad_(False)

    # Swap the pretrained output layer for a freshly initialised head.
    head_in_features = model_ft.classifier[-1].in_features
    model_ft.classifier[-1] = nn.Linear(head_in_features, num_classes)

    for layer in model_ft.classifier:
        if isinstance(layer, nn.Dropout):
            layer.p = classifier_dropout

    return model_ft

In [5]:
import torch.utils

# Convert each label to a tensor, then to a length-2 one-hot vector.
target_transforms = transforms.Compose([
    torch.tensor,
    lambda label: F.one_hot(label, 2),
])

_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_BILINEAR = transforms.InterpolationMode.BILINEAR
_SPLITS = ('train', 'val')


def _augmented_pipeline(min_scale):
    """Build the random-augmentation pipeline shared by 'train' and 'val'.

    The two splits differ only in the lower bound of the RandomResizedCrop
    scale range, which is passed in as ``min_scale``.
    """
    return transforms.Compose([
        transforms.RandomRotation(degrees=(0, 360)),
        transforms.RandomResizedCrop(256, scale=(min_scale, 1), interpolation=_BILINEAR),
        transforms.AutoAugment(policy=transforms.autoaugment.AutoAugmentPolicy.IMAGENET),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=(0.3, 1)),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)),
        transforms.RandomEqualize(),
        transforms.RandomGrayscale(p=0.2),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


# NOTE(review): 'val' goes through the same random augmentations as 'train',
# so validation metrics vary from epoch to epoch for a fixed model — confirm
# this is intended.
data_transforms = {
    'train': _augmented_pipeline(0.5),
    'val': _augmented_pipeline(0.8),
    # Deterministic preprocessing used when predicting on the test images.
    'test': transforms.Compose([
        transforms.Resize(256, interpolation=_BILINEAR),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
}

data_dir = os.path.join(curr_path, "data")

# One ImageFolder per split, reading from <data_dir>/<split>.
image_datasets = {
    split: datasets.ImageFolder(
        os.path.join(data_dir, split),
        data_transforms[split],
        target_transform=target_transforms,
    )
    for split in _SPLITS
}
class_names = image_datasets['train'].classes

# image_datasets['train'], image_datasets['val'] = torch.utils.data.random_split(image_datasets['train'], [30, 10])

dataloaders = {
    split: DataLoader(image_datasets[split], batch_size=50, shuffle=True, num_workers=4)
    for split in _SPLITS
}

dataset_sizes = {split: len(image_datasets[split]) for split in _SPLITS}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class_names, device

(['cleaned', 'dirty'], device(type='cuda', index=0))

In [6]:
def train_func(model, optimizer, clip_value, loader=None):
    """Run one training epoch and report sample-weighted loss and accuracy.

    Args:
        model: Network to train; it is switched to train mode. It must already
            live on CUDA when CUDA is available (batches are moved there),
            otherwise on the CPU.
        optimizer: Optimizer that updates ``model``'s parameters.
        clip_value: Maximum gradient norm passed to ``clip_grad_norm_``.
        loader: Iterable of ``(data, target)`` batches, where ``target`` is a
            one-hot tensor of shape ``(batch, classes)``. Defaults to the
            notebook-level ``dataloaders['train']``.

    Returns:
        Dict with ``"mean_loss"`` and ``"mean_accuracy"`` averaged over all
        samples seen in the epoch. Both are computed from the forward pass
        made *before* each optimizer step.
    """
    if loader is None:
        loader = dataloaders['train']

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()

    total = 0
    correct = 0
    running_loss = 0.0
    for data, target in loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        # One-hot targets are passed as class probabilities, hence the float cast.
        loss = F.cross_entropy(output, target.float())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        optimizer.step()

        # Weight the batch-mean loss by batch size so a smaller final batch
        # does not skew the epoch average.
        n_samples = output.size(0)
        total += n_samples
        running_loss += loss.item() * n_samples

        # accuracy
        predicted = output.detach().argmax(dim=1)
        correct_class = target.argmax(dim=1)
        correct += (predicted == correct_class).sum().item()

    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

def test_func(model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    running_loss = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(dataloaders['val']):
            
            data, target = data.to(device), target.to(device)
            outputs = model(data)

            # accuracy
            _, predicted = torch.max(outputs.data, 1)
            _, correct_class = torch.max(target.data, 1)
            total += target.size(0)
            correct += (predicted == correct_class).sum().item()

            # loss
            running_loss += F.cross_entropy(outputs, target.float()).item() * outputs.size(0)
    
    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

In [7]:
import os
import tempfile

from ray.train import Checkpoint

def train_dishs(config, max_epochs=20, tunning=True, checkpoint_max_val_loss=0.4):
    """Train a fresh VGG19 classifier with the hyperparameters in ``config``.

    Args:
        config: Mapping with the keys ``"classifier_dropout"``, ``"lr"``,
            ``"momentum"``, ``"weight_decay"`` and ``"clip_value"``.
        max_epochs: Number of train/validation epochs to run.
        tunning: When True, report per-epoch metrics through
            ``ray.train.report`` (the function must then run inside a Ray Tune
            trial). When False, print the metrics instead.
        checkpoint_max_val_loss: In tuning mode, the model weights are
            checkpointed after the last epoch only if the validation loss is
            strictly below this value.

    Returns:
        ``None`` in tuning mode. Otherwise a dict with the trained ``"model"``
        and a ``"log"`` dict holding the last epoch's ``"train"`` and ``"val"``
        metrics (both ``None`` if ``max_epochs`` is 0).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = get_transfer_learning_model(config['classifier_dropout'])
    model.to(device)

    optimizer = optim.SGD(
        model.parameters(),
        lr=config["lr"],
        momentum=config["momentum"],
        weight_decay=config['weight_decay'],
    )

    # Pre-bind so the final return is well-defined even when no epoch runs.
    train_log = None
    val_log = None

    for epoch in range(max_epochs):
        train_log = train_func(model, optimizer, config['clip_value'])
        val_log = test_func(model)

        if not tunning:
            print("-"*10, f"epoch: {epoch+1}/{max_epochs}","-"*10)
            print(f"train: {train_log}\nval: {val_log}")
            continue

        metrics = {
            "train_mean_loss": train_log["mean_loss"],
            "train_mean_accuracy": train_log["mean_accuracy"],
            "val_mean_loss": val_log["mean_loss"],
            "val_mean_accuracy": val_log["mean_accuracy"],
        }

        is_last_epoch = epoch == max_epochs - 1
        if is_last_epoch and val_log["mean_loss"] < checkpoint_max_val_loss:
            # The temporary directory must stay alive until report() returns,
            # so the report call happens inside the context manager.
            with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
                # This saves the model to the trial directory
                torch.save(
                    model.state_dict(),
                    os.path.join(temp_checkpoint_dir, "model.pth")
                )
                train.report(
                    metrics,
                    checkpoint=Checkpoint.from_directory(temp_checkpoint_dir),
                )
        else:
            # Send the current training result back to Tune
            train.report(metrics, checkpoint=None)

    if not tunning:
        return {
            "model": model,
            "log": {
                "train": train_log,
                "val": val_log,
            },
        }

In [8]:
"""config = {
    "lr":0.1,
    "momentum":0.5,
}

train_dishs(config)"""

'config = {\n    "lr":0.1,\n    "momentum":0.5,\n}\n\ntrain_dishs(config)'

In [9]:
from hyperopt import hp
from ray.tune.search.hyperopt import HyperOptSearch

# HyperOpt search space. NOTE(review): the loguniform bounds appear to be
# natural-log exponents (e.g. lr between exp(-10) and exp(-1)) — confirm
# against the hyperopt documentation.
space = dict(
    lr=hp.loguniform("lr", -10, -1),
    momentum=hp.uniform("momentum", 0.1, 0.9),
    classifier_dropout=hp.uniform("classifier_dropout", 0.5, 0.95),
    weight_decay=hp.loguniform("weight_decay", -6, -2),
    clip_value=hp.uniform("clip_value", 0.1, 5.0),
)

# The searcher maximises the validation accuracy reported by train_dishs.
hyperopt_search = HyperOptSearch(
    space=space,
    metric="val_mean_accuracy",
    mode="max",
)

# Each trial requests one GPU.
trainable_with_resources = tune.with_resources(train_dishs, resources={"gpu": 1})

search_settings = tune.TuneConfig(search_alg=hyperopt_search, num_samples=40)
tuner = tune.Tuner(trainable_with_resources, tune_config=search_settings)
results = tuner.fit()

0,1
Current time:,2024-06-15 14:57:06
Running for:,00:13:48.00
Memory:,6.5/15.6 GiB

Trial name,status,loc,classifier_dropout,clip_value,lr,momentum,weight_decay,iter,total time (s),train_mean_loss,train_mean_accuracy,val_mean_loss
train_dishs_ae947690,TERMINATED,172.18.58.174:539858,0.83629,2.72739,0.000867158,0.659954,0.00325113,20,18.0117,0.899619,0.6,0.702159
train_dishs_3734e601,TERMINATED,172.18.58.174:542905,0.547895,4.50671,5.99513e-05,0.570225,0.0125885,20,17.6431,0.823211,0.366667,0.639833
train_dishs_35efa26e,TERMINATED,172.18.58.174:545906,0.585355,2.24106,0.0425951,0.708724,0.0405391,20,18.7357,0.445558,0.733333,0.393643
train_dishs_d563830e,TERMINATED,172.18.58.174:548916,0.815419,4.03424,0.366466,0.826378,0.0118158,20,17.9736,24.666,0.433333,2.08141
train_dishs_ef736066,TERMINATED,172.18.58.174:551921,0.511643,1.08207,0.000490123,0.69297,0.125463,20,17.9238,0.824623,0.266667,0.694361
train_dishs_30778226,TERMINATED,172.18.58.174:554934,0.682281,4.07201,0.00214614,0.652695,0.101587,20,17.9214,0.687459,0.6,0.745214
train_dishs_7d09c6a7,TERMINATED,172.18.58.174:557939,0.680769,1.67005,0.00867453,0.288225,0.00263019,20,17.8913,0.607087,0.533333,0.660941
train_dishs_049d37ef,TERMINATED,172.18.58.174:560934,0.729702,1.09928,0.172575,0.580567,0.00617589,20,18.0093,0.577945,0.766667,0.472404
train_dishs_f86f883b,TERMINATED,172.18.58.174:563939,0.814589,3.05409,0.000933882,0.536193,0.0327097,20,18.1624,0.756033,0.566667,0.678921
train_dishs_7df621a5,TERMINATED,172.18.58.174:566944,0.697718,3.78301,0.00274594,0.472526,0.0132951,20,17.8807,0.528733,0.666667,0.655869


[36m(train_dishs pid=545906)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/glucas11/ray_results/train_dishs_2024-06-15_14-43-16/train_dishs_35efa26e_3_classifier_dropout=0.5854,clip_value=2.2411,lr=0.0426,momentum=0.7087,weight_decay=0.0405_2024-06-15_14-43-41/checkpoint_000000)
[36m(train_dishs pid=600019)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/glucas11/ray_results/train_dishs_2024-06-15_14-43-16/train_dishs_048a4f4a_21_classifier_dropout=0.5445,clip_value=3.2612,lr=0.0167,momentum=0.6000,weight_decay=0.0492_2024-06-15_14-49-56/checkpoint_000000)
[36m(train_dishs pid=603026)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/glucas11/ray_results/train_dishs_2024-06-15_14-43-16/train_dishs_beef41ed_22_classifier_dropout=0.5672,clip_value=2.2676,lr=0.0237,momentum=0.8757,weight_decay=0.0641_2024-06-15_14-50-17/checkpoint_000000)
[36m(train_dishs pid=645077)[0m Checkpoint successf

In [12]:
# Select the trial that ranks best on validation accuracy and show its metrics.
best_result = results.get_best_result(metric="val_mean_accuracy", mode="max")
best_result.metrics

{'train_mean_loss': 0.8232114315032959,
 'train_mean_accuracy': 0.36666666666666664,
 'val_mean_loss': 0.6398330926895142,
 'val_mean_accuracy': 0.9,
 'timestamp': 1718477039,
 'checkpoint_dir_name': None,
 'done': True,
 'training_iteration': 20,
 'trial_id': '3734e601',
 'date': '2024-06-15_14-43-59',
 'time_this_iter_s': 0.8173401355743408,
 'time_total_s': 17.64312481880188,
 'pid': 542905,
 'hostname': 'DESKTOP-GF0BL1G',
 'node_ip': '172.18.58.174',
 'config': {'classifier_dropout': 0.5478954482088054,
  'clip_value': 4.506712382652889,
  'lr': 5.9951265748155445e-05,
  'momentum': 0.5702254199292307,
  'weight_decay': 0.012588494586609216},
 'time_since_restore': 17.64312481880188,
 'iterations_since_restore': 20,
 'experiment_tag': '2_classifier_dropout=0.5479,clip_value=4.5067,lr=0.0001,momentum=0.5702,weight_decay=0.0126'}

In [19]:
# Select the trial with the lowest validation loss and restore its weights.
best_result = results.get_best_result("val_mean_loss", mode="min")

# train_dishs only writes a checkpoint when the final validation loss is below
# its threshold, so the selected trial may have none; fail with a clear message
# instead of an AttributeError on None.
if best_result.checkpoint is None:
    raise RuntimeError(
        "The trial with the lowest val_mean_loss has no checkpoint "
        f"(val_mean_loss={best_result.metrics.get('val_mean_loss')}); "
        "no model weights are available to load."
    )

with best_result.checkpoint.as_directory() as checkpoint_dir:
    # map_location lets weights saved on a GPU load on whatever `device` is now.
    state_dict = torch.load(
        os.path.join(checkpoint_dir, "model.pth"),
        map_location=device,
    )

# Rebuild the architecture with the trial's dropout, then load the saved weights.
model = get_transfer_learning_model(best_result.config['classifier_dropout']).to(device)
model.load_state_dict(state_dict)
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5445068891619167, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5445068891619167, inplace=False)
  (6): Linear(in_features=4096, out_features=2, bias=True)
)

In [14]:
from PIL import Image
import pandas as pd

def to_csv(model, batch_size=10, output_path="submission.csv"):
    """Classify every image in ``data/test/`` and write the predictions to CSV.

    Files are processed in sorted name order, one image at a time, using the
    deterministic ``data_transforms['test']`` preprocessing. The CSV has an
    ``id`` index column (file name up to its first ``"."``) and a ``label``
    column holding the predicted entry of ``class_names``.

    Args:
        model: Trained classifier; it is switched to eval mode and inputs are
            moved to the device its parameters live on.
        batch_size: Currently unused; kept for backward compatibility.
        output_path: Destination of the CSV file.
    """
    model.eval()
    PATH_TEST = os.path.join(curr_path, "data/test/")
    model_device = next(model.parameters()).device

    image_ids = []
    labels = []
    with torch.no_grad():
        for file_name in sorted(os.listdir(PATH_TEST)):
            file_path = os.path.join(PATH_TEST, file_name)
            # Skip sub-directories (e.g. notebook checkpoint folders).
            if not os.path.isfile(file_path):
                continue

            # Close the file handle promptly and force 3 channels so the
            # 3-channel Normalize also works for grayscale/RGBA files.
            with Image.open(file_path) as image:
                test_input = data_transforms['test'](image.convert("RGB"))

            logits = model(test_input.unsqueeze(0).to(model_device))
            predicted_index = logits.max(1).indices.item()

            image_ids.append(file_name.split(".")[0])
            labels.append(class_names[predicted_index])

    submission_csv = pd.DataFrame({"id": image_ids, "label": labels}).set_index("id")
    submission_csv.to_csv(output_path)

In [15]:
# Write submission.csv with predictions from `model`.
to_csv(model)

In [22]:
# Train a new model from the pretrained weights using the selected trial's
# hyperparameters for 100 epochs; tunning=False prints per-epoch metrics
# instead of reporting them to Ray Tune and returns the model plus last logs.
best_config_train_model = train_dishs(best_result.config, max_epochs=100, tunning=False)

---------- epoch: 1/100 ----------
train: {'mean_loss': 0.8157494068145752, 'mean_accuracy': 0.5333333333333333}
val: {'mean_loss': 0.5668608546257019, 'mean_accuracy': 0.8}
---------- epoch: 2/100 ----------
train: {'mean_loss': 0.6545587778091431, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.7011714577674866, 'mean_accuracy': 0.6}
---------- epoch: 3/100 ----------
train: {'mean_loss': 0.8297837972640991, 'mean_accuracy': 0.5666666666666667}
val: {'mean_loss': 0.6197863817214966, 'mean_accuracy': 0.8}
---------- epoch: 4/100 ----------
train: {'mean_loss': 0.6733068227767944, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.8567941784858704, 'mean_accuracy': 0.6}
---------- epoch: 5/100 ----------
train: {'mean_loss': 0.6254743933677673, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.6771092414855957, 'mean_accuracy': 0.6}
---------- epoch: 6/100 ----------
train: {'mean_loss': 0.6015492081642151, 'mean_accuracy': 0.7}
val: {'mean_loss': 0.7042109370231628, 'mean_accuracy': 0.6}
---------- epoch

In [25]:
# Take the trained network out of the dict returned by train_dishs.
new_model = best_config_train_model['model']

In [32]:
# Display the retrained network's layer structure.
new_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [33]:
# Write submission.csv again, this time with the retrained model's predictions
# (to_csv always targets the same file, so the earlier output is replaced).
to_csv(new_model)