In [1]:
from IPython.display import clear_output

In [2]:
import os

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import train, tune
from ray.tune.schedulers import ASHAScheduler

In [3]:
# Absolute path of the directory the notebook kernel was started in; every
# data path below is built relative to it.
curr_path = os.path.abspath(os.curdir)

In [4]:
def get_transfer_learning_model(classifier_dropout, num_classes=2, freeze_features=False):
    """Build a pretrained VGG19 whose classifier head is adapted to this task.

    Args:
        classifier_dropout: Drop probability written into every ``nn.Dropout``
            layer of the classifier. Must lie in ``[0, 1]``.
        num_classes: Output size of the replacement final linear layer.
            Defaults to 2, the value that was previously hard-coded.
        freeze_features: When True, the parameters under ``model.features``
            get ``requires_grad = False`` so only the classifier is trained.
            Defaults to False, i.e. every parameter is trainable, which is
            what the function did before this option existed.

    Returns:
        The modified ``torchvision`` VGG19 module (not moved to any device).

    Raises:
        ValueError: If ``classifier_dropout`` is outside ``[0, 1]``.
    """
    if not 0.0 <= classifier_dropout <= 1.0:
        # Assigning ``layer.p`` below bypasses nn.Dropout's constructor check,
        # so an invalid value would otherwise only fail at the first forward pass.
        raise ValueError(
            f"classifier_dropout must be in [0, 1], got {classifier_dropout}"
        )

    model_ft = models.vgg19(weights=models.VGG19_Weights.DEFAULT)

    # Start with everything trainable, then optionally freeze the feature extractor.
    for param in model_ft.parameters():
        param.requires_grad = True
    if freeze_features:
        for param in model_ft.features.parameters():
            param.requires_grad = False

    # Swap the last classifier layer for a fresh, task-sized linear layer.
    head_in_features = model_ft.classifier[-1].in_features
    model_ft.classifier[-1] = nn.Linear(head_in_features, num_classes)

    for layer in model_ft.classifier:
        if isinstance(layer, nn.Dropout):
            layer.p = classifier_dropout

    return model_ft

In [5]:
import torch.utils

# ---------------------------------------------------------------------------
# Data pipeline: transforms, ImageFolder datasets and DataLoaders.
# ---------------------------------------------------------------------------
NUM_CLASSES = 2
BATCH_SIZE = 50
NUM_WORKERS = 4
# Per-channel statistics used to normalise every image fed to the network.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]


def _one_hot_label(label):
    """Turn an integer class index into a one-hot tensor of length NUM_CLASSES."""
    return F.one_hot(torch.tensor(label), NUM_CLASSES)


def _augmented_pipeline(min_crop_scale):
    """Build the random-augmentation pipeline shared by the 'train' and 'val' splits.

    The two splits use the same sequence of transforms and differ only in the
    lower bound of the RandomResizedCrop scale range.
    """
    return transforms.Compose([
        transforms.RandomRotation(degrees=(0, 360)),
        transforms.RandomResizedCrop(
            256,
            scale=(min_crop_scale, 1),
            interpolation=transforms.InterpolationMode.BILINEAR,
        ),
        transforms.AutoAugment(policy=transforms.autoaugment.AutoAugmentPolicy.IMAGENET),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=(0.3, 1)),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.0)),
        transforms.RandomEqualize(),
        transforms.RandomGrayscale(p=0.2),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ])


# Labels are delivered one-hot encoded; train_func/test_func cast them to float
# for the loss and take the index of the max entry to recover the class.
target_transforms = transforms.Compose([_one_hot_label])

data_transforms = {
    'train': _augmented_pipeline(0.5),
    # NOTE(review): the validation split is also randomly augmented, so the
    # validation metrics vary from epoch to epoch even for a fixed model.
    'val': _augmented_pipeline(0.8),
    'test': transforms.Compose([
        transforms.Resize(256, interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]),
}

data_dir = os.path.join(curr_path, "data")
image_datasets = {
    split: datasets.ImageFolder(
        os.path.join(data_dir, split),
        data_transforms[split],
        target_transform=target_transforms,
    )
    for split in ['train', 'val']
}
class_names = image_datasets['train'].classes

# Only the training split needs shuffling; the validation metrics are
# aggregated over the whole split, so sample order is irrelevant there.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=BATCH_SIZE,
        shuffle=(split == 'train'),
        num_workers=NUM_WORKERS,
    )
    for split in ['train', 'val']
}

dataset_sizes = {split: len(image_datasets[split]) for split in ['train', 'val']}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class_names, device

(['cleaned', 'dirty'], device(type='cuda', index=0))

In [6]:
def train_func(model, optimizer, clip_value, loader=None):
    """Run one training epoch and return sample-weighted loss and accuracy.

    Args:
        model: Network to train; it is switched to ``train()`` mode.
        optimizer: Optimizer that updates ``model``'s parameters.
        clip_value: Maximum gradient norm passed to ``clip_grad_norm_``.
        loader: Iterable of ``(data, target)`` batches where ``target`` is
            one-hot encoded. Defaults to the notebook-level
            ``dataloaders['train']``.

    Returns:
        dict with ``"mean_loss"`` (cross-entropy averaged over samples) and
        ``"mean_accuracy"`` (fraction of samples classified correctly).

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if loader is None:
        loader = dataloaders['train']

    # Feed batches to wherever the model actually lives instead of guessing
    # from CUDA availability; fall back to the old guess for parameterless models.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.train()
    total = 0
    correct = 0
    running_loss = 0.0
    for data, target in loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        # Targets are one-hot, so they are passed as class probabilities.
        loss = F.cross_entropy(output, target.float())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        optimizer.step()

        batch_size = output.size(0)
        total += batch_size
        # The loss is a batch mean; re-weight it so uneven batches average correctly.
        running_loss += loss.item() * batch_size
        correct += (output.argmax(dim=1) == target.argmax(dim=1)).sum().item()

    if total == 0:
        raise ValueError("train_func received a loader that yielded no samples")

    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

def test_func(model):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    running_loss = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(dataloaders['val']):
            
            data, target = data.to(device), target.to(device)
            outputs = model(data)

            # accuracy
            _, predicted = torch.max(outputs.data, 1)
            _, correct_class = torch.max(target.data, 1)
            total += target.size(0)
            correct += (predicted == correct_class).sum().item()

            # loss
            running_loss += F.cross_entropy(outputs, target.float()).item() * outputs.size(0)
    
    return {
        "mean_loss": running_loss / total,
        "mean_accuracy": correct / total,
    }

In [21]:
import os
import tempfile

from ray.train import Checkpoint

def train_dishs(config, max_epochs=20, tunning=True, checkpoint_max_val_loss=0.4):
    """Train a fresh transfer-learning model, either as a Ray Tune trial or standalone.

    Args:
        config: Mapping with the keys ``"classifier_dropout"``, ``"lr"``,
            ``"momentum"``, ``"weight_decay"`` and ``"clip_value"``.
        max_epochs: Number of train/validate epochs to run.
        tunning: When True, per-epoch metrics are sent to Ray via
            ``train.report`` and nothing is returned. When False, metrics are
            printed and the trained model is returned.
        checkpoint_max_val_loss: In tuning mode the model weights are attached
            as a checkpoint to the final epoch's report only if that epoch's
            validation loss is strictly below this value.

    Returns:
        ``None`` in tuning mode. Otherwise a dict with ``"model"`` (the model
        after the last epoch) and ``"log"`` holding the last epoch's
        ``"train"`` and ``"val"`` metric dicts (both ``None`` when
        ``max_epochs`` is 0).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = get_transfer_learning_model(config['classifier_dropout'])
    model.to(device)

    optimizer = optim.SGD(
        model.parameters(),
        lr=config["lr"],
        momentum=config["momentum"],
        weight_decay=config['weight_decay'],
    )

    # Pre-bind so the standalone return below is well-defined even if the loop never runs.
    train_log = None
    val_log = None

    for epoch in range(max_epochs):
        train_log = train_func(model, optimizer, config['clip_value'])
        val_log = test_func(model)

        if not tunning:
            print("-"*10, f"epoch: {epoch+1}/{max_epochs}","-"*10)
            print(f"train: {train_log}\nval: {val_log}")
            continue

        metrics = {
            "train_mean_loss": train_log["mean_loss"],
            "train_mean_accuracy": train_log["mean_accuracy"],
            "val_mean_loss": val_log["mean_loss"],
            "val_mean_accuracy": val_log["mean_accuracy"],
        }

        is_last_epoch = epoch == max_epochs - 1
        if is_last_epoch and val_log["mean_loss"] < checkpoint_max_val_loss:
            # The report must happen while the temporary directory still exists,
            # because the checkpoint object points at that directory.
            with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
                torch.save(
                    model.state_dict(),
                    os.path.join(temp_checkpoint_dir, "model.pth")
                )
                train.report(
                    metrics,
                    checkpoint=Checkpoint.from_directory(temp_checkpoint_dir),
                )
        else:
            train.report(metrics)

    if not tunning:
        return {
            "model": model,
            "log": {
                "train": train_log,
                "val": val_log,
            },
        }

In [8]:
# Disabled manual smoke test, kept as a bare string literal so it never runs.
# NOTE: this config only defines "lr" and "momentum", while train_dishs also
# reads "classifier_dropout", "weight_decay" and "clip_value"; re-enabling it
# unchanged would fail with a KeyError.
"""config = {
    "lr":0.1,
    "momentum":0.5,
}

train_dishs(config)"""

'config = {\n    "lr":0.1,\n    "momentum":0.5,\n}\n\ntrain_dishs(config)'

In [9]:
from hyperopt import hp
from ray.tune.search.hyperopt import HyperOptSearch

# Hyperparameter search space handed to HyperOpt; the keys match what
# train_dishs reads from its config.
space = dict(
    lr=hp.loguniform("lr", -10, -1),
    momentum=hp.uniform("momentum", 0.1, 0.9),
    classifier_dropout=hp.uniform("classifier_dropout", 0.5, 0.95),
    weight_decay=hp.loguniform("weight_decay", -6, -2),
    clip_value=hp.uniform("clip_value", 0.1, 5.0),
)

# The search maximises the validation accuracy reported by each trial.
hyperopt_search = HyperOptSearch(space, metric="val_mean_accuracy", mode="max")

# 40 sampled configurations, each trial reserving one GPU.
tune_config = tune.TuneConfig(num_samples=40, search_alg=hyperopt_search)
trainable_with_resources = tune.with_resources(train_dishs, {"gpu": 1})

tuner = tune.Tuner(trainable_with_resources, tune_config=tune_config)
results = tuner.fit()

0,1
Current time:,2024-06-15 14:15:17
Running for:,00:15:51.85
Memory:,6.4/15.6 GiB

Trial name,status,loc,classifier_dropout,clip_value,lr,momentum,weight_decay,iter,total time (s),train_mean_loss,train_mean_accuracy,val_mean_loss
train_dishs_3ad12912,TERMINATED,172.18.58.174:388306,0.838715,4.34739,0.000670334,0.41329,0.022106,20,19.2168,1.04291,0.466667,0.723456
train_dishs_bf2fb1dc,TERMINATED,172.18.58.174:391356,0.878274,1.67659,0.000233061,0.221804,0.00454917,20,21.7716,1.0005,0.466667,0.768805
train_dishs_8b7524b5,TERMINATED,172.18.58.174:394378,0.849991,4.74447,0.000438618,0.349805,0.0159287,20,27.4166,1.07896,0.4,0.740682
train_dishs_2e461696,TERMINATED,172.18.58.174:397444,0.731665,0.944569,0.000170583,0.784582,0.00798063,20,18.3307,0.72301,0.5,0.730685
train_dishs_e5f799c3,TERMINATED,172.18.58.174:400454,0.546397,4.01981,0.00162157,0.257456,0.00266262,20,18.1602,0.581299,0.766667,0.64927
train_dishs_14308d97,TERMINATED,172.18.58.174:403471,0.872002,3.93005,0.00214131,0.565053,0.0161657,20,18.0836,0.76815,0.533333,0.783966
train_dishs_45df32ce,TERMINATED,172.18.58.174:406475,0.835385,1.02255,0.0376528,0.575329,0.125749,20,18.1548,0.659917,0.533333,0.652451
train_dishs_bf3d6216,TERMINATED,172.18.58.174:409480,0.545027,4.88107,0.119672,0.382093,0.0058665,20,18.1144,1.07326,0.4,4.29633
train_dishs_3af69f1c,TERMINATED,172.18.58.174:412485,0.739429,3.13053,0.00293688,0.411806,0.0295035,20,18.2083,0.654154,0.6,0.65492
train_dishs_cb6c9b56,TERMINATED,172.18.58.174:415492,0.76873,2.28598,0.209911,0.501896,0.0159385,20,18.1519,0.825879,0.466667,0.720903


[36m(train_dishs pid=487970)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/home/glucas11/ray_results/train_dishs_2024-06-15_13-59-23/train_dishs_2c1d4416_34_classifier_dropout=0.7594,clip_value=1.7669,lr=0.0624,momentum=0.4600,weight_decay=0.0290_2024-06-15_14-11-34/checkpoint_000000)
2024-06-15 14:15:17,320	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/glucas11/ray_results/train_dishs_2024-06-15_13-59-23' in 0.0085s.
2024-06-15 14:15:17,330	INFO tune.py:1041 -- Total run time: 951.89 seconds (951.84 seconds for the tuning loop).


In [10]:
def _sample_lr(spec):
    """Draw a learning rate as 10 ** (-10 * u), with u taken from np.random.rand()."""
    return 10 ** (-10 * np.random.rand())


# Alternative Ray-native search space (learning rate and momentum only).
search_space = dict(
    lr=tune.sample_from(_sample_lr),
    momentum=tune.uniform(0.1, 0.9),
)

In [11]:
# Disabled ASHA-scheduler experiment, kept as a bare string literal so it never runs.
# NOTE: the scheduler metric "mean_accuracy" is not one of the keys train_dishs
# reports (train_mean_loss, train_mean_accuracy, val_mean_loss,
# val_mean_accuracy), so the metric name needs updating before re-enabling.
"""trainable_with_resources = tune.with_resources(train_dishs, {"gpu": 1})
tuner = tune.Tuner(
    trainable_with_resources,
    tune_config=tune.TuneConfig(
        num_samples=20,
        scheduler=ASHAScheduler(metric="mean_accuracy", mode="max"),
        
    ),
    param_space=search_space,
)
results = tuner.fit()

# Obtain a trial dataframe from all run trials of this `tune.run` call.
dfs = {result.path: result.metrics_dataframe for result in results}"""

'trainable_with_resources = tune.with_resources(train_dishs, {"gpu": 1})\ntuner = tune.Tuner(\n    trainable_with_resources,\n    tune_config=tune.TuneConfig(\n        num_samples=20,\n        scheduler=ASHAScheduler(metric="mean_accuracy", mode="max"),\n        \n    ),\n    param_space=search_space,\n)\nresults = tuner.fit()\n\n# Obtain a trial dataframe from all run trials of this `tune.run` call.\ndfs = {result.path: result.metrics_dataframe for result in results}'

In [12]:
# Select the trial with the lowest validation loss. Note that the search
# itself was driven by validation accuracy, not by this metric.
best_result = results.get_best_result(metric="val_mean_loss", mode="min")
best_result.metrics

{'train_mean_loss': 0.3981822431087494,
 'train_mean_accuracy': 0.8666666666666667,
 'val_mean_loss': 0.3904951214790344,
 'val_mean_accuracy': 0.8,
 'timestamp': 1718475143,
 'checkpoint_dir_name': 'checkpoint_000000',
 'should_checkpoint': True,
 'done': True,
 'training_iteration': 20,
 'trial_id': '2c1d4416',
 'date': '2024-06-15_14-12-24',
 'time_this_iter_s': 2.1849591732025146,
 'time_total_s': 26.736804723739624,
 'pid': 487970,
 'hostname': 'DESKTOP-GF0BL1G',
 'node_ip': '172.18.58.174',
 'config': {'classifier_dropout': 0.7594161951487975,
  'clip_value': 1.7668885218949568,
  'lr': 0.06241539978108012,
  'momentum': 0.4599630331725607,
  'weight_decay': 0.028962190825140646},
 'time_since_restore': 26.736804723739624,
 'iterations_since_restore': 20,
 'experiment_tag': '34_classifier_dropout=0.7594,clip_value=1.7669,lr=0.0624,momentum=0.4600,weight_decay=0.0290'}

In [14]:
# Rebuild the best trial's model from its saved weights.
best_result = results.get_best_result("val_mean_loss", mode="min")

# train_dishs only attaches a checkpoint when the final epoch's validation loss
# is below its threshold, so the selected trial may have none.
if best_result.checkpoint is None:
    raise RuntimeError(
        "The best trial has no checkpoint: no weights were saved because its "
        "final validation loss did not beat the checkpoint threshold."
    )

with best_result.checkpoint.as_directory() as checkpoint_dir:
    # map_location lets weights saved on a GPU load on whatever `device` is now.
    state_dict = torch.load(
        os.path.join(checkpoint_dir, "model.pth"),
        map_location=device,
    )

model = get_transfer_learning_model(best_result.config['classifier_dropout']).to(device)
model.load_state_dict(state_dict)
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.7594161951487975, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.7594161951487975, inplace=False)
  (6): Linear(in_features=4096, out_features=2, bias=True)
)

In [15]:
from PIL import Image
import pandas as pd

def to_csv(model, batch_size=10, output_path="submission.csv"):
    """Predict a label for every image in ``data/test/`` and write a submission CSV.

    Args:
        model: Trained classifier; it is switched to ``eval()`` mode.
        batch_size: Number of images pushed through the model per forward
            pass. Values below 1 are treated as 1.
        output_path: Destination of the CSV file. Defaults to
            ``"submission.csv"`` in the current working directory.

    The CSV has an ``id`` index column (file name without its extension) and a
    ``label`` column holding the predicted entry of ``class_names``.
    """
    model.eval()
    PATH_TEST = os.path.join(curr_path, "data/test/")

    # Keep regular, non-hidden files only, in a stable order.
    test_file_names = sorted(
        name
        for name in os.listdir(PATH_TEST)
        if not name.startswith(".") and os.path.isfile(os.path.join(PATH_TEST, name))
    )

    # Run inference on the device the model lives on; fall back to the
    # notebook-level `device` for a parameterless model.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    step = max(1, int(batch_size))
    ids = []
    labels = []
    for start in range(0, len(test_file_names), step):
        chunk = test_file_names[start:start + step]

        tensors = []
        for file_name in chunk:
            # The context manager closes the file handle; convert("RGB")
            # guarantees the three channels the Normalize transform expects.
            with Image.open(os.path.join(PATH_TEST, file_name)) as image:
                tensors.append(data_transforms['test'](image.convert("RGB")))

        batch = torch.stack(tensors).to(target_device)
        with torch.no_grad():
            predicted_indices = model(batch).argmax(dim=1).tolist()

        ids.extend(os.path.splitext(file_name)[0] for file_name in chunk)
        labels.extend(class_names[index] for index in predicted_indices)

    submission_csv = pd.DataFrame({"id": ids, "label": labels}).set_index("id")
    submission_csv.to_csv(output_path)

In [16]:
# Write submission.csv from the model restored from the best trial's checkpoint.
to_csv(model)

In [22]:
# Retrain a fresh model with the best trial's hyperparameters for 100 epochs,
# outside Ray Tune (tunning=False prints per-epoch metrics and returns the model).
best_config_train_model = train_dishs(best_result.config, max_epochs=100, tunning=False)

---------- epoch: 1/100 ----------
train: {'mean_loss': 0.8157494068145752, 'mean_accuracy': 0.5333333333333333}
val: {'mean_loss': 0.5668608546257019, 'mean_accuracy': 0.8}
---------- epoch: 2/100 ----------
train: {'mean_loss': 0.6545587778091431, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.7011714577674866, 'mean_accuracy': 0.6}
---------- epoch: 3/100 ----------
train: {'mean_loss': 0.8297837972640991, 'mean_accuracy': 0.5666666666666667}
val: {'mean_loss': 0.6197863817214966, 'mean_accuracy': 0.8}
---------- epoch: 4/100 ----------
train: {'mean_loss': 0.6733068227767944, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.8567941784858704, 'mean_accuracy': 0.6}
---------- epoch: 5/100 ----------
train: {'mean_loss': 0.6254743933677673, 'mean_accuracy': 0.6}
val: {'mean_loss': 0.6771092414855957, 'mean_accuracy': 0.6}
---------- epoch: 6/100 ----------
train: {'mean_loss': 0.6015492081642151, 'mean_accuracy': 0.7}
val: {'mean_loss': 0.7042109370231628, 'mean_accuracy': 0.6}
---------- epoch

In [25]:
# Pull the retrained network out of the dict returned by train_dishs.
new_model = best_config_train_model['model']

In [32]:
# Display the retrained model's architecture.
new_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [33]:
# Overwrites the submission.csv written earlier from the checkpointed model,
# this time with the retrained model's predictions.
to_csv(new_model)