In [None]:
from google.colab import drive, runtime

In [None]:
# Mount Google Drive at /content/drive/ so the files under MyDrive become reachable from this runtime.
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# Root locations on the mounted Google Drive; every other path is derived from them.
DRIVE_DIR = '/content/drive/MyDrive'
PROJECT_ROOT_DIR = DRIVE_DIR + '/src'

# Project sub-directories.
DATA_DIR = PROJECT_ROOT_DIR + '/data'
IMG_DIR = DATA_DIR + '/images'
HYPERPARAMS_DIR = PROJECT_ROOT_DIR + '/hyperparams'
MODULES_DIR = PROJECT_ROOT_DIR + '/modules'

# Side length, in pixels, used for the square image size.
IMG_SIZE = 224

In [None]:
import pandas as pd

In [None]:
# Locations of the train / validation / test CSV files under subsample_datasets/.
train_data_path = f'{DATA_DIR}/subsample_datasets/train_data.csv'
val_data_path = f'{DATA_DIR}/subsample_datasets/val_data.csv'
test_data_path = f'{DATA_DIR}/subsample_datasets/test_data.csv'

# Load each CSV into its own DataFrame.
train_data = pd.read_csv(train_data_path)
val_data = pd.read_csv(val_data_path)
test_data = pd.read_csv(test_data_path)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision import transforms as T
import os

from timeit import default_timer as timer
from tqdm import tqdm

In [None]:
import sys

# Make the project's helper modules importable. The membership check keeps
# sys.path free of duplicate entries when this cell is executed more than once.
if MODULES_DIR not in sys.path:
  sys.path.append(MODULES_DIR)

# Importing custom modules
from datetime_helper import get_formatted_datetime as gf_datetime, get_formatted_total_runtime as gft_runtime

In [None]:
# Class name -> integer label; insertion order defines the class order.
class_to_idx = {
    'non-melanoma': 0,
    'melanoma': 1,
}

In [None]:
def get_dataset_classes(class_obj):
  """Derive class metadata from a ``{class_name: index}`` mapping.

  Args:
    class_obj: dict mapping each class name to its integer index.

  Returns:
    A 4-tuple ``(classes, num_classes, class_to_idx, idx_to_class)`` where
    ``classes`` lists the names in the mapping's iteration order,
    ``num_classes`` is their count, ``class_to_idx`` is ``class_obj`` itself
    (not a copy) and ``idx_to_class`` is the inverted mapping.
  """
  names = [*class_obj]
  inverted = dict((index, name) for name, index in class_obj.items())
  return names, len(names), class_obj, inverted

In [None]:
class SkinLesionDataset(Dataset):
    """Map-style dataset pairing skin-lesion JPEG images with their labels.

    The CSV given as ``dataset_file`` is read once at construction time. For
    each row, column 0 is used as the image file stem (``<stem>.jpg`` inside
    ``img_dir``) and column 1 as the label.

    Args:
        dataset_file: path of the CSV file listing the samples.
        img_dir: directory that contains the ``.jpg`` images.
        transform: optional callable ``(image, label) -> (image, label)``
            applied to every sample before it is returned.
    """

    # Class metadata derived from the module-level ``class_to_idx`` mapping.
    classes, num_classes, class_to_idx, idx_to_class = get_dataset_classes(class_to_idx)

    def __init__(self, dataset_file, img_dir, transform=None) -> None:
        super().__init__()
        self.dataset = pd.read_csv(dataset_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self) -> int:
        """Return the number of rows in the CSV."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        Raises:
            OSError: if the image file cannot be opened or decoded.
        """
        img_path = os.path.join(self.img_dir, f'{self.dataset.iloc[idx, 0]}.jpg')
        # Open inside a context manager so the file handle is released right away,
        # and force 3-channel RGB: grayscale/CMYK/RGBA files would otherwise yield
        # tensors with a channel count the 3-channel model cannot consume.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')
        label = self.dataset.iloc[idx, 1]
        if self.transform:
            image, label = self.transform(image, label)
        return image, label

In [None]:
# Square target size shared by both pipelines.
_target_size = (IMG_SIZE, IMG_SIZE)

# Deterministic pipeline: resize, then convert to a tensor.
transform_normal = T.Compose([T.Resize(size=_target_size), T.ToTensor()])

# Augmenting pipeline: resize, random horizontal flip, random rotation, then convert to a tensor.
transform_augmented = T.Compose([
    T.Resize(size=_target_size),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomRotation(degrees=45),
    T.ToTensor(),
])

# Labels are wrapped with torch.tensor by the helpers that use this alias.
target_transform = torch.tensor

def apply_normal_transform(image, label):
  """Run ``image`` through ``transform_normal`` and convert ``label`` to a ``torch.long`` tensor.

  Returns:
    The ``(transformed_image, label_tensor)`` pair.
  """
  image_out = transform_normal(image)
  label_out = target_transform(label, dtype=torch.long)
  return image_out, label_out

def apply_train_transform(image, label):
  """Run ``image`` through ``transform_augmented`` and convert ``label`` to a ``torch.long`` tensor.

  Returns:
    The ``(augmented_image, label_tensor)`` pair.
  """
  image_out = transform_augmented(image)
  label_out = target_transform(label, dtype=torch.long)
  return image_out, label_out

In [None]:
# Only the training split uses the augmenting transform.
train_dataset = SkinLesionDataset(
    dataset_file=train_data_path,
    img_dir=IMG_DIR,
    transform=apply_train_transform,
)
val_dataset = SkinLesionDataset(
    dataset_file=val_data_path,
    img_dir=IMG_DIR,
    transform=apply_normal_transform,
)
test_dataset = SkinLesionDataset(
    dataset_file=test_data_path,
    img_dir=IMG_DIR,
    transform=apply_normal_transform,
)

In [None]:
# Report the size of every split and their sum.
n_train, n_val, n_test = len(train_dataset), len(val_dataset), len(test_dataset)
print(f'train_dataset size: {n_train} \nval_dataset size: {n_val} \ntest_dataset size: {n_test} \ntotal: {n_train + n_val + n_test}')

train_dataset size: 6300 
val_dataset size: 1350 
test_dataset size: 1350 
total: 9000


In [None]:
# Pick a random valid position inside train_dataset
import random
random_idx_item = random.randrange(len(train_dataset))

' # Get a random index from train_dataset\nimport random\nrandom_idx_item = random.randint(0, len(train_dataset) - 1) '

In [None]:
# Inspect one training sample. The sample is fetched a single time: every
# train_dataset[...] access re-reads the file and re-applies the random
# augmentation, so repeated indexing is slow and the printed image and label
# would not be guaranteed to come from the same access.
try:
  print('Train Dataset image index:', random_idx_item)
  sample_image, sample_label = train_dataset[random_idx_item]
  print('Image:', sample_image.shape)
  print('Label:', sample_label, '| Label to class:', SkinLesionDataset.idx_to_class[sample_label.item()])
except OSError:
  print('Cannot open the image file...Please, waiting a few more and try again!')
except Exception as error:
  print(f'Unexpected {error=}, {type(error)=}!')

" try:\n  print('Train Dataset image index:', random_idx_item)\n  print('Image:', train_dataset[random_idx_item][0].shape)\n  print('Label:', train_dataset[random_idx_item][1], '| Label to class:', SkinLesionDataset.idx_to_class[train_dataset[random_idx_item][1].item()])\nexcept OSError:\n  print('Cannot open the image file...Please, waiting a few more and try again!')\nexcept Exception as error:\n  print(f'Unexpected {error=}, {type(error)=}!') "

In [None]:
# Fetch the image once so min / max / mean all describe the same tensor;
# indexing three times would apply three different random augmentations.
sample_image = train_dataset[random_idx_item][0]
sample_image.min(), sample_image.max(), sample_image.mean()

' train_dataset[random_idx_item][0].min(), train_dataset[random_idx_item][0].max(), train_dataset[random_idx_item][0].mean() '

In [None]:
# Summarize the class metadata exposed as class attributes of SkinLesionDataset.
class_info = [
    f'SkinLesionDataset classes info\n{"-" * 53}',
    '\nnum_classes:      ', SkinLesionDataset.num_classes,
    '\nclasses:          ', SkinLesionDataset.classes,
    '\nclass_to_idx:     ', SkinLesionDataset.class_to_idx,
    '\nidx_to_class:     ', SkinLesionDataset.idx_to_class,
]
print(*class_info)

SkinLesionDataset classes info
----------------------------------------------------- 
num_classes:       2 
classes:           ['non-melanoma', 'melanoma'] 
class_to_idx:      {'non-melanoma': 0, 'melanoma': 1} 
idx_to_class:      {0: 'non-melanoma', 1: 'melanoma'}


In [None]:
import torch.nn as nn
import torch.optim as optim

In [None]:
class SkinLesionClassifier(nn.Module):
  """Convolutional network for skin-lesion image classification.

  Four ``Conv2d -> ReLU -> 2x2 MaxPool`` stages extract features. The flattened
  result passes through three ``Linear -> ReLU -> Dropout`` layers and a final
  ``Linear`` layer that returns one raw score per class (no softmax is applied).

  Every convolution is padded with ``kernel_size // 2`` (spatial size is kept
  for odd kernels) and the input width of the first ``Linear`` layer is
  computed from ``img_size`` and the kernel sizes. Earlier versions hard-coded
  ``padding=2`` together with ``14 * 14``, which is only consistent for 5x5
  kernels: other kernel sizes could make ``forward`` fail with a shape
  mismatch. For 5x5 kernels and 224-pixel inputs the architecture is unchanged;
  for other kernel sizes the padding now differs from the old fixed value.

  Args:
    n_channels: number of channels of the input images.
    conv1_feature_maps .. conv4_feature_maps: output channels of each conv stage.
    conv1_kernel_size .. conv4_kernel_size: side of the square kernel of each conv stage.
    fc1_neurons, fc2_neurons, fc3_neurons: widths of the hidden fully connected layers.
    n_classes: number of output scores.
    dropout: dropout probability used after each hidden fully connected layer.
    img_size: side length of the square input images.

  Raises:
    ValueError: if the feature map shrinks below 1x1 for the given
      ``img_size`` and kernel sizes.
  """

  def __init__(
      self,
      n_channels=3,
      conv1_feature_maps=16,
      conv1_kernel_size=5,
      conv2_feature_maps=32,
      conv2_kernel_size=5,
      conv3_feature_maps=64,
      conv3_kernel_size=5,
      conv4_feature_maps=128,
      conv4_kernel_size=5,
      fc1_neurons=512,
      fc2_neurons=256,
      fc3_neurons=128,
      n_classes=2,
      dropout=0.2,
      img_size=224
    ):
    super(SkinLesionClassifier, self).__init__()

    conv_specs = [
      (conv1_feature_maps, conv1_kernel_size),
      (conv2_feature_maps, conv2_kernel_size),
      (conv3_feature_maps, conv3_kernel_size),
      (conv4_feature_maps, conv4_kernel_size),
    ]

    # feature extraction
    stages = []
    in_channels = n_channels
    spatial_size = img_size  # tracked so the first Linear layer gets the right width
    for out_channels, kernel_size in conv_specs:
      padding = kernel_size // 2
      stages += [
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(kernel_size, kernel_size), stride=1, padding=padding),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
      ]
      # stride-1 convolution output size, then floor-halved by the 2x2 pooling
      spatial_size = (spatial_size + 2 * padding - kernel_size + 1) // 2
      in_channels = out_channels

    if spatial_size < 1:
      raise ValueError(
        f'img_size={img_size} is too small for four conv/pool stages with the given kernel sizes'
      )

    # Layers keep the indices 0..11 inside the Sequential container.
    self.conv_layers = nn.Sequential(*stages)

    self.flatten = nn.Flatten(start_dim=1)

    # classification
    self.fc_layers = nn.Sequential(
      nn.Linear(conv4_feature_maps * spatial_size * spatial_size, fc1_neurons),
      nn.ReLU(),
      nn.Dropout(p=dropout),

      nn.Linear(fc1_neurons, fc2_neurons),
      nn.ReLU(),
      nn.Dropout(p=dropout),

      nn.Linear(fc2_neurons, fc3_neurons),
      nn.ReLU(),
      nn.Dropout(p=dropout),

      nn.Linear(fc3_neurons, n_classes)
    )

  def forward(self, x):
    """Return the raw class scores for a batch of images ``x``."""
    out = self.conv_layers(x)
    out = self.flatten(out)
    out = self.fc_layers(out)
    return out

In [None]:
import multiprocessing as mp

# Show how many CPUs the runtime reports.
print('Multiprocessing CPU count:', mp.cpu_count())

Multiprocessing CPU count: 12


In [None]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

if device.type != 'cuda':
  print("GPU is not available, using CPU instead")
else:
  device_count = torch.cuda.device_count()
  gpu_name = torch.cuda.get_device_name(0)
  # Choose the singular or plural ending of "device".
  _ending = 'es' if device_count > 1 else 'e'
  print(f"Using {gpu_name} GPU with {device_count} available devic{_ending}.")

Using NVIDIA A100-SXM4-40GB GPU with 1 available device.


In [None]:
# Default training hyperparameters.
max_epochs = 100      # upper bound on training epochs
learning_rate = 1e-3  # optimizer learning rate
batch_size = 64       # samples per batch

In [None]:
# Unshuffled loader over the training set, using one worker process per reported CPU.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=mp.cpu_count(),
    pin_memory=True,
)

In [None]:
date_start = timer()

print('Extracting X (images) and y (labels) from train_dataset\n')

# Collect every batch of images (X) and labels (y) produced by train_loader.
# NOTE(review): train_dataset applies random augmentation, so X ends up holding
# a single augmented version of each image — confirm this is intended.
X_list, y_list = [], []
n_batches = len(train_loader)

for batch, (images, labels) in enumerate(train_loader):
  print(f'Running batch [{batch+1}/{n_batches}]')
  X_list += [images]
  y_list += [labels]

print('\nConverting lists X and y to tensors')

# Join the per-batch tensors along the first dimension
X, y = torch.cat(X_list, dim=0), torch.cat(y_list, dim=0)

# Print the shapes as a sanity check
print("\nShape of X:", X.shape)
print("Shape of y:", y.shape)

date_end = timer()

print(f'Total runtime: {gft_runtime(date_start, date_end)}')

' date_start = timer()\n\n# Extraindo X (images) e y (labels) de train_dataset\nX_list = []\ny_list = []\n\nprint(\'Extracting X (images) and y (labels) from train_dataset\n\')\n\nfor batch, (images, labels) in enumerate(train_loader):\n  print(f\'Running batch [{batch+1}/{len(train_loader)}]\')\n  X_list.append(images) # Append images\n  y_list.append(labels) # Append labels\n\nprint(\'\nConverting lists X and y to tensors\')\n\n# Converte as listas para tensors\nX = torch.cat(X_list, dim=0)\ny = torch.cat(y_list, dim=0)\n\n# Print shapes para verificação\nprint("\nShape of X:", X.shape)\nprint("Shape of y:", y.shape)\n\ndate_end = timer()\n\nprint(f\'Total runtime: {gft_runtime(date_start, date_end)}\') '

In [None]:
# Save tensors X and y to files whose names start with the current timestamp
current_date = gf_datetime(custom_format="%Y-%m-%d_%H-%M-%S", timezone_hours=-3)

X_tensor_filename = f'{current_date}_X_tensor_to_fit.pt'
y_tensor_filename = f'{current_date}_y_tensor_to_fit.pt'

# Write X first, then y, into HYPERPARAMS_DIR.
for _tensor, _filename in ((X, X_tensor_filename), (y, y_tensor_filename)):
  torch.save(_tensor, f'{HYPERPARAMS_DIR}/{_filename}')

' # Salvar tensores X e y em um arquivo\ncurrent_date = gf_datetime(custom_format="%Y-%m-%d_%H-%M-%S", timezone_hours=-3)\n\nX_tensor_filename = f\'{current_date}_X_tensor_to_fit.pt\'\ny_tensor_filename = f\'{current_date}_y_tensor_to_fit.pt\'\n\ntorch.save(X, f\'{HYPERPARAMS_DIR}/{X_tensor_filename}\')\ntorch.save(y, f\'{HYPERPARAMS_DIR}/{y_tensor_filename}\') '

In [None]:
!pip install skorch

Collecting skorch
  Downloading skorch-1.0.0-py3-none-any.whl.metadata (11 kB)
Downloading skorch-1.0.0-py3-none-any.whl (239 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/239.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.4/239.4 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: skorch
Successfully installed skorch-1.0.0


In [None]:
import skorch

In [None]:
# Display the installed skorch version.
skorch.__version__

'1.0.0'

In [None]:
from skorch import NeuralNetClassifier
from sklearn.model_selection import RandomizedSearchCV

In [None]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides warnings raised during the hyperparameter search
# (e.g. about candidate fits that failed) — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [None]:
# Defining the loss function (an already constructed CrossEntropyLoss instance)
criterion = nn.CrossEntropyLoss()

# Defining the optimizer (the Adam class itself, not an instance: no parameters
# or learning rate are bound at this point)
optimizer = optim.Adam

In [None]:
# Random Search Hyperparameters
# Keys follow the <component>__<argument> naming convention: "optimizer__" for
# optimizer arguments and "module__" for SkinLesionClassifier constructor arguments.

params = {
    'optimizer__lr': [0.0001, 0.001, 0.01],
    'max_epochs': [50, 75, 100],
    'batch_size': [32, 64],
    'module__dropout': [0.2, 0.3, 0.5],
}

# Convolution stages 1-3 share the same feature-map choices; stage 4 stops at 128.
_feature_map_choices = ([32, 64, 128, 256],) * 3 + ([32, 64, 128],)
for _stage, _maps in enumerate(_feature_map_choices, start=1):
    params[f'module__conv{_stage}_feature_maps'] = list(_maps)
    params[f'module__conv{_stage}_kernel_size'] = [3, 5, 7]

# The three hidden fully connected layers share the same width choices.
for _stage in (1, 2, 3):
    params[f'module__fc{_stage}_neurons'] = [128, 256, 512]

In [None]:
# Base skorch estimator built from the notebook-level defaults.
# train_split=False: no internal validation split is carved out of the training data.
net_settings = dict(
    module=SkinLesionClassifier,
    max_epochs=max_epochs,
    optimizer=optimizer,
    criterion=criterion,
    lr=learning_rate,
    verbose=0,
    train_split=False,
    device=device,
)
net = NeuralNetClassifier(**net_settings)

In [None]:
# Initialize the net and print its representation, which includes the module architecture.
print(net.initialize())

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=SkinCancerClassifier(
    (conv_layers): Sequential(
      (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (4): ReLU()
      (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (7): ReLU()
      (8): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (9): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (10): ReLU()
      (11): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    )
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (fc_layers): Sequential(
      (0): Linear(in_featu

In [None]:
# Load the tensors back from the files written earlier
X_to_fit = torch.load('/'.join((HYPERPARAMS_DIR, X_tensor_filename)))
y_to_fit = torch.load('/'.join((HYPERPARAMS_DIR, y_tensor_filename)))

# X and y are now available again; show their shapes
print(X_to_fit.shape)
print(y_to_fit.shape)

torch.Size([6300, 3, 224, 224])
torch.Size([6300])


In [None]:
import gc

def clear_gpu_memory() -> None:
  """Run Python garbage collection, then release PyTorch's cached CUDA memory."""
  # Collect first so tensors that are no longer referenced are freed before the cache is emptied.
  gc.collect()
  torch.cuda.empty_cache()

In [None]:
import numpy as np

In [None]:
n_iter = 30  # number of parameter settings sampled by the random search
cv = 5       # number of cross-validation folds
# Use roughly 35% of the CPUs, but never fewer than one worker: on machines
# with 2 or fewer CPUs the truncation would give n_jobs=0, which is not a valid value.
n_jobs = max(1, int(mp.cpu_count() * 0.35))
pre_dispatch = int(1.5*n_jobs)

In [None]:
# Randomized hyperparameter search over `params`, scored with F1.
# refit=False: no final model is trained on the full data after the search.
# random_state fixes which candidates are sampled so the search can be repeated
# with the same candidate set (network weight initialization is not seeded here).
rs = RandomizedSearchCV(
    estimator=net,
    param_distributions=params,
    refit=False,
    verbose=2,
    scoring='f1',
    n_iter=n_iter,
    cv=cv,
    n_jobs=n_jobs,
    pre_dispatch=pre_dispatch,
    random_state=42,
)

In [None]:
# Time the whole cell: the search itself plus persisting its result.
date_start = timer()

rs.fit(X_to_fit, y_to_fit)

# Garbage-collect and empty the CUDA cache once the fits are done.
clear_gpu_memory()

print(f'\nSearch using cv={cv} and n_iter={n_iter} is complete!\n')

rs_log_status = f'"best_score": {rs.best_score_:.6f}, "best_params": "{rs.best_params_}"'
print(rs_log_status)

# Persist the winning parameter dict under a timestamped file name.
# NOTE(review): np.save stores a dict as a pickled object, so the file can only
# be read back with allow_pickle=True.
current_date = gf_datetime(custom_format="%Y-%m-%d_%H-%M-%S", timezone_hours=-3)
best_params_filename = f'{HYPERPARAMS_DIR}/{current_date}_best_params.npy'
np.save(best_params_filename, rs.best_params_)

# Prepend the file name to the log fragment.
rs_log_status = f'"filename": "{best_params_filename}", ' + rs_log_status

date_end = timer()

total_runtime = gft_runtime(date_start, date_end)

print(f'\nTotal runtime: {total_runtime}')

Fitting 5 folds for each of 30 candidates, totalling 150 fits

Search using cv=5 and n_iter=30 is complete!

"best_score": 0.743416, "best_params": "{'optimizer__lr': 0.0001, 'module__fc3_neurons': 512, 'module__fc2_neurons': 512, 'module__fc1_neurons': 128, 'module__dropout': 0.3, 'module__conv4_kernel_size': 5, 'module__conv4_feature_maps': 64, 'module__conv3_kernel_size': 5, 'module__conv3_feature_maps': 64, 'module__conv2_kernel_size': 5, 'module__conv2_feature_maps': 256, 'module__conv1_kernel_size': 3, 'module__conv1_feature_maps': 128, 'max_epochs': 50, 'batch_size': 64}"

Total runtime: 4:47:50.091717


In [None]:
# Turn the "<date>_<HH-MM-SS>" stamp into "<date> <HH:MM:SS>".
run_date, run_time = current_date.split('_')
run_time = run_time.replace('-', ':')
run_datetime = ' '.join((run_date, run_time))

In [None]:
# Assemble a single JSON-formatted line describing this search run.
_run_fields = f'"date": "{run_datetime}", "total_runtime": "{total_runtime}", {rs_log_status}'
best_score_found = '{' + _run_fields + '}'
print(best_score_found)

{"date": "2024-07-28 08:22:23", "total_runtime": "4:47:50.091717", "filename": "/content/drive/MyDrive/src/hyperparams/2024-07-28_08-22-23_best_params.npy", "best_score": 0.743416, "best_params": "{'optimizer__lr': 0.0001, 'module__fc3_neurons': 512, 'module__fc2_neurons': 512, 'module__fc1_neurons': 128, 'module__dropout': 0.3, 'module__conv4_kernel_size': 5, 'module__conv4_feature_maps': 64, 'module__conv3_kernel_size': 5, 'module__conv3_feature_maps': 64, 'module__conv2_kernel_size': 5, 'module__conv2_feature_maps': 256, 'module__conv1_kernel_size': 3, 'module__conv1_feature_maps': 128, 'max_epochs': 50, 'batch_size': 64}"}


In [None]:
import ast
import json

best_score_found_json = json.loads(best_score_found)

# The "best_params" field holds the text of a Python dict, not JSON. Parse it as
# a Python literal: swapping quote characters and calling json.loads breaks as
# soon as a value is True/False/None or a string contains an apostrophe.
best_params = ast.literal_eval(best_score_found_json['best_params'])
best_score_found_json['best_params'] = best_params

print(best_score_found_json)

# Append the record as one JSON line to the running log of search results.
with open(f'{HYPERPARAMS_DIR}/best_params.log', mode="a+", encoding="utf-8") as log_params:
  log_params.write(json.dumps(best_score_found_json) + '\n')

{'date': '2024-07-28 08:22:23', 'total_runtime': '4:47:50.091717', 'filename': '/content/drive/MyDrive/src/hyperparams/2024-07-28_08-22-23_best_params.npy', 'best_score': 0.743416, 'best_params': {'optimizer__lr': 0.0001, 'module__fc3_neurons': 512, 'module__fc2_neurons': 512, 'module__fc1_neurons': 128, 'module__dropout': 0.3, 'module__conv4_kernel_size': 5, 'module__conv4_feature_maps': 64, 'module__conv3_kernel_size': 5, 'module__conv3_feature_maps': 64, 'module__conv2_kernel_size': 5, 'module__conv2_feature_maps': 256, 'module__conv1_kernel_size': 3, 'module__conv1_feature_maps': 128, 'max_epochs': 50, 'batch_size': 64}}


In [None]:
# Read the saved best-parameter dict back from disk; .item() unwraps the loaded array into the dict.
# NOTE(review): allow_pickle=True unpickles the file content, which can execute
# arbitrary code — only load files that this notebook wrote itself.
best_params_loaded = np.load(best_params_filename, allow_pickle=True).item()

print(best_params_loaded)

{'optimizer__lr': 0.0001, 'module__fc3_neurons': 512, 'module__fc2_neurons': 512, 'module__fc1_neurons': 128, 'module__dropout': 0.3, 'module__conv4_kernel_size': 5, 'module__conv4_feature_maps': 64, 'module__conv3_kernel_size': 5, 'module__conv3_feature_maps': 64, 'module__conv2_kernel_size': 5, 'module__conv2_feature_maps': 256, 'module__conv1_kernel_size': 3, 'module__conv1_feature_maps': 128, 'max_epochs': 50, 'batch_size': 64}


In [None]:
# Constructor arguments of SkinLesionClassifier that were tuned by the search;
# each one is stored in best_params_loaded under the key "module__<name>".
_tuned_module_args = (
    'conv1_feature_maps', 'conv1_kernel_size',
    'conv2_feature_maps', 'conv2_kernel_size',
    'conv3_feature_maps', 'conv3_kernel_size',
    'conv4_feature_maps', 'conv4_kernel_size',
    'dropout',
    'fc1_neurons', 'fc2_neurons', 'fc3_neurons',
)

model_with_best_params = SkinLesionClassifier(
    **{arg_name: best_params_loaded[f'module__{arg_name}'] for arg_name in _tuned_module_args}
)

print(model_with_best_params)

SkinCancerClassifier(
  (conv_layers): Sequential(
    (0): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(128, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc_layers): Sequential(
    (0): Linear(in_features=12544, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3):

In [None]:
# Release the Colab runtime now that the notebook has finished.
# NOTE(review): presumably this disconnects and frees the VM so it stops consuming resources — confirm.
runtime.unassign()