In [None]:
import os
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, models
from torchvision.models.resnet import ResNet50_Weights


from utils.dataset import SkinDataset
from utils.utils import train, validate, test, load_data_file
from utils.metric import MetricsMonitor

## Data Transformation

In [None]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),  # Randomly flip images horizontally with 50% probability
    transforms.RandomAffine(
        degrees=15,                           # Random rotation within [-15, 15] degrees
        translate=(0.1, 0.1),                 # Random shift by up to 10% in both x and y directions
    ),
    transforms.Resize((224, 224)),           # Resize to match ResNet input size
    transforms.ToTensor(),                   # Convert image to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize with ImageNet stats
])

## Load Data

In [None]:
CLASSES = ['nevus', 'others']
BATCH_SIZE = 64
EPOCHS = 100
LR = 0.0005
device = 'mps' if torch.backends.mps.is_available() else 'cuda' if torch.cuda.is_available() else 'cpu'

In [7]:
## Load data paths and labels
train_path, train_labels = load_data_file('datasets/train.txt')
train_path, val_path, train_labels, val_labels = train_test_split(train_path, train_labels, test_size=0.2, random_state=42, stratify=train_labels)
test_path, test_labels = load_data_file('datasets/val.txt')

## Create datasets and dataloaders
train_dataset = SkinDataset(train_path, train_labels, transform)
val_dataset = SkinDataset(val_path, val_labels, transform)
test_dataset = SkinDataset(test_path, test_labels, transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
print('================== Train dataset Info: ==================\n', train_dataset)
print('================== Val dataset Info: ==================\n', val_dataset)
print('================== Test dataset Info: ==================\n', test_dataset)

 Dataset: 12156 samples
Class distribution: {0: 6180, 1: 5976}

 Dataset: 3039 samples
Class distribution: {1: 1494, 0: 1545}

 Dataset: 3796 samples
Class distribution: {0: 1931, 1: 1865}



## Model

In [None]:
# Model
model = models.resnet50(weights=ResNet50_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, len(CLASSES))
model = model.to(device)



## Training

In [11]:
# Loss and Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Monitors
train_monitor = MetricsMonitor(metrics=["loss", "accuracy"])
val_monitor = MetricsMonitor(metrics=["loss", "accuracy"], patience=5, mode="max")

In [None]:
# Training Loop
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    train(model, train_loader, criterion, optimizer, device, train_monitor)
    validate(model, val_loader, criterion, device, val_monitor)
    val_acc = val_monitor.compute_average("accuracy")
    if val_monitor.early_stopping_check(val_acc, model):
        print("Early stopping triggered.")
        break

## Testing

In [None]:
test(model, test_loader, device)

In [None]:
import nbformat

# Load the notebook
notebook_path = "Binary.ipynb"
output_script_path = "exp.py"
with open(notebook_path, 'r', encoding='utf-8') as f:
    notebook = nbformat.read(f, as_version=4)

# Extract code cells only
code_cells = [cell['source'] for cell in notebook.cells if cell.cell_type == 'code']
code_cells = code_cells[:-1]  # Exclude the last cell

# Save as a .py file
with open(output_script_path, 'w', encoding='utf-8') as f:
    f.write("\n\n".join(code_cells))

In [1]:
!pip install basic-image-eda

Collecting basic-image-eda
  Downloading basic_image_eda-0.0.3-py3-none-any.whl.metadata (7.1 kB)
Downloading basic_image_eda-0.0.3-py3-none-any.whl (9.8 kB)
Installing collected packages: basic-image-eda
Successfully installed basic-image-eda-0.0.3
[0m

In [4]:
from basic_image_eda import BasicImageEDA

if __name__ == "__main__":  # for multiprocessing
    data_dir = "./data"
    BasicImageEDA.explore('/root/huy/datasets/Binary')
        
    # or
    
    extensions = ['png', 'jpg', 'jpeg']
    threads = 0
    dimension_plot = True
    channel_hist = True
    nonzero = False
    hw_division_factor = 1.0
    
    BasicImageEDA.explore(data_dir, extensions, threads, dimension_plot, channel_hist, nonzero, hw_division_factor)


found 18991 images.
Using 16 threads. (max:16)



100%|██████████| 18991/18991 [01:29<00:00, 213.36it/s]



*--------------------------------------------------------------------------------------*
number of images                         |  18991

dtype                                    |  uint8
channels                                 |  [3]
extensions                               |  ['jpg']

min height                               |  450
max height                               |  1024
mean height                              |  761.2003580643462
median height                            |  768

min width                                |  576
max width                                |  1024
mean width                               |  855.1536517297667
median width                             |  1024

mean height/width ratio                  |  0.8901328510082651
median height/width ratio                |  0.75
recommended input size(by mean)          |  [760 856] (h x w, multiples of 8)
recommended input size(by mean)          |  [768 848] (h x w, multiples of 16)
recommended input size

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [5]:
import os
os.cpu_count()

16