In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# ========== SETTINGS ==========
SOURCE_FOLDER = r'C:\Users\USER\Downloads\data\data2\train_brain'  # Original dataset folder
DEST_FOLDER = r'C:\Users\USER\Downloads\data\imageclassifier\brain'               # Where to save train/val/test
TRAIN_RATIO = 0.8
VAL_RATIO = 0.1
TEST_RATIO = 0.1
MAX_IMAGES_PER_CLASS = 160  # <-- Change this to limit how many images you use per class
SEED = 42

# ========== MAKE OUTPUT FOLDERS ==========
for split in ['train', 'val', 'test']:
    os.makedirs(os.path.join(DEST_FOLDER, split), exist_ok=True)

# ========== PROCESS EACH CLASS ==========
for class_name in sorted(os.listdir(SOURCE_FOLDER)):
    class_path = os.path.join(SOURCE_FOLDER, class_name)
    if not os.path.isdir(class_path):
        continue  # Skip if not a folder

    # Get all image files in the class folder
    all_images = [f for f in os.listdir(class_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    all_images = all_images[:MAX_IMAGES_PER_CLASS]  # Limit if needed

    # Split images into train, val, test
    train_imgs, temp_imgs = train_test_split(all_images, train_size=TRAIN_RATIO, random_state=SEED)
    val_imgs, test_imgs = train_test_split(temp_imgs, test_size=TEST_RATIO / (VAL_RATIO + TEST_RATIO), random_state=SEED)

    # Save to new folders with flat structure
    for split_name, image_list in zip(['train', 'val', 'test'], [train_imgs, val_imgs, test_imgs]):
        for img_name in image_list:
            src = os.path.join(class_path, img_name)
            # New name with class in filename to keep track
            new_name = f"{class_name.replace(' ', '_')}__{img_name}"
            dst = os.path.join(DEST_FOLDER, split_name, new_name)
            shutil.copy2(src, dst)

print("✅ Done! All images are copied and split into flat train/val/test folders.")


✅ Done! All images are copied and split into flat train/val/test folders.


In [6]:
import os
import shutil
import random

# ========== SETTINGS ==========
SOURCE_FOLDER = r'C:\Users\USER\Downloads\data\data2\train_chest'  # Your flat folder with images
DEST_FOLDER = r'C:\Users\USER\Downloads\data\imageclassifier\chest'        # Where train/val/test will go

TRAIN_RATIO = 0.8
VAL_RATIO = 0.1
TEST_RATIO = 0.1
MAX_IMAGES = None  # Set to an int to limit, or None to use all
SEED = 42

# ========== PREP DESTINATION ==========
random.seed(SEED)

for split in ['train', 'val', 'test']:
    split_path = os.path.join(DEST_FOLDER, split)
    if os.path.exists(split_path):
        shutil.rmtree(split_path)
    os.makedirs(split_path)

# ========== GET & SHUFFLE IMAGE LIST ==========
all_images = [f for f in os.listdir(SOURCE_FOLDER) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
if MAX_IMAGES:
    all_images = all_images[:MAX_IMAGES]

random.shuffle(all_images)

# ========== SPLIT ==========
n_total = len(all_images)
n_train = int(n_total * TRAIN_RATIO)
n_val = int(n_total * VAL_RATIO)
n_test = n_total - n_train - n_val  # Remaining for test

train_imgs = all_images[:n_train]
val_imgs = all_images[n_train:n_train + n_val]
test_imgs = all_images[n_train + n_val:]

# ========== COPY ==========
for split_name, img_list in zip(['train', 'val', 'test'], [train_imgs, val_imgs, test_imgs]):
    for img_name in img_list:
        src = os.path.join(SOURCE_FOLDER, img_name)
        dst = os.path.join(DEST_FOLDER, split_name, img_name)
        shutil.copy2(src, dst)

print(f"✅ Done! Split {n_total} images into:")
print(f"   - {len(train_imgs)} for training")
print(f"   - {len(val_imgs)} for validation")
print(f"   - {len(test_imgs)} for testing")


✅ Done! Split 208 images into:
   - 166 for training
   - 20 for validation
   - 22 for testing


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, f1_score
from torch.cuda.amp import GradScaler, autocast
from torch.nn.utils import clip_grad_norm_
import copy
from datetime import datetime

# ========================= CONFIG =========================
TRAIN_DIR = r'C:/Users/USER/Downloads/data/imageclassifier/train'
VAL_DIR = r'C:/Users/USER/Downloads/data/imageclassifier/val'
CHECKPOINT_DIR = r'C:/Users/USER/Downloads/data/imageclassifier/checkpoints'
NUM_CLASSES = 4
BATCH_SIZE = 32
EPOCHS = 30
EARLY_STOP_PATIENCE = 5
LR = 1e-4
WEIGHT_DECAY = 1e-4
GRAD_CLIP = 5.0
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# ========================= DATA =========================
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

datasets_dict = {
    'train': datasets.ImageFolder(TRAIN_DIR, data_transforms['train']),
    'val': datasets.ImageFolder(VAL_DIR, data_transforms['val'])
}

dataloaders = {
    split: DataLoader(datasets_dict[split], batch_size=BATCH_SIZE, shuffle=(split == 'train'))
    for split in ['train', 'val']
}

# ========================= MODEL =========================
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, verbose=True)
scaler = GradScaler()

# ========================= TRAIN =========================
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
best_f1 = 0.0
epochs_no_improve = 0

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print("-"*20)

    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        running_loss = 0.0
        running_corrects = 0
        all_preds = []
        all_labels = []

        for inputs, labels in dataloaders[phase]:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                with autocast():
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                preds = torch.argmax(outputs, 1)

                if phase == 'train':
                    scaler.scale(loss).backward()
                    clip_grad_norm_(model.parameters(), GRAD_CLIP)
                    scaler.step(optimizer)
                    scaler.update()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        epoch_loss = running_loss / len(datasets_dict[phase])
        epoch_acc = running_corrects.double() / len(datasets_dict[phase])
        print(f"{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

        if phase == 'val':
            scheduler.step(epoch_loss)
            f1 = f1_score(all_labels, all_preds, average='macro')
            print(f"Val F1-score: {f1:.4f}")

            if f1 > best_f1:
                best_f1 = f1
                best_model_wts = copy.deepcopy(model.state_dict())
                checkpoint_path = os.path.join(CHECKPOINT_DIR, f'best_model_f1_{f1:.4f}.pth')
                torch.save(best_model_wts, checkpoint_path)
                print(f"✅ Saved improved model to: {checkpoint_path}")
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

    if epochs_no_improve >= EARLY_STOP_PATIENCE:
        print("\n🛑 Early stopping triggered.")
        break


  scaler = GradScaler()



Epoch 1/30
--------------------


  with autocast():


Train Loss: 0.1313 Acc: 0.9627
Val Loss: 0.0052 Acc: 0.9939
Val F1-score: 0.9868
✅ Saved improved model to: C:/Users/USER/Downloads/data/imageclassifier/checkpoints\best_model_f1_0.9868.pth

Epoch 2/30
--------------------


  with autocast():


Train Loss: 0.0008 Acc: 1.0000
Val Loss: 0.0000 Acc: 1.0000
Val F1-score: 1.0000
✅ Saved improved model to: C:/Users/USER/Downloads/data/imageclassifier/checkpoints\best_model_f1_1.0000.pth

Epoch 3/30
--------------------


  with autocast():


Train Loss: 0.0074 Acc: 0.9992
Val Loss: 0.0000 Acc: 1.0000
Val F1-score: 1.0000

Epoch 4/30
--------------------


  with autocast():


Train Loss: 0.0002 Acc: 1.0000
Val Loss: 0.0000 Acc: 1.0000
Val F1-score: 1.0000

Epoch 5/30
--------------------


  with autocast():


Train Loss: 0.0000 Acc: 1.0000
Val Loss: 0.0000 Acc: 1.0000
Val F1-score: 1.0000

Epoch 6/30
--------------------


  with autocast():


Train Loss: 0.0010 Acc: 1.0000
Val Loss: 0.0000 Acc: 1.0000
Val F1-score: 1.0000

Epoch 7/30
--------------------


  with autocast():


Train Loss: 0.0000 Acc: 1.0000
Val Loss: 0.0000 Acc: 1.0000
Val F1-score: 1.0000

🛑 Early stopping triggered.


: 