In [1]:
!pip install medmnist albumentations opencv-python matplotlib pillow torch torchvision scikit-learn tqdm



In [2]:
import os
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
import albumentations as A
from medmnist import OCTMNIST, BreastMNIST, PneumoniaMNIST, RetinaMNIST, INFO

# Output directory
os.makedirs('split_data', exist_ok=True)

# Offsets for label uniqueness
offsets = {
    'octmnist': 0,
    'breastmnist': len(INFO['octmnist']['label']),
    'pneumoniamnist': len(INFO['octmnist']['label']) + len(INFO['breastmnist']['label']),
    'retinamnist': len(INFO['octmnist']['label']) + len(INFO['breastmnist']['label']) + len(INFO['pneumoniamnist']['label']),
}

# Preprocessing
def preprocess_general(img):
    if img.ndim == 3 and img.shape[-1] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    elif img.ndim == 3 and img.shape[-1] == 1:
        img = img.squeeze()
    return img.astype(np.float32) / 255.0

def preprocess_retina(img):
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    denoised = cv2.medianBlur(gray, 3)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    return clahe.apply(denoised).astype(np.float32) / 255.0

# Load + preprocess
def load_and_preprocess(dataset_cls, offset, is_retina=False):
    X_train, y_train, X_val, y_val, X_test, y_test = [], [], [], [], [], []
    for split_name in ['train', 'val', 'test']:
        dataset = dataset_cls(split=split_name, download=True)
        for img, label in zip(dataset.imgs, dataset.labels.squeeze()):
            label += offset
            
            if split_name == 'train':
                proc_img = preprocess_retina(img) if is_retina else preprocess_general(img)
                X_train.append(proc_img)
                y_train.append(label)
            elif split_name == 'val':
                proc_img = preprocess_general(img)
                X_val.append(proc_img)
                y_val.append(label)
            else:
                proc_img = preprocess_general(img)
                X_test.append(proc_img)
                y_test.append(label)
    return X_train, y_train, X_val, y_val, X_test, y_test

# Master load
X_train, y_train, X_val, y_val, X_test, y_test = [], [], [], [], [], []
for name, cls in zip(['octmnist', 'breastmnist', 'pneumoniamnist', 'retinamnist'],
                     [OCTMNIST, BreastMNIST, PneumoniaMNIST, RetinaMNIST]):
    is_retina = (name == 'retinamnist')
    Xt, yt, Xv, yv, Xte, yte = load_and_preprocess(cls, offsets[name], is_retina)
    X_train += Xt
    y_train += yt
    X_val += Xv
    y_val += yv
    X_test += Xte
    y_test += yte

X_train, y_train = np.array(X_train), np.array(y_train)
X_val, y_val = np.array(X_val), np.array(y_val)
X_test, y_test = np.array(X_test), np.array(y_test)

# Print distributions
def print_distribution(name, labels):
    print(f"\n📊 {name} Set Class Distribution:")
    for cls, count in zip(*np.unique(labels, return_counts=True)):
        print(f"Class {cls}: {count}")

print_distribution("Train", y_train)
print_distribution("Validation", y_val)
print_distribution("Test", y_test)

# Augmentation
AUGMENT_CLASSES = [cls for cls, count in zip(*np.unique(y_train, return_counts=True)) if count < 4000]
augment = A.Compose([
    A.Rotate(limit=15, p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.2),
    A.RandomBrightnessContrast(p=0.3),
    A.ElasticTransform(p=0.2),
])

aug_imgs, aug_labels = [], []
for cls in AUGMENT_CLASSES:
    cls_idxs = np.where(y_train == cls)[0]
    cls_imgs = X_train[cls_idxs]
    needed = 4000 - len(cls_imgs)
    for _ in range(needed):
        img = cls_imgs[np.random.randint(len(cls_imgs))]
        img_aug = augment(image=(img * 255).astype(np.uint8))['image']
        aug_imgs.append(img_aug.astype(np.float32) / 255.0)
        aug_labels.append(cls)

# Append
if aug_imgs:
    X_train = np.concatenate([X_train, np.stack(aug_imgs)])
    y_train = np.concatenate([y_train, np.array(aug_labels)])
    print(f"\n✅ Augmented with {len(aug_labels)} new samples")

# Save
def save_split(X, y, path):
    df = pd.DataFrame(X.reshape((X.shape[0], -1)))
    df['label'] = y
    df.to_csv(path, index=False)

save_split(X_train, y_train, "split_data/train.csv")
save_split(X_val, y_val, "split_data/val.csv")
save_split(X_test, y_test, "split_data/test.csv")

print("\n✅ Saved train, val, and test CSVs to 'split_data/'")

Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/octmnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/octmnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/octmnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/breastmnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/breastmnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/breastmnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/pneumoniamnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.medmnist/retinamnist.npz
Using downloaded and verified file: /teamspace/studios/this_studio/.me

In [3]:
import pandas as pd
import numpy as np

# Load processed splits
df_train = pd.read_csv("split_data/train.csv")
df_val = pd.read_csv("split_data/val.csv")
df_test = pd.read_csv("split_data/test.csv")

def print_class_distribution(df, name):
    labels = df['label'].values
    unique, counts = np.unique(labels, return_counts=True)
    print(f"\n📊 {name} Set Class Distribution:")
    for u, c in zip(unique, counts):
        print(f"Class {u}: {c}")

print_class_distribution(df_train, "Train")
print_class_distribution(df_val, "Validation")
print_class_distribution(df_test, "Test")



📊 Train Set Class Distribution:
Class 0: 33484
Class 1: 10213
Class 2: 7754
Class 3: 46026
Class 4: 4000
Class 5: 4000
Class 6: 4000
Class 7: 4000
Class 8: 4000
Class 9: 4000
Class 10: 4000
Class 11: 4000
Class 12: 4000

📊 Validation Set Class Distribution:
Class 0: 3721
Class 1: 1135
Class 2: 862
Class 3: 5114
Class 4: 21
Class 5: 57
Class 6: 135
Class 7: 389
Class 8: 54
Class 9: 12
Class 10: 28
Class 11: 20
Class 12: 6

📊 Test Set Class Distribution:
Class 0: 250
Class 1: 250
Class 2: 250
Class 3: 250
Class 4: 42
Class 5: 114
Class 6: 234
Class 7: 390
Class 8: 174
Class 9: 46
Class 10: 92
Class 11: 68
Class 12: 20


In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
import torch
import torch.nn as nn
import torchvision.models as models

class CustomResNet18(nn.Module):
    def __init__(self, num_classes=13):
        super(CustomResNet18, self).__init__()
        resnet18 = models.resnet18(weights=None)  # Load base architecture

        resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)

        resnet18.maxpool = nn.Identity()

        resnet18.layer1[0].conv1 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)
        resnet18.layer1[0].conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)

        resnet18.layer2[0].conv1 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False)
        resnet18.layer2[0].conv2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False)

        resnet18.fc = nn.Linear(512, num_classes)

        self.model = resnet18

    def forward(self, x):
        return self.model(x)


In [7]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import torch

class MedDataset(Dataset):
    def __init__(self, csv_path):
        df = pd.read_csv(csv_path)
        self.X = df.drop(columns=['label']).values.reshape(-1, 1, 28, 28).astype(np.float32)
        self.y = df['label'].values.astype(np.int64)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx]), torch.tensor(self.y[idx])

# Dataloaders
batch_size = 256

train_loader = DataLoader(MedDataset('split_data/train.csv'), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(MedDataset('split_data/val.csv'), batch_size=batch_size, shuffle=False)
test_loader = DataLoader(MedDataset('split_data/test.csv'), batch_size=batch_size, shuffle=False)


In [8]:
import torch
from tqdm import tqdm
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CustomResNet18(num_classes=13).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

def evaluate(model, loader):
    model.eval()
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for X, y in loader:
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            y_true.extend(y.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    try:
        auc = roc_auc_score(y_true, y_probs, multi_class='ovr')
    except:
        auc = 0.0

    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, average='weighted'),
        'precision': precision_score(y_true, y_pred, average='weighted'),
        'recall': recall_score(y_true, y_pred, average='macro'),
        'auc': auc
    }

# Training loop
epochs = 10
best_val_acc = 0.0

for epoch in range(epochs):
    model.train()
    train_losses = []

    for X, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training"):
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    train_metrics = evaluate(model, train_loader)
    val_metrics = evaluate(model, val_loader)

    print(f"\n📊 Epoch {epoch+1}/{epochs}")
    print(f"Train Loss: {np.mean(train_losses):.4f}")
    print(f"Train Acc: {train_metrics['accuracy']:.4f} | F1: {train_metrics['f1']:.4f} | Precision: {train_metrics['precision']:.4f} | Recall: {train_metrics['recall']:.4f} | AUC: {train_metrics['auc']:.4f}")
    print(f"Val   Acc: {val_metrics['accuracy']:.4f} | F1: {val_metrics['f1']:.4f} | Precision: {val_metrics['precision']:.4f} | Recall: {val_metrics['recall']:.4f} | AUC: {val_metrics['auc']:.4f}")

    if val_metrics['accuracy'] > best_val_acc:
        best_val_acc = val_metrics['accuracy']
        torch.save(model.state_dict(), 'best_custom_resnet18.pth')
        print("✅ Saved new best model.")


Epoch 1/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 38.59it/s]



📊 Epoch 1/10
Train Loss: 0.6139
Train Acc: 0.8445 | F1: 0.8303 | Precision: 0.8527 | Recall: 0.7352 | AUC: 0.9868
Val   Acc: 0.8753 | F1: 0.8676 | Precision: 0.8716 | Recall: 0.5928 | AUC: 0.9854
✅ Saved new best model.


Epoch 2/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.28it/s]



📊 Epoch 2/10
Train Loss: 0.3402
Train Acc: 0.8988 | F1: 0.8891 | Precision: 0.9038 | Recall: 0.8389 | AUC: 0.9941
Val   Acc: 0.8967 | F1: 0.8849 | Precision: 0.8905 | Recall: 0.6466 | AUC: 0.9890
✅ Saved new best model.


Epoch 3/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.48it/s]



📊 Epoch 3/10
Train Loss: 0.2302
Train Acc: 0.9227 | F1: 0.9168 | Precision: 0.9269 | Recall: 0.8941 | AUC: 0.9957
Val   Acc: 0.8934 | F1: 0.8818 | Precision: 0.8901 | Recall: 0.6146 | AUC: 0.9875


Epoch 4/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.41it/s]



📊 Epoch 4/10
Train Loss: 0.1680
Train Acc: 0.9291 | F1: 0.9231 | Precision: 0.9360 | Recall: 0.8904 | AUC: 0.9976
Val   Acc: 0.8993 | F1: 0.8873 | Precision: 0.8971 | Recall: 0.6131 | AUC: 0.9895
✅ Saved new best model.


Epoch 5/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.40it/s]



📊 Epoch 5/10
Train Loss: 0.1302
Train Acc: 0.9536 | F1: 0.9540 | Precision: 0.9558 | Recall: 0.9495 | AUC: 0.9984
Val   Acc: 0.8985 | F1: 0.8990 | Precision: 0.9024 | Recall: 0.6723 | AUC: 0.9863


Epoch 6/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.39it/s]



📊 Epoch 6/10
Train Loss: 0.1032
Train Acc: 0.9709 | F1: 0.9707 | Precision: 0.9718 | Recall: 0.9559 | AUC: 0.9994
Val   Acc: 0.9107 | F1: 0.9094 | Precision: 0.9114 | Recall: 0.6280 | AUC: 0.9848
✅ Saved new best model.


Epoch 7/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.29it/s]



📊 Epoch 7/10
Train Loss: 0.0833
Train Acc: 0.9742 | F1: 0.9744 | Precision: 0.9756 | Recall: 0.9638 | AUC: 0.9995
Val   Acc: 0.9154 | F1: 0.9143 | Precision: 0.9136 | Recall: 0.6474 | AUC: 0.9876
✅ Saved new best model.


Epoch 8/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.22it/s]



📊 Epoch 8/10
Train Loss: 0.0686
Train Acc: 0.9692 | F1: 0.9690 | Precision: 0.9700 | Recall: 0.9669 | AUC: 0.9994
Val   Acc: 0.8942 | F1: 0.8913 | Precision: 0.8937 | Recall: 0.6342 | AUC: 0.9773


Epoch 9/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.33it/s]



📊 Epoch 9/10
Train Loss: 0.0543
Train Acc: 0.9771 | F1: 0.9771 | Precision: 0.9797 | Recall: 0.9619 | AUC: 0.9998
Val   Acc: 0.9147 | F1: 0.9136 | Precision: 0.9137 | Recall: 0.6382 | AUC: 0.9797


Epoch 10/10 - Training: 100%|██████████| 522/522 [00:13<00:00, 39.32it/s]



📊 Epoch 10/10
Train Loss: 0.0509
Train Acc: 0.9745 | F1: 0.9743 | Precision: 0.9749 | Recall: 0.9678 | AUC: 0.9997
Val   Acc: 0.9017 | F1: 0.8988 | Precision: 0.9008 | Recall: 0.6441 | AUC: 0.9884


In [9]:
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

def evaluate_model(model, loader, device):
    model.eval()
    y_true, y_pred, y_probs = [], [], []

    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            y_probs.extend(probs.cpu().numpy())

    # Classification metrics
    accuracy = accuracy_score(y_true, y_pred)
    weighted_recall = recall_score(y_true, y_pred, average='weighted')
    macro_recall = recall_score(y_true, y_pred, average='macro')
    weighted_precision = precision_score(y_true, y_pred, average='weighted')
    macro_precision = precision_score(y_true, y_pred, average='macro')

    try:
        auc = roc_auc_score(y_true, y_probs, multi_class='ovr')
    except:
        auc = 0.0

    print(f"\n📊 Evaluation on Test Set:")
    print(f"Accuracy:           {accuracy:.4f}")
    print(f"Weighted Recall:    {weighted_recall:.4f}")
    print(f"Macro Recall:       {macro_recall:.4f}")
    print(f"Weighted Precision: {weighted_precision:.4f}")
    print(f"Macro Precision:    {macro_precision:.4f}")
    print(f"AUC Score:          {auc:.4f}")


In [10]:


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CustomResNet18(num_classes=13).to(device)
model.load_state_dict(torch.load("best_custom_resnet18.pth"))
model.eval()

evaluate_model(model, test_loader, device)



📊 Evaluation on Test Set:
Accuracy:           0.7229
Weighted Recall:    0.7229
Macro Recall:       0.5723
Weighted Precision: 0.7379
Macro Precision:    0.6112
AUC Score:          0.9481
