In [2]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Paths
data_dir = "/Users/Ali2/Documents/LC25000_lung"

# Hyperparameters
BATCH_SIZE = 32
EPOCHS = 10
LEARNING_RATE = 0.001

# Transformations for the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match ResNet input size
    transforms.ToTensor(),          # Convert to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize like ImageNet
])

# Load dataset
dataset = datasets.ImageFolder(data_dir, transform=transform)

# Create data loader
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Define the model
model = models.resnet18(pretrained=True)  # Use ResNet18 pretrained on ImageNet
model.fc = nn.Linear(model.fc.in_features, len(dataset.classes))  # Adjust output layer for 3 classes

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/{EPOCHS}, Loss: {running_loss / len(train_loader):.4f}")

print("Training complete.")




Epoch 1/10, Loss: 0.1520
Epoch 2/10, Loss: 0.0729
Epoch 3/10, Loss: 0.0463
Epoch 4/10, Loss: 0.0402
Epoch 5/10, Loss: 0.0319
Epoch 6/10, Loss: 0.0156
Epoch 7/10, Loss: 0.0254
Epoch 8/10, Loss: 0.0254
Epoch 9/10, Loss: 0.0178
Epoch 10/10, Loss: 0.0129
Training complete.


In [4]:

torch.save(model.state_dict(), "model1.pth")

Model 2

- Uses 5-folds cross validation
- Uses 5 epochs (instead of 10 like before) to save time and prevent over fitting
- Uses early stopping to terminate training automatically when perofmance stops improving
- Use a confusion matrix to understand class-wise performance (e.g., is the model struggling with one class?)
- Augment the training dataset with techniques like random rotations, flips, or color jittering: revents overfitting when training on limited data
- More messages to indicate how long the program will take to run

In [5]:
import os
import time
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Paths
data_dir = "/Users/Ali2/Documents/LC25000_lung"

# Hyperparameters
BATCH_SIZE = 32
EPOCHS = 5
LEARNING_RATE = 0.001
K_FOLDS = 5
PATIENCE = 3  # Early stopping patience

# Transformations for the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load dataset
dataset = datasets.ImageFolder(data_dir, transform=transform)
dataset_size = len(dataset)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# K-Fold Cross Validation
kf = KFold(n_splits=K_FOLDS, shuffle=True, random_state=42)

# Track the best fold
best_fold_idx = -1
best_fold_loss = float('inf')
best_model_path = "best_model.pth"

print(f"Starting training with {K_FOLDS} folds, {EPOCHS} epochs per fold.")

# Start timing
start_time = time.time()

for fold, (train_idx, val_idx) in enumerate(kf.split(range(dataset_size))):
    print(f"\nFold {fold + 1}/{K_FOLDS}")
    print(f"Training on {len(train_idx)} samples, validating on {len(val_idx)} samples.")
    
    # Split data into train and validation sets
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)

    # Data loaders
    train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    # Define model
    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, len(dataset.classes))
    model = model.to(device)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Early stopping variables
    best_val_loss = float('inf')
    epochs_no_improve = 0

    for epoch in range(EPOCHS):
        print(f"  Epoch {epoch + 1}/{EPOCHS}...")
        model.train()
        running_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        train_loss = running_loss / len(train_loader)
        print(f"    Training Loss: {train_loss:.4f}")

        # Validation phase
        model.eval()
        val_loss = 0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        val_loss /= len(val_loader)
        print(f"    Validation Loss: {val_loss:.4f}")

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            # Save best model weights for this fold
            torch.save(model.state_dict(), f'temp_best_model_fold_{fold}.pth')
        else:
            epochs_no_improve += 1
            print(f"    Early stopping patience count: {epochs_no_improve}/{PATIENCE}")

        if epochs_no_improve >= PATIENCE:
            print(f"  Early stopping triggered at epoch {epoch + 1}.")
            break

    # Update the best fold if this one is better
    if best_val_loss < best_fold_loss:
        best_fold_loss = best_val_loss
        best_fold_idx = fold
        # Keep the best model path
        best_model_path = f'temp_best_model_fold_{fold}.pth'

print(f"\nBest fold is Fold {best_fold_idx + 1} with Validation Loss: {best_fold_loss:.4f}")
print(f"Best model saved at {best_model_path}")

# Load and evaluate the best fold
print("\nEvaluating the best fold...")
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(dataset.classes))
model.load_state_dict(torch.load(best_model_path))
model = model.to(device)
model.eval()

# Reuse validation set for the best fold
val_idx = list(kf.split(range(dataset_size)))[best_fold_idx][1]
val_subset = Subset(dataset, val_idx)
val_loader = DataLoader(val_subset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

all_preds, all_labels = [], []
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

conf_matrix = confusion_matrix(all_labels, all_preds)
print(f"\nConfusion Matrix for Best Fold:\n{conf_matrix}")
print(classification_report(all_labels, all_preds, target_names=dataset.classes))

# Final results
end_time = time.time()
print(f"Total training time: {(end_time - start_time) / 60:.2f} minutes.")

Starting training with 5 folds, 5 epochs per fold.

Fold 1/5
Training on 12000 samples, validating on 3000 samples.




  Epoch 1/5...
    Training Loss: 0.2133
    Validation Loss: 0.1768
  Epoch 2/5...
    Training Loss: 0.1270
    Validation Loss: 0.1307
  Epoch 3/5...
    Training Loss: 0.0999
    Validation Loss: 0.1134
  Epoch 4/5...
    Training Loss: 0.0829
    Validation Loss: 0.0719
  Epoch 5/5...
    Training Loss: 0.0774
    Validation Loss: 0.1499
    Early stopping patience count: 1/3

Fold 2/5
Training on 12000 samples, validating on 3000 samples.
  Epoch 1/5...
    Training Loss: 0.2272
    Validation Loss: 0.1062
  Epoch 2/5...
    Training Loss: 0.1329
    Validation Loss: 0.0751
  Epoch 3/5...
    Training Loss: 0.0992
    Validation Loss: 0.0923
    Early stopping patience count: 1/3
  Epoch 4/5...
    Training Loss: 0.0900
    Validation Loss: 0.0673
  Epoch 5/5...
    Training Loss: 0.0757
    Validation Loss: 0.1823
    Early stopping patience count: 1/3

Fold 3/5
Training on 12000 samples, validating on 3000 samples.
  Epoch 1/5...
    Training Loss: 0.2132
    Validation Loss: 0

  model.load_state_dict(torch.load(best_model_path))



Confusion Matrix for Best Fold:
[[ 980    4   36]
 [   1 1025    0]
 [   4    0  950]]
                          precision    recall  f1-score   support

         adenocarcinomas       0.99      0.96      0.98      1020
           benign_tissue       1.00      1.00      1.00      1026
squamous_cell_carcinomas       0.96      1.00      0.98       954

                accuracy                           0.98      3000
               macro avg       0.98      0.99      0.98      3000
            weighted avg       0.99      0.98      0.98      3000

Total training time: 485.03 minutes.
