# SNSClassification
Standard and non-standard sagittal view classification of a fetus for Down Syndrome detection.
- Outputs the *standard* label when there is a high probability that the baby has Down syndrome.
- Outputs the *non-standard* label when there is a low probability that the baby has Down syndrome.

## Imports
Please uncomment the lines in the cells below and run them to make sure the necessary packages are installed.

In [None]:
# !pip install openpyxl pycocotools faster-coco-eval torchmetrics[detection] pandas matplotlib tqdm opencv-python cuda-python torchvision 
# !pip install scikit-learn
# !pip install openpyxl
# !pip install pandas
# !pip install tf_explain


In [None]:
# !conda install pytorch torchvision cudatoolkit=10.1 -c pytorch


In [None]:

import torch
from IPython.display import clear_output
import torchvision
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from PIL import Image
import os
from sklearn.model_selection import train_test_split



In [None]:
# Report whether PyTorch can use a CUDA device in this environment.
torch.cuda.is_available()

## Preparing the Dataset


In [None]:

# Function to load images and their labels
def load_images_and_labels(standard_path, nonstandard_path):
    """Collect image file paths and binary labels from two class folders.

    Args:
        standard_path: Directory holding the standard images (label 1).
        nonstandard_path: Directory holding the non-standard images (label 0).

    Returns:
        A ``(image_paths, labels)`` tuple of parallel lists. Non-standard
        images come first, followed by standard ones; within each class the
        entries are sorted by file name. Only ``.png``, ``.jpg`` and
        ``.jpeg`` files (case-insensitive) are included.

    Raises:
        OSError: If either directory cannot be listed.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_paths = []
    labels = []  # 1 for Standard, 0 for Non-standard

    for folder, label in ((nonstandard_path, 0), (standard_path, 1)):
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # seeded train/test splits reproducible across machines.
        for img_name in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, img_name)
            if img_path.lower().endswith(image_extensions):
                image_paths.append(img_path)
                labels.append(label)

    return image_paths, labels

# Define the Dataset class with conditional transformations
class FetalUltrasoundDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs.

    Args:
        image_paths: Sequence of image file paths. Entries that are already
            ``torch.Tensor`` objects are accepted too (see ``__getitem__``).
        labels: Sequence of labels parallel to ``image_paths``
            (1 for Standard, 0 for Non-standard).
        transform: Optional callable applied to each loaded image.
        transform_nonstandard: Optional callable used instead of
            ``transform`` for samples whose label is 0.
    """

    def __init__(self, image_paths, labels, transform=None, transform_nonstandard=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.transform_nonstandard = transform_nonstandard

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        Path entries are opened, converted to RGB and passed through the
        matching transform. Tensor entries are returned unchanged.
        """
        item = self.image_paths[idx]
        label = self.labels[idx]

        # Samples augmented ahead of time arrive as ready-made tensors, not
        # paths: PIL cannot open them and PIL-based transforms cannot
        # re-process them, so hand them back as they are.
        if isinstance(item, torch.Tensor):
            return item, label

        # convert() returns an independent copy, so the file handle can be
        # released immediately instead of lingering until garbage collection.
        with Image.open(item) as handle:
            image = handle.convert('RGB')

        if label == 0 and self.transform_nonstandard:
            image = self.transform_nonstandard(image)
        elif self.transform:
            image = self.transform(image)

        return image, label


## Load image paths and labels
Before running the code cell below, please download the zipped dataset folders from [Google Drive](https://drive.google.com/file/d/1-ppPA9UHw9ZTBxyGmbWEyCgRNKTECC_6/view?usp=drive_link) and add them to the root folder of this directory.

In [None]:

# Both class folders are read from the same dataset directory. The
# non-standard path used to point at "./SNSClassification/...", a different
# root from the standard images.
# NOTE(review): confirm this matches the layout of the extracted dataset.
dataset_root = "./SNSClassification_Dataset"
standard_path = f"{dataset_root}/allStandard"
nonstandard_path = f"{dataset_root}/allNonStandard"
image_paths, labels = load_images_and_labels(standard_path, nonstandard_path)
print(f"Total images loaded: {len(image_paths)}")


## Separate images (perform data augmentations for train + validation)

In [None]:
def separate_images(image_paths, labels):
    """Partition image paths by label.

    Returns a ``(standard_images, nonstandard_images)`` pair of lists:
    paths labelled 1 go to the first list, paths labelled 0 to the second.
    Input order is preserved and any other label is dropped.
    """
    standard_images, nonstandard_images = [], []
    for path, tag in zip(image_paths, labels):
        if tag == 1:
            standard_images.append(path)
        elif tag == 0:
            nonstandard_images.append(path)
    return standard_images, nonstandard_images

standard_images, nonstandard_images = separate_images(image_paths, labels)
print(f"Number of standard images: {len(standard_images)}")
print(f"Number of non-standard images: {len(nonstandard_images)}")

# Hold out 15% of each class separately so both classes appear in the test set.
standard_train, standard_test = train_test_split(
    standard_images,
    test_size=0.15,
    random_state=42,
)
nonstandard_train, nonstandard_test = train_test_split(
    nonstandard_images,
    test_size=0.15,
    random_state=42,
)

# Full test set: standard hold-out followed by non-standard hold-out.
# No augmentation is applied to these images.
test_images = [*standard_test, *nonstandard_test]
test_labels = [1 for _ in standard_test] + [0 for _ in nonstandard_test]


In [None]:
# Augment NON-STANDARD training images only, so that their count is roughly
# balanced with the standard training images.
augmented_nonstandard = []
augment_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=25),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Number of augmented copies made of every non-standard image. It does not
# change inside the loop, so compute it once; the guard avoids dividing by
# zero when there are no non-standard training images.
if nonstandard_train:
    copies_per_image = (len(standard_train) // len(nonstandard_train)) + 1
else:
    copies_per_image = 0

for path in nonstandard_train:
    # convert() yields an independent image, so the file can be closed at once.
    with Image.open(path) as source:
        image = source.convert('RGB')
    for _ in range(copies_per_image):
        # Stored as (tensor, label) tuples; label 0 marks Non-standard.
        augmented_nonstandard.append((augment_transforms(image), 0))


In [None]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split



# Training pool: standard image paths followed by the augmented non-standard samples.
# NOTE(review): the augmented entries are tensors while the standard entries
# are path strings, so this list mixes two types — confirm consumers handle both.
all_train_images = standard_train + [sample for sample, _ in augmented_nonstandard]
all_train_labels = [1] * len(standard_train) + [0] * len(augmented_nonstandard)

print(len(all_train_images), len(all_train_labels))

# 80/20 train/validation split of the pool.
train_images, val_images, train_labels, val_labels = train_test_split(
    all_train_images,
    all_train_labels,
    test_size=0.2,
    random_state=42,
)

print(len(train_images), len(val_images))

# Separate 70/15/15 split of the full, un-augmented path list.
# NOTE(review): the datasets below are built from THIS split, not from the
# augmented pool or the per-class test hold-out created earlier — confirm
# that is intended.
image_paths_train, image_paths_temp, labels_train, labels_temp = train_test_split(
    image_paths,
    labels,
    test_size=0.3,
    random_state=42,
)
image_paths_val, image_paths_test, labels_val, labels_test = train_test_split(
    image_paths_temp,
    labels_temp,
    test_size=0.5,
    random_state=42,
)

# Training pipeline: resize, convert to tensor, normalise per channel.
# NOTE(review): no random augmentation is applied here; this pipeline is
# identical to the evaluation one below.
train_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Validation/test pipeline: same deterministic steps.
eval_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# One dataset per split.
train_dataset = FetalUltrasoundDataset(
    image_paths=image_paths_train, labels=labels_train, transform=train_transforms)
val_dataset = FetalUltrasoundDataset(
    image_paths=image_paths_val, labels=labels_val, transform=eval_transforms)
test_dataset = FetalUltrasoundDataset(
    image_paths=image_paths_test, labels=labels_test, transform=eval_transforms)

# Batch size 32; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Now you can use these loaders in your model training and evaluation routines.


In [None]:
import torch

# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    "CUDA is available. Using GPU."
    if device.type == "cuda"
    else "CUDA not available. Using CPU."
)

## Train Function

In [None]:
import matplotlib.pyplot as plt

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` and record per-epoch training accuracy and loss.

    Args:
        model: Network to optimise; called as ``model(images)``.
        train_loader: Iterable of ``(images, labels)`` batches.
        val_loader: Accepted for call compatibility; not used by this function.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(accuracies, losses)``: two lists with one entry per epoch — the
        training accuracy in percent and the mean per-sample training loss.
    """
    # Run on whichever device the model already lives on, so the function
    # does not depend on a global ``device`` variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    accuracies = []  # training accuracy (%) for each epoch
    loss_a = []  # mean training loss for each epoch

    for _ in range(num_epochs):
        # Re-assert training mode every epoch in case eval() was called between epochs.
        model.train()
        running_loss = 0.0
        total = correct = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # loss.item() is the batch mean; weight it by the batch size so
            # the epoch figure is a true per-sample average.
            running_loss += loss.item() * images.size(0)
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

        # Normalise by the samples actually seen; an empty loader yields 0.0
        # instead of a ZeroDivisionError.
        epoch_loss = running_loss / total if total else 0.0
        epoch_accuracy = 100 * correct / total if total else 0.0
        accuracies.append(epoch_accuracy)
        loss_a.append(epoch_loss)

    return accuracies, loss_a




## Evaluate Function

loss function and optimizer

In [None]:
%pip install seaborn


In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def evaluate_model(model, data_loader):
    """Evaluate ``model`` on ``data_loader`` and show a confusion matrix.

    Prints the running accuracy after every batch, prints the confusion
    matrix, and draws it as a seaborn heatmap.

    Args:
        model: Network to evaluate; called as ``model(images)`` in eval mode.
        data_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        List of running accuracies in percent, one entry per batch. The last
        entry is the accuracy over everything in ``data_loader``.
    """
    # Evaluate on whichever device the model already lives on, so the
    # function does not depend on a global ``device`` variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()  # Set the model to evaluation mode
    total = correct = 0
    all_predictions = []
    all_labels = []
    accuracies = []  # running accuracy (%) after each batch

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Collect all labels and predictions for the confusion matrix
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

            accuracy = 100 * correct / total
            print(f'Accuracy: {accuracy:.2f}%')
            accuracies.append(accuracy)

    # Pin the class order so the matrix is always 2x2 and lines up with the
    # tick labels below, even when one class never occurs in the data.
    cm = confusion_matrix(all_labels, all_predictions, labels=[0, 1])
    print("Confusion Matrix:")
    print(cm)

    # Draw the confusion matrix as an annotated heatmap
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Non-Standard', 'Standard'], yticklabels=['Non-Standard', 'Standard'])
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

    return accuracies


# Best Iteration (i.e., Iteration 12)
For further testing beyond this iteration, please run Iterations 1 to 11 below.

## Iteration 12 (Resnet 34, Batchsize 16, LR 0.001, epochs 7) 


In [None]:

# Rebuild the loaders with batch size 16; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False)

In [None]:

# Load a pretrained ResNet-34 and replace its final layer with a 2-class head
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)


# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train_model returns (accuracies, losses) in that order; unpack accordingly
# so that `loss` really holds the losses and `accuracy` the accuracies.
accuracy, loss = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=7)


In [None]:
# Show the two per-epoch histories unpacked from train_model in the previous cell.
print(loss, accuracy)

In [None]:
# Evaluate the trained model on the validation loader, then on the test loader.
# Each call returns the running accuracy recorded after every batch.
accur = evaluate_model(model, val_loader)
accur_v2 = evaluate_model(model, test_loader)

In [None]:
# Plot the running accuracies returned by evaluate_model. Those lists hold
# one value per batch, so the x-axis is sized from the data rather than
# hard-coded, and the labels describe what is actually plotted.
plt.figure(figsize=(10, 5))

# Left panel: validation accuracy
plt.subplot(1, 2, 1)
plt.plot(range(1, len(accur) + 1), accur, label='Acc Val')
plt.xlabel('Batch')
plt.ylabel('Accuracy (%)')
plt.title('Validation Accuracy vs. Batch')
plt.legend()

# Right panel: test accuracy
plt.subplot(1, 2, 2)
plt.plot(range(1, len(accur_v2) + 1), accur_v2, label='Acc test', color='orange')
plt.xlabel('Batch')
plt.ylabel('Accuracy (%)')
plt.title('Test Accuracy vs. Batch')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
def plot_accuracies(accuracies):
    """Draw a scatter plot of accuracy values against 1-based epoch numbers.

    Args:
        accuracies: Sequence of accuracy values, one per epoch.
    """
    epochs = range(1, len(accuracies) + 1)

    plt.figure(figsize=(10, 5))
    plt.scatter(epochs, accuracies)
    plt.title('Model Accuracy over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.xticks(epochs)  # one tick per epoch
    plt.show()

# Assume model, train_loader, val_loader, criterion, optimizer are all set up

# Train the model. train_model returns (accuracies, losses); only the
# accuracies are plotted, so the losses are unpacked separately instead of
# passing the whole tuple to plot_accuracies.
accuracies, _losses = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=50)

# Plot the accuracies
plot_accuracies(accuracies)


## Saving your model
Duplicate and run the cell below whenever you want to use the model.
- Change the parameters to match the iteration you prefer and rename the saved model file accordingly.

In [None]:
# Save the weights of the model trained above BEFORE `model` is rebound;
# otherwise a freshly created network would be written to disk instead.
# To only load an existing checkpoint, skip this line.
torch.save(model.state_dict(), 'BEST.pth')

# Rebuild the same architecture, then restore the saved weights into it
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)  # Adjust according to your classes
# The rebuilt model lives on the CPU, so map the checkpoint there; this also
# lets a checkpoint saved from a GPU load on a CPU-only machine.
model.load_state_dict(torch.load('BEST.pth', map_location='cpu'))
model.eval()  # Set the model to evaluation mode

# Other iterations below

## Iteration 1: Resnet18 + Batchsize 32

In [None]:

# Pretrained ResNet-18 with its final layer swapped for a 2-class head.
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(in_features=num_ftrs, out_features=2)
model = model.to(device)

# Cross-entropy loss, optimised with Adam at learning rate 0.001.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)



In [None]:
# train_model also requires val_loader and num_epochs; without them the
# call raises TypeError.
# NOTE(review): this cell never specified an epoch count; 10 is a placeholder — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)


In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 2: Resnet 34 + Batchsize 32

In [None]:

# Load a pretrained ResNet-34 and replace its final layer with a 2-class head
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train_model also requires val_loader and num_epochs; without them the
# call raises TypeError.
# NOTE(review): this cell never specified an epoch count; 10 is a placeholder — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)


In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 3: RESNET 34 + Batchsize 64

In [None]:

# Rebuild the loaders with batch size 64; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)


In [None]:

# Load a pretrained ResNet-34 and replace its final layer with a 2-class head
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train_model also requires val_loader and num_epochs; without them the
# call raises TypeError.
# NOTE(review): this cell never specified an epoch count; 10 is a placeholder — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)


In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 4: RESNET 18 + Batchsize 64


In [None]:

# Create datasets from the augmented train/validation pool and the per-class test hold-out
# NOTE(review): no DataLoader is rebuilt in this cell, so when the notebook
# is run top to bottom train_loader/val_loader/test_loader still wrap the
# datasets created earlier — confirm whether new loaders were intended here.
train_dataset = FetalUltrasoundDataset(train_images, train_labels, transform=train_transforms)
val_dataset = FetalUltrasoundDataset( val_images, val_labels, transform=train_transforms)
test_dataset = FetalUltrasoundDataset(test_images, test_labels, transform=eval_transforms)

# Load a pretrained ResNet-18 and replace its final layer with a 2-class head
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train_model also requires val_loader and num_epochs; without them the
# call raises TypeError.
# NOTE(review): this cell never specified an epoch count; 10 is a placeholder — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)


In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 5: RESNET 34 + Batchsize 128


In [None]:

# Build the batch-size-128 loaders BEFORE training so that this iteration
# actually trains with the batch size named in its heading.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Load a pretrained ResNet-34 and replace its final layer with a 2-class head
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train_model also requires val_loader and num_epochs; without them the
# call raises TypeError.
# NOTE(review): this cell never specified an epoch count; 10 is a placeholder — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)


In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 6: RESNET 34 + Batchsize 64 + Decay 0.01


In [None]:

# Load a pretrained ResNet-34 and replace its final layer with a 2-class head
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer (Adam with weight decay 0.01)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# train_model also requires val_loader and num_epochs; without them the
# call raises TypeError.
# NOTE(review): this cell never specified an epoch count; 10 is a placeholder — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10)



In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 7: RESNET 34 + Batchsize 64 + Decay 0.01 + epochs=50


In [None]:

# Load a pretrained ResNet-34 and replace its final layer with a 2-class head
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer (Adam with weight decay 0.01)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# val_loader is a required positional argument of train_model; leaving it
# out shifts the remaining arguments and raises TypeError.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=50)

In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 8: Resnet 34 + Batchsize64 + Decay 0.001 + epochs=15

In [None]:
# Load a pretrained ResNet-34 and replace its final layer with a 2-class head
model = models.resnet34(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer (Adam with weight decay 0.001)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# val_loader is a required positional argument of train_model; leaving it
# out shifts the remaining arguments and raises TypeError.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15)

In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 9: Resnet50 + Batchsize64  + decay 0.01 + epochs=50

In [None]:
# Load a pretrained ResNet-50 and replace its final layer with a 2-class head
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer (Adam, lr 0.01, weight decay 0.01)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.01)

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# val_loader is a required positional argument of train_model; leaving it
# out shifts the remaining arguments and raises TypeError.
# NOTE(review): the section heading says epochs=50 but 15 is passed here — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15)

In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 10: Resnet50 + Batchsize128  + decay 0.01 + epochs=15

In [None]:
# Load a pretrained ResNet-50 and replace its final layer with a 2-class head
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer (Adam, lr 0.01, weight decay 0.01)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.01)

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# val_loader is a required positional argument of train_model; leaving it
# out shifts the remaining arguments and raises TypeError.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15)

In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)

## Iteration 11: Resnet50 + Batchsize 32 + decay 0.01 + epochs=50

In [None]:
# Load a pretrained ResNet-50 and replace its final layer with a 2-class head
model = models.resnet50(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

# Define the loss function and optimizer (Adam, lr 0.01, weight decay 0.01)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.01)

# Creating data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# val_loader is a required positional argument of train_model; leaving it
# out shifts the remaining arguments and raises TypeError.
# NOTE(review): the section heading says epochs=50 but 15 is passed here — confirm.
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=15)

In [None]:
# Evaluate the current model on the validation loader, then on the test loader.
evaluate_model(model, val_loader)
evaluate_model(model, test_loader)