# CheXScan

## Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import torchvision.models as models
from torch.utils.data import DataLoader

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image

## Constants

In [None]:
# Number of images per mini-batch for both data loaders.
BATCH_SIZE = 32

# Target size handed to the resize / random-crop transforms.
IMAGE_SIZE = (299, 299)

# Class names used for reports and plots; NUM_CLASSES sizes every model head.
CLASSES = [
    'normal',
    'pneumonia',
    'tuberculosis',
]
NUM_CLASSES = len(CLASSES)

# Number of passes over the training set for each model.
EPOCHS = 2

## Preprocessing

In [None]:
# Deterministic preprocessing applied to every image of both datasets:
# resize to IMAGE_SIZE, convert to a tensor, then normalize each of the
# three channels with the given mean/std.
# NOTE(review): the mean/std values look like the usual ImageNet statistics
# that go with torchvision's pretrained weights — confirm.
preprocess_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
# Random augmentation pipeline: flip, small rotation, shear/scale, colour
# jitter, and a random crop resized to IMAGE_SIZE.
# NOTE(review): nothing visible in this file passes this pipeline to a
# dataset — both ImageFolder datasets are built with preprocess_transform —
# so the augmentation currently has no effect on training. It also contains
# no ToTensor/Normalize step, so it would have to be combined with those
# before it could feed a model.
data_augmentation_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.8, 1.0)),
])

## Loading Datasets

In [None]:
# Build the two image-folder datasets from hard-coded absolute paths; both
# use the same deterministic preprocessing.
# NOTE(review): the "validation" dataset reads from the test_data directory,
# so the metrics reported as validation (and the later top-3 model selection)
# are computed on the test split — confirm this is intended.
# NOTE(review): presumably the class sub-folder names map to label indices in
# the same order as CLASSES — verify against train_dataset.classes.
train_dataset = datasets.ImageFolder(root='D:/Jupyter/chexscan-folder/chexscan-experiment/data/train_data', transform=preprocess_transform)
valid_dataset = datasets.ImageFolder(root='D:/Jupyter/chexscan-folder/chexscan-experiment/data/test_data', transform=preprocess_transform)

In [None]:
# Batch both datasets with BATCH_SIZE; only the training loader is shuffled,
# so the validation order stays fixed between evaluation passes.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Training Parameters

In [None]:
# Shared training settings used by every model cell below.
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE: the model cells pass EPOCHS directly to train_model; num_epochs is
# kept as an alias holding the same value.
num_epochs = EPOCHS
# Learning rate handed to each model's Adam optimizer.
learning_rate = 0.001
# Loss applied to the raw model outputs and integer class labels.
criterion = nn.CrossEntropyLoss()

## Functions

### Training Function

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None,
               true_sink=None, pred_sink=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, accuracy)``.

    The pass trains the model when ``optimizer`` is given and only evaluates
    it (eval mode, gradients disabled) otherwise.

    Args:
        model: network called as ``model(images)``.
        loader: sized iterable yielding ``(images, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        device: device the batches are moved to.
        optimizer: optimizer to step after each batch, or ``None`` to evaluate.
        true_sink: optional list extended with each batch's labels.
        pred_sink: optional list extended with each batch's predictions.

    Raises:
        ValueError: if ``loader`` has no batches.
    """
    num_batches = len(loader)
    if num_batches == 0:
        raise ValueError("data loader is empty; cannot compute loss/accuracy")

    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Index of the largest output per sample is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            loss_sum += loss.item()

            if true_sink is not None:
                true_sink.extend(labels.cpu().numpy())
            if pred_sink is not None:
                pred_sink.extend(predicted.cpu().numpy())

    # Loss is averaged over batches (not samples), as in the per-epoch log.
    return loss_sum / num_batches, correct / total


def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=10, device='cpu'):
    """Train ``model`` and validate it after every epoch.

    Args:
        model: network to train; it is expected to already live on ``device``.
        train_loader: sized iterable of ``(images, labels)`` training batches.
        valid_loader: sized iterable of ``(images, labels)`` validation batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer bound to the model's parameters.
        num_epochs: number of train + validate rounds.
        device: device each batch is moved to.

    Returns:
        A ``(history, all_true_labels, all_predicted_labels)`` tuple.
        ``history`` maps ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and
        ``'val_loss'`` to one value per epoch. The two label lists hold the
        *training* labels and predictions accumulated over all epochs.

    Raises:
        ValueError: if either loader has no batches.
    """
    history = {'accuracy': [], 'val_accuracy': [], 'loss': [], 'val_loss': []}
    all_true_labels = []
    all_predicted_labels = []

    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(
            model, train_loader, criterion, device,
            optimizer=optimizer,
            true_sink=all_true_labels,
            pred_sink=all_predicted_labels,
        )
        history['accuracy'].append(train_accuracy)
        history['loss'].append(train_loss)

        # Validation labels are not returned, so none are collected here.
        val_loss, val_accuracy = _run_epoch(model, valid_loader, criterion, device)
        history['val_accuracy'].append(val_accuracy)
        history['val_loss'].append(val_loss)

        # Print training progress
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Accuracy: {train_accuracy*100:.2f}%, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy*100:.2f}%")

    return history, all_true_labels, all_predicted_labels


### Evaluation Function

In [None]:
def evaluate_model(model, valid_loader, device='cpu'):
    """Collect ground-truth and predicted class indices over a loader.

    The model is switched to eval mode and run without gradient tracking.

    Args:
        model: network called as ``model(images)``.
        valid_loader: iterable yielding ``(images, labels)`` batches.
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(labels, predictions)``: two flat lists with one entry per sample,
        in loader order.
    """
    model.eval()
    truths, guesses = [], []

    with torch.no_grad():
        for batch_images, batch_labels in valid_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            # Second element of torch.max(..., 1) is the arg-max class index.
            top_class = torch.max(model(batch_images), 1)[1]
            guesses.extend(top_class.cpu().numpy())
            truths.extend(batch_labels.cpu().numpy())

    return truths, guesses

### Visualization Function

In [None]:
def plot_history_metrics(history):
    """Plot training/validation accuracy and loss curves side by side.

    Args:
        history: the 3-tuple returned by ``train_model``; only its first
            element (the per-epoch metrics dict) is used.
    """
    metrics, _, _ = history

    # (panel title / y-label, training key, training legend,
    #  validation key, validation legend)
    panels = (
        ('Accuracy', 'accuracy', 'Training Accuracy', 'val_accuracy', 'Validation Accuracy'),
        ('Loss', 'loss', 'Training Loss', 'val_loss', 'Validation Loss'),
    )

    plt.figure(figsize=(12, 6))
    for position, (title, train_key, train_label, val_key, val_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(metrics[train_key], label=train_label, color='blue')
        plt.plot(metrics[val_key], label=val_label, color='orange')
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(title)
        plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
def plot_confusion_matrix(true_labels, predicted_labels, class_names):
    """Draw the confusion matrix of the given labels as an annotated heatmap.

    Args:
        true_labels: ground-truth labels, one per sample.
        predicted_labels: predicted labels, aligned with ``true_labels``.
        class_names: tick labels used for both heatmap axes.
    """
    counts = confusion_matrix(true_labels, predicted_labels)

    heatmap_options = dict(
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )

    plt.figure(figsize=(8, 8))
    sns.heatmap(counts, **heatmap_options)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.show()

## AlexNet Model

In [None]:
# Fine-tune AlexNet: load it with pretrained weights, swap the last classifier
# layer for a NUM_CLASSES-way linear head, and move it to the target device.
alexnet_model = models.alexnet(pretrained=True)
num_features = alexnet_model.classifier[6].in_features
alexnet_model.classifier[6] = nn.Linear(num_features, NUM_CLASSES)
alexnet_model = alexnet_model.to(device)

# Train AlexNet; every parameter is handed to Adam (no layers are frozen).
# history_alexnet holds the full tuple returned by train_model.
optimizer = optim.Adam(alexnet_model.parameters(), lr=learning_rate)
history_alexnet = train_model(alexnet_model, train_loader, valid_loader, criterion, optimizer, num_epochs=EPOCHS, device=device)

# Plot accuracy/loss curves (plot_history_metrics unpacks the tuple itself).
plot_history_metrics(history_alexnet)

# Collect labels and predictions over the validation loader.
true_labels_alexnet, predictions_alexnet = evaluate_model(alexnet_model, valid_loader, device=device)

# Print the per-class classification report for AlexNet.
print("AlexNet Classification Report:")
print(classification_report(true_labels_alexnet, predictions_alexnet, target_names=CLASSES))

# Show the confusion matrix for AlexNet.
print("AlexNet Confusion Matrix:")
plot_confusion_matrix(true_labels_alexnet, predictions_alexnet, CLASSES)

## DenseNet-121 Model

In [None]:
# Fine-tune DenseNet-121: load it with pretrained weights, replace its
# classifier with a NUM_CLASSES-way linear head, and move it to the device.
densenet_model = models.densenet121(pretrained=True)
num_features = densenet_model.classifier.in_features
densenet_model.classifier = nn.Linear(num_features, NUM_CLASSES)
densenet_model = densenet_model.to(device)

# Train DenseNet-121; every parameter is handed to Adam (nothing is frozen).
# history_densenet holds the full tuple returned by train_model.
optimizer = optim.Adam(densenet_model.parameters(), lr=learning_rate)
history_densenet = train_model(densenet_model, train_loader, valid_loader, criterion, optimizer, num_epochs=EPOCHS, device=device)

# Plot accuracy/loss curves (plot_history_metrics unpacks the tuple itself).
plot_history_metrics(history_densenet)

# Collect labels and predictions over the validation loader.
true_labels_densenet, predictions_densenet = evaluate_model(densenet_model, valid_loader, device=device)

# Print the per-class classification report for DenseNet-121.
print("DenseNet-121 Classification Report:")
print(classification_report(true_labels_densenet, predictions_densenet, target_names=CLASSES))

# Show the confusion matrix for DenseNet-121.
print("DenseNet-121 Confusion Matrix:")
plot_confusion_matrix(true_labels_densenet, predictions_densenet, CLASSES)

## InceptionV3 Model

In [None]:
# NOTE: this whole cell is disabled, including the confusion-matrix lines at
# the end, which previously ran on their own and raised NameError because
# true_labels_inception / predictions_inception are never defined.
# NOTE(review): presumably the cell was disabled because torchvision's
# Inception v3 returns auxiliary outputs in training mode, which train_model
# does not unpack — verify before re-enabling.

# # Define InceptionV3 model
# inception_model = models.inception_v3(pretrained=True)
# num_features = inception_model.fc.in_features
# inception_model.fc = nn.Linear(num_features, NUM_CLASSES)
# inception_model = inception_model.to(device)

# # Train InceptionV3 model
# optimizer = optim.Adam(inception_model.parameters(), lr=learning_rate)
# history_inception = train_model(inception_model, train_loader, valid_loader, criterion, optimizer, num_epochs=EPOCHS, device=device)

# # Plot history for InceptionV3
# plot_history_metrics(history_inception)

# # Evaluate InceptionV3 model
# true_labels_inception, predictions_inception = evaluate_model(inception_model, valid_loader, device=device)

# # Print classification report for InceptionV3
# print("InceptionV3 Classification Report:")
# print(classification_report(true_labels_inception, predictions_inception, target_names=CLASSES))

# # Plot confusion matrix for InceptionV3
# print("InceptionV3 Confusion Matrix:")
# plot_confusion_matrix(true_labels_inception, predictions_inception, CLASSES)

## VGG-16 Model

In [None]:
# Fine-tune VGG16: load it with pretrained weights, swap the last classifier
# layer for a NUM_CLASSES-way linear head, and move it to the target device.
vgg16_model = models.vgg16(pretrained=True)
num_features = vgg16_model.classifier[6].in_features
vgg16_model.classifier[6] = nn.Linear(num_features, NUM_CLASSES)
vgg16_model = vgg16_model.to(device)

# Train VGG16; every parameter is handed to Adam (no layers are frozen).
# history_vgg16 holds the full tuple returned by train_model.
optimizer = optim.Adam(vgg16_model.parameters(), lr=learning_rate)
history_vgg16 = train_model(vgg16_model, train_loader, valid_loader, criterion, optimizer, num_epochs=EPOCHS, device=device)

# Plot accuracy/loss curves (plot_history_metrics unpacks the tuple itself).
plot_history_metrics(history_vgg16)

# Collect labels and predictions over the validation loader.
true_labels_vgg16, predictions_vgg16 = evaluate_model(vgg16_model, valid_loader, device=device)

# Print the per-class classification report for VGG16.
print("VGG16 Classification Report:")
print(classification_report(true_labels_vgg16, predictions_vgg16, target_names=CLASSES))

# Show the confusion matrix for VGG16.
print("VGG16 Confusion Matrix:")
plot_confusion_matrix(true_labels_vgg16, predictions_vgg16, CLASSES)

## ResNet-18 Model

In [None]:
# Fine-tune ResNet-18: load it with pretrained weights, replace its final
# fully connected layer with a NUM_CLASSES-way head, and move it to the device.
resnet18_model = models.resnet18(pretrained=True)
num_features = resnet18_model.fc.in_features
resnet18_model.fc = nn.Linear(num_features, NUM_CLASSES)
resnet18_model = resnet18_model.to(device)

# Train ResNet-18; every parameter is handed to Adam (no layers are frozen).
# history_resnet18 holds the full tuple returned by train_model.
optimizer = optim.Adam(resnet18_model.parameters(), lr=learning_rate)
history_resnet18 = train_model(resnet18_model, train_loader, valid_loader, criterion, optimizer, num_epochs=EPOCHS, device=device)

# Plot accuracy/loss curves (plot_history_metrics unpacks the tuple itself).
plot_history_metrics(history_resnet18)

# Collect labels and predictions over the validation loader.
true_labels_resnet18, predictions_resnet18 = evaluate_model(resnet18_model, valid_loader, device=device)

# Print the per-class classification report for ResNet-18.
print("ResNet-18 Classification Report:")
print(classification_report(true_labels_resnet18, predictions_resnet18, target_names=CLASSES))

# Show the confusion matrix for ResNet-18.
print("ResNet-18 Confusion Matrix:")
plot_confusion_matrix(true_labels_resnet18, predictions_resnet18, CLASSES)

## Ensemble

In [None]:
import torch
import numpy as np

def make_ensemble_predictions(models, images):
    """Average the class probabilities predicted by several models.

    Each model is run in eval mode without gradient tracking, its outputs are
    turned into probabilities with a softmax over dimension 1, and the
    per-model probabilities are averaged.

    Args:
        models: iterable of ``(name, model)`` pairs; only the model is used.
        images: batched input tensor accepted by every model. It is moved to
            each model's own device before the forward pass.

    Returns:
        NumPy array with the mean probability per sample and class; each row
        sums to 1.

    Raises:
        ValueError: if ``models`` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("make_ensemble_predictions needs at least one model")

    ensemble_predictions = []
    for _, model in models:
        # Run on whatever device the model lives on (a parameter-free model
        # gets the images unchanged).
        first_param = next(model.parameters(), None)
        inputs = images if first_param is None else images.to(first_param.device)

        # Inference must not use dropout / batch-norm in training mode;
        # restore the caller's mode afterwards.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                logits = model(inputs)
        finally:
            model.train(was_training)

        # The models emit raw scores; convert them to probabilities before
        # averaging, and move to the CPU so NumPy conversion works for CUDA.
        probabilities = torch.softmax(logits, dim=1)
        ensemble_predictions.append(probabilities.cpu().numpy())

    # Ensemble by averaging the probabilities
    average_probabilities = np.mean(ensemble_predictions, axis=0)
    return average_probabilities

# Accuracy of each trained model on the labels/predictions collected by
# evaluate_model over valid_loader, keyed by display name. The InceptionV3
# entry stays disabled because that model's cell is commented out.
model_accuracies = {
    'AlexNet': accuracy_score(true_labels_alexnet, predictions_alexnet),
    'DenseNet-121': accuracy_score(true_labels_densenet, predictions_densenet),
    'ResNet-18': accuracy_score(true_labels_resnet18, predictions_resnet18),  # Corrected to ResNet-18
    'VGG16': accuracy_score(true_labels_vgg16, predictions_vgg16),
#     'InceptionV3': accuracy_score(true_labels_inception, predictions_inception)
}

# Rank the (name, accuracy) pairs by accuracy, best first, and keep the top 3.
top_models = sorted(model_accuracies.items(), key=lambda x: x[1], reverse=True)[:3]

In [None]:
# Map each display name used in model_accuracies to its trained model object.
model_objects = {
    'AlexNet': alexnet_model,
    'DenseNet-121': densenet_model,
    'ResNet-18': resnet18_model,
    'VGG16': vgg16_model
}

# Pair the top 3 performing model names with their model objects; this is the
# (name, model) list that make_ensemble_predictions expects.
top_models_objects = [(model_name, model_objects[model_name]) for model_name, _ in top_models]

# NOTE: no prediction is made in this cell. The input tensor is only built in
# the image-loading cell that follows, so calling make_ensemble_predictions
# here raised NameError on `image_tensor`; that cell performs the call.

In [None]:
# Load the image. Force three channels: the Normalize step below has three
# channel means, and a single-channel (grayscale) file would not match it.
# The context manager closes the file handle once the pixels are copied.
image_path = 'D:/Jupyter/chexscan-folder/chexscan/test/pneumonia.jpeg'
with Image.open(image_path) as image_file:
    image = image_file.convert('RGB')

# Reuse the exact preprocessing the models were trained and evaluated with
# (resize to IMAGE_SIZE + normalization) instead of a different resize/crop,
# so inference inputs match the training inputs.
preprocess = preprocess_transform

# Preprocess the image
image_tensor = preprocess(image)

# Add a batch dimension to the image tensor
image_tensor = image_tensor.unsqueeze(0)  # Add a batch dimension at index 0

# Make predictions using the ensemble of top models
ensemble_predictions = make_ensemble_predictions(top_models_objects, image_tensor)

# Pick the highest-scoring class along the class axis for the single image
# in the batch, as a plain int usable as an index into CLASSES.
predicted_class_index = int(np.argmax(ensemble_predictions, axis=1)[0])

In [None]:
# Translate the predicted class index from the ensemble cell into its name
# in CLASSES.
predicted_class_label = CLASSES[predicted_class_index]

# Report the ensemble's decision for the single test image.
print("Predicted class label:", predicted_class_label)