# COVID-19 Chest X-Ray Database - Experiment

## CNN Model Implementation

In [None]:
import torch
import wandb
from torchvision import models

In [None]:
# Image size constant for the experiment.
# NOTE(review): not referenced by any cell visible in this notebook section.
IMAGE_SIZE: int = 299
# Number of target classes; sizes the output layer of the classifier head.
NUMBER_OF_CLASSES: int = 4

In [None]:
# TODO: Check project name and other values
# Initialize the wandb run (requires a prior `wandb login` in the terminal).
# The returned run handle is used later to log metrics and to close the run.
wandb_run = wandb.init(
    # set the wandb project where this run will be logged
    project="my-awesome-project",

    # track hyperparameters and run metadata
    # (these values are descriptive only: keep them in sync with the optimizer
    # learning rate and the epoch count used by the training loop)
    config={
        "learning_rate": 0.001,
        "pretrained_model": "ResNet-50",
        "architecture": "CNN",
        "optimizer": "Adam",
        "criterion": "Cross entropy loss",
        "dataset": "COVID-19 Chest X-Ray Database",
        "epochs": 20
    },
)

In [None]:
# Pick the compute device: use the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Load pre-trained ResNet50 and freeze every backbone parameter so only the
# new classifier head is trained.
# NOTE: `pretrained=True` is deprecated in recent torchvision releases in
# favour of the `weights=` argument; kept as-is for compatibility.
model = models.resnet50(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# Get number of input features from the original FC layer
num_features = model.fc.in_features

# Replace the original ImageNet head with a pass-through. Without this the
# backbone would emit ImageNet logits instead of the `num_features`-wide
# pooled features the new classifier expects, and the forward pass would
# fail with a shape mismatch.
model.fc = torch.nn.Identity()

# Define new classifier head
classifier = torch.nn.Sequential(
    torch.nn.Linear(num_features, 128),  # Example hidden layer
    torch.nn.ReLU(inplace=True),
    torch.nn.Linear(128, NUMBER_OF_CLASSES)  # Output layer with your class count
)

# Combine backbone (feature extractor) and classifier, then move the whole
# model to the chosen device before the optimizer is created
full_model = torch.nn.Sequential(model, classifier)
full_model.to(device)

# Define loss function
criterion = torch.nn.CrossEntropyLoss()

# Define optimizer over the classifier head only; the backbone is frozen
# (replace with your learning rate if needed)
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

In [None]:
class EarlyStopping(object):
    """Tell the training loop when validation loss has stopped improving.

    Call the instance once per epoch. It returns ``True`` while training
    should continue and ``False`` once ``patience`` consecutive calls have
    passed without an improvement.

    Args:
        patience: Number of consecutive non-improving calls tolerated before
            stopping is signalled.
        min_delta: Minimum decrease of the validation loss (relative to the
            best value seen) that counts as an improvement. The default of
            ``0.0`` means any strict decrease counts.
    """

    def __init__(self, patience=5, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        # Lowest validation loss seen so far
        self.best_val_loss = float('inf')
        # Consecutive calls without improvement
        self.counter = 0

    def __call__(self, epoch, logs):
        """Record this epoch's validation loss and decide whether to go on.

        Args:
            epoch: Index of the current epoch (accepted for callback-style
                compatibility; not used in the decision).
            logs: Mapping that must contain a numeric ``'val_loss'`` entry.

        Returns:
            ``True`` to continue training, ``False`` to stop.

        Raises:
            TypeError: If ``logs`` has no ``'val_loss'`` value.
        """
        val_loss = logs.get('val_loss')
        if val_loss is None:
            # Fail with an explicit message instead of an opaque
            # "'<' not supported between NoneType and float" error.
            raise TypeError("logs must contain a numeric 'val_loss' entry")

        if val_loss < self.best_val_loss - self.min_delta:
            self.best_val_loss = val_loss
            self.counter = 0
            return True

        self.counter += 1
        if self.counter >= self.patience:
            print(f"Early stopping triggered after {self.patience} epochs with no improvement.")
            return False
        return True

In [None]:
# ... (your data preprocessing and dataset creation)

# Build the batch iterators (replace with your data loaders).
# Training batches are shuffled; the validation loader keeps the default order.
train_dl = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
val_dl = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=32,
)

In [None]:
# The training loop iterates over `train_loader` / `val_loader`; point them at
# the DataLoaders created in the previous cell instead of leaving them as
# None (iterating over None raises a TypeError).
train_loader = train_dl
val_loader = val_dl

In [None]:

early_stopping = EarlyStopping(patience=5)

# Training loop (the epoch count mirrors the "epochs" value logged to wandb)
for epoch in range(20):
    # Training phase: make sure train-time layer behaviour is enabled, since
    # the validation phase below switches the model to eval mode
    full_model.train()
    for data, target in train_loader:
        # Move data and target to device
        data, target = data.to(device), target.to(device)

        # Forward pass, calculate loss
        output = full_model(data)
        loss = criterion(output, target)

        # Backpropagation, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation phase: eval mode makes normalization layers use their stored
    # statistics instead of per-batch ones, and no_grad skips gradient tracking
    full_model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data, target in val_loader:
            # Move data and target to device
            data, target = data.to(device), target.to(device)

            # Forward pass
            output = full_model(data)

            # Accumulate the per-batch validation loss
            val_loss += criterion(output, target).item()
            # TODO: Calculate metrics

        # Average over the number of validation batches
        val_loss /= len(val_loader)

    # Log validation, and metrics
    if wandb_run is not None:
        # TODO: Add metrics
        wandb_run.log({"val_loss": val_loss})

    # Stop as soon as the validation loss has stalled for `patience` epochs
    if not early_stopping(epoch, logs={'val_loss': val_loss}):
        break

# Finish the wandb run through the same handle that was used for logging
if wandb_run is not None:
    wandb_run.finish()

### Raw Images

### Bilateral Filtered Images

In [None]:
def run_model(name, classifier, params, cmap):
    """Fit one classifier on the current split, report it, and return its metrics.

    Reads the module-level ``X_train``, ``X_test``, ``y_train`` and ``y_test``
    set by the calling cell.

    Args:
        name: Label printed as the heading for this model.
        classifier: Callable (e.g. an estimator class) invoked as
            ``classifier(**params)`` to build the model.
        params: Keyword arguments forwarded to ``classifier``.
        cmap: Colormap handed to ``plot_confusion_matrix``.

    Returns:
        The metrics mapping produced by ``get_metrics``, including the
        ``'TPR'`` and ``'FPR'`` entries that are left out of the printout.
    """
    print(f"\n- {name}")

    estimator = classifier(**params)
    predictions, probabilities = fit_and_predict(estimator, X_train, X_test, y_train, y_test)
    metrics, confusion = get_metrics(y_test, predictions, probabilities)

    plot_confusion_matrix(confusion, cmap)

    # Print every metric except the 'TPR'/'FPR' entries; work on a copy so
    # the returned mapping still carries them.
    printable = metrics.copy()
    for curve_key in ('TPR', 'FPR'):
        del printable[curve_key]

    for label, score in printable.items():
        print(f"    - {label}: {score}")

    return metrics

## Multilayer Perceptron

In [None]:
# Preprocess dataset
wineDF = pd.read_csv("./data/winequality-red.csv")

# Binarize the target: a quality score above 6 is the positive class (1)
wineDF['label'] = (wineDF['quality'] > 6).astype(int)

# DataFrame.drop returns a new frame rather than modifying in place. The
# result must be re-assigned, otherwise 'quality' stays among the features
# and leaks the label it was derived from.
wineDF = wineDF.drop('quality', axis=1)

wine_y = wineDF['label'].values
wine_X = wineDF.drop('label', axis=1).values

In [None]:
# Standardize dataset: fit the scaler on the features, then rescale them.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# splits are drawn, so test-set statistics influence the training features.
# Consider fitting on the training part of each split instead.
scaler = StandardScaler()
scaler.fit(wine_X)
wine_X = scaler.transform(wine_X)

In [None]:
# Obtained from grid search: install the tuned hyperparameters for each model
for _model_name, _tuned_params in (
    ('Logistic Regression', {'C': 0.046415888336127774, 'max_iter': 100, 'penalty': 'l1', 'solver': 'liblinear'}),
    ('Decision Tree', {'max_depth': None, 'criterion': 'entropy'}),
    ('K-Nearest Neighbors', {'n_neighbors': 3}),
    ('Neural Network', {'activation': 'tanh', 'hidden_layer_sizes': (50,), 'learning_rate_init': 0.001, 'max_iter': 200, 'solver': 'adam'}),
):
    models[_model_name]['params'] = _tuned_params

In [None]:
# Repeated stratified hold-out evaluation: 5 independent 80/20 splits
# (not k-fold cross-validation: the test sets of different splits may overlap)
all_metrics = {}

for i in range(5):
    print(f"\nSplit {i+1}:")

    # Seed each split with its index so the five splits differ from one
    # another but are identical every time the notebook is re-run.
    # (assumes scikit-learn's train_test_split, which accepts `random_state`)
    (X_train, X_test, y_train, y_test) = train_test_split(
        wine_X, wine_y, test_size=0.2, stratify=wine_y, random_state=i
    )

    for key, model in models.items():
        # run_model reads X_train / X_test / y_train / y_test from the globals
        # assigned just above
        metrics = run_model(key, model['classifier'], model['params'], plt.cm.Blues)
        # Overwritten on every split: after the loop this holds the metrics
        # of the last split only
        all_metrics[key] = metrics

# ROC curves, one per model, drawn from the metrics kept in all_metrics
plt.figure()

for name, metrics in all_metrics.items():
    plt.plot(metrics['FPR'], metrics['TPR'], linestyle='-', color=models[name]['color'], label=name)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for Multiple Models')
plt.legend()
plt.show()

## Heart Disease Dataset

In [None]:
# Preprocess dataset: load the CSV, then separate the features from the
# 'target' column that holds the label
heartDF = pd.read_csv("./data/heart_statlog_cleveland_hungary_final.csv")

heart_X = heartDF.drop(columns='target').values
heart_y = heartDF['target'].values

In [None]:
# Standardize dataset: fit the scaler on the features, then rescale them.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# splits are drawn, so test-set statistics influence the training features.
# Consider fitting on the training part of each split instead.
scaler = StandardScaler()
scaler.fit(heart_X)
heart_X = scaler.transform(heart_X)

In [None]:
# Obtained from grid search: install the tuned hyperparameters for each model
for _model_name, _tuned_params in (
    ('Logistic Regression', {'C': 0.046415888336127774, 'max_iter': 100, 'penalty': 'l1', 'solver': 'liblinear'}),
    ('Decision Tree', {'max_depth': 20, 'criterion': 'entropy'}),
    ('K-Nearest Neighbors', {'n_neighbors': 10}),
    ('Neural Network', {'activation': 'relu', 'hidden_layer_sizes': (100,), 'max_iter': 300, 'solver': 'lbfgs'}),
):
    models[_model_name]['params'] = _tuned_params

In [None]:
# Repeated stratified hold-out evaluation: 5 independent 80/20 splits
# (not k-fold cross-validation: the test sets of different splits may overlap)
all_metrics = {}

for i in range(5):
    print(f"\nSplit {i+1}:")

    # Seed each split with its index so the five splits differ from one
    # another but are identical every time the notebook is re-run.
    # (assumes scikit-learn's train_test_split, which accepts `random_state`)
    (X_train, X_test, y_train, y_test) = train_test_split(
        heart_X, heart_y, test_size=0.2, stratify=heart_y, random_state=i
    )

    for key, model in models.items():
        # run_model reads X_train / X_test / y_train / y_test from the globals
        # assigned just above
        metrics = run_model(key, model['classifier'], model['params'], plt.cm.Oranges)
        # Overwritten on every split: after the loop this holds the metrics
        # of the last split only
        all_metrics[key] = metrics

# ROC curves, one per model, drawn from the metrics kept in all_metrics
plt.figure()

for name, metrics in all_metrics.items():
    plt.plot(metrics['FPR'], metrics['TPR'], linestyle='-', color=models[name]['color'], label=name)

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves for Multiple Models')
plt.legend()
plt.show()