In [1]:
# BLOCK 1

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from tqdm import tqdm
import os
from datetime import datetime
import optuna


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# BLOCK 2

import numpy as np
import os

def load_data(directory):
    """Load and merge every ``.npz`` archive found directly in ``directory``.

    Archives are processed in sorted filename order. Each archive must
    provide an ``images`` entry and a ``labels`` entry.

    Args:
        directory: Path of the folder that holds the ``.npz`` files.

    Returns:
        Tuple ``(images_combined, labels_combined)``: the ``images`` arrays
        concatenated along axis 0, and all labels gathered into a single
        ``int32`` array in the same order.

    Raises:
        ValueError: If ``directory`` contains no ``.npz`` file.
        KeyError: If an archive lacks the ``images`` or ``labels`` entry.
    """
    files = sorted(f for f in os.listdir(directory) if f.endswith('.npz'))
    if not files:
        # Fail with an actionable message instead of letting np.concatenate
        # complain about an empty sequence further down.
        raise ValueError(f"No .npz files found in '{directory}'")

    images_list = []
    labels_list = []

    for file in files:
        # NOTE(security): allow_pickle=True lets numpy unpickle object arrays,
        # which can execute arbitrary code. Only point this at trusted files.
        # The context manager closes the underlying zip handle once the
        # arrays have been read into memory.
        with np.load(os.path.join(directory, file), allow_pickle=True) as data:
            images_list.append(data['images'])
            labels_list.extend(data['labels'])

    images_combined = np.concatenate(images_list, axis=0)
    labels_combined = np.array(labels_list, dtype=np.int32)

    return images_combined, labels_combined

In [4]:
# BLOCK 3

import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    """Three-block CNN for two-class classification of single-channel images.

    Each block is a 3x3 convolution (padding 1) followed by ReLU and a 2x2
    max-pool. The pooled feature map is flattened and passed through one
    hidden fully connected layer with dropout, then a 2-unit output layer
    that returns raw logits.

    Args:
        conv1_filters: Output channels of the first convolution.
        conv2_filters: Output channels of the second convolution.
        conv3_filters: Output channels of the third convolution.
        fc1_units: Width of the hidden fully connected layer.
        dropout_rate: Dropout probability applied after the hidden layer.
        input_size: Spatial size of the input images, either a single int
            for square images or a ``(height, width)`` pair. Defaults to
            256, which reproduces the previous fixed ``32 * 32`` flattened
            feature map.

    Raises:
        ValueError: If the input is too small to survive three 2x2 pools.
    """

    def __init__(self, conv1_filters=32, conv2_filters=64, conv3_filters=128, fc1_units=128, dropout_rate=0.5, input_size=256):
        super().__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # The convolutions preserve H x W (3x3 kernel, padding 1); each of the
        # three 2x2 pools floor-halves both dimensions, i.e. divides by 8 overall.
        pooled_height, pooled_width = height // 8, width // 8
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {input_size} is too small: at least 8 pixels per side are required"
            )

        self.conv1 = nn.Conv2d(1, conv1_filters, kernel_size=3, padding=1)  # 1 input channel: grayscale
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(conv1_filters, conv2_filters, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(conv2_filters, conv3_filters, kernel_size=3, padding=1)

        self.fc1 = nn.Linear(conv3_filters * pooled_height * pooled_width, fc1_units)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(fc1_units, 2)

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)`` for ``x`` of shape ``(batch, 1, H, W)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x


In [6]:
# BLOCK 4

from datetime import datetime

def _make_loader(X, y, batch_size, shuffle):
    """Wrap numpy images/labels in a DataLoader, inserting a channel axis at dim 1."""
    images = torch.tensor(X, dtype=torch.float32).unsqueeze(1)
    targets = torch.tensor(y, dtype=torch.long)
    return DataLoader(TensorDataset(images, targets), batch_size=batch_size, shuffle=shuffle)


def train_and_evaluate(X, y, fold_count=7, conv1_filters=32, conv2_filters=64, conv3_filters=128, fc1_units=128, dropout_rate=0.5, epochs=20, batch_size=16, learning_rate=0.001):
    """Train a fresh ``CNNModel`` on every K-fold split and score it on the held-out part.

    Args:
        X: Image array indexed along axis 0 by sample; a channel axis is
            inserted at dim 1 before the data reaches the model.
        y: Integer class labels aligned with ``X``.
        fold_count: Number of K-fold splits.
        conv1_filters, conv2_filters, conv3_filters, fc1_units, dropout_rate:
            Forwarded positionally to ``CNNModel``.
        epochs: Training epochs per fold (previously fixed at 20).
        batch_size: Mini-batch size for both loaders (previously fixed at 16).
        learning_rate: Adam learning rate (previously fixed at 0.001).

    Returns:
        Tuple ``(best_model, results)``. ``results`` holds one
        ``(accuracy, precision, recall, f1)`` tuple per fold (macro-averaged
        precision/recall/F1). ``best_model`` is the model from the fold with
        the highest F1.
    """
    # NOTE(review): KFold is used without shuffling, so folds are contiguous
    # slices of X in the order given. Confirm the caller's data is not
    # ordered by class.
    kf = KFold(n_splits=fold_count)
    results = []
    best_model = None
    best_f1 = float('-inf')

    for fold, (train_index, val_index) in enumerate(kf.split(X)):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        print(f"Fold {fold + 1}:")
        print(f"  X_train shape: {X_train.shape}")
        print(f"  y_train shape: {y_train.shape}")
        print(f"  X_val shape: {X_val.shape}")
        print(f"  y_val shape: {y_val.shape}")

        train_loader = _make_loader(X_train, y_train, batch_size, shuffle=True)
        val_loader = _make_loader(X_val, y_val, batch_size, shuffle=False)

        model = CNNModel(conv1_filters, conv2_filters, conv3_filters, fc1_units, dropout_rate)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        model.train()
        for epoch in tqdm(range(epochs), desc=f'Training fold {fold + 1}'):
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

        model.eval()
        val_predictions = []
        val_true = []
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                # Under no_grad the legacy `.data` detour is unnecessary.
                _, predicted = torch.max(outputs, 1)
                val_predictions.extend(predicted.numpy())
                val_true.extend(labels.numpy())

        accuracy = accuracy_score(val_true, val_predictions)
        precision = precision_score(val_true, val_predictions, average='macro')
        recall = recall_score(val_true, val_predictions, average='macro')
        f1 = f1_score(val_true, val_predictions, average='macro')

        print(f"  Results for fold {fold + 1}:")
        print(f"    Accuracy: {accuracy}")
        print(f"    Precision: {precision}")
        print(f"    Recall: {recall}")
        print(f"    F1 Score: {f1}")
        results.append((accuracy, precision, recall, f1))

        # Always keep the first fold's model so that a run in which every
        # fold scores F1 == 0 still returns a model instead of None.
        if best_model is None or f1 > best_f1:
            best_f1 = f1
            best_model = model

    avg_results = np.mean(results, axis=0)
    print("Average results across all folds:")
    print(f"  Accuracy: {avg_results[0]}")
    print(f"  Precision: {avg_results[1]}")
    print(f"  Recall: {avg_results[2]}")
    print(f"  F1 Score: {avg_results[3]}")

    return best_model, results



In [7]:
# BLOCK 5

# Module-level placeholders for the tuning data. `objective` reads these
# globals at call time, so they must hold real arrays before a study runs;
# re-running this cell resets them to None.
X_train_sample, y_train_sample = None, None

def objective(trial):
    """Optuna objective: mean F1 over a 3-fold run with trial-suggested hyperparameters.

    Args:
        trial: Optuna trial used to draw the CNN hyperparameters.

    Returns:
        The mean of the per-fold F1 scores reported by ``train_and_evaluate``
        on the global ``X_train_sample`` / ``y_train_sample``.
    """
    hyperparameters = {
        'conv1_filters': trial.suggest_int('conv1_filters', 16, 64, step=16),
        'conv2_filters': trial.suggest_int('conv2_filters', 32, 128, step=32),
        'conv3_filters': trial.suggest_int('conv3_filters', 64, 256, step=64),
        'fc1_units': trial.suggest_int('fc1_units', 64, 256, step=64),
        'dropout_rate': trial.suggest_float('dropout_rate', 0.3, 0.7, step=0.1),
    }

    # Only the per-fold metrics matter here; the returned model is discarded.
    _, fold_metrics = train_and_evaluate(
        X_train_sample,
        y_train_sample,
        fold_count=3,
        **hyperparameters,
    )

    # Index 3 of each per-fold tuple is the F1 score.
    f1_scores = [metrics[3] for metrics in fold_metrics]
    return np.mean(f1_scores)

In [8]:
# BLOCK 6

# Load data directly from files
train_metrics, train_labels = load_data('./npz_results/train')
val_metrics, val_labels = load_data('./npz_results/val')
test_metrics, test_labels = load_data('./npz_results/test')

# Fixed seed so the tuning subset is the same on every Restart & Run All
SAMPLE_SEED = 42

# Cap the data size to 10% for hyperparameter tuning
def sample_data(X, y, sample_fraction=0.1, seed=None):
    """Draw a random subset of aligned ``X`` / ``y`` without replacement.

    Args:
        X: Array of samples, indexed along axis 0.
        y: Array of labels aligned with ``X``.
        sample_fraction: Fraction of ``len(X)`` to keep; the sample size is
            truncated to an integer.
        seed: Optional seed for a dedicated generator, making the draw
            reproducible. With ``None`` the legacy global ``np.random``
            state is used, exactly as before.

    Returns:
        Tuple ``(X_subset, y_subset)`` selected with the same indices.
    """
    sample_size = int(len(X) * sample_fraction)
    rng = np.random if seed is None else np.random.default_rng(seed)
    indices = rng.choice(len(X), sample_size, replace=False)
    return X[indices], y[indices]

X_train_sample, y_train_sample = sample_data(train_metrics, train_labels, sample_fraction=0.1, seed=SAMPLE_SEED)
X_val_sample, y_val_sample = sample_data(val_metrics, val_labels, sample_fraction=0.1, seed=SAMPLE_SEED)
X_test_sample, y_test_sample = sample_data(test_metrics, test_labels, sample_fraction=0.1, seed=SAMPLE_SEED)

# Full data for final training
X_train_full, y_train_full = train_metrics, train_labels
X_val_full, y_val_full = val_metrics, val_labels
X_test_full, y_test_full = test_metrics, test_labels



In [9]:
# For pipeline testing
# train_and_evaluate(X_train_sample, y_train_sample, fold_count=3)

Fold 1:
  X_train shape: (1785, 256, 256)
  y_train shape: (1785,)
  X_val shape: (893, 256, 256)
  y_val shape: (893,)


Training fold 1: 100%|██████████| 20/20 [13:22<00:00, 40.13s/it]


  Results for fold 1:
    Accuracy: 0.6674132138857782
    Precision: 0.638601837131249
    Recall: 0.5848968308068513
    F1 Score: 0.5763601484219387
Fold 2:
  X_train shape: (1785, 256, 256)
  y_train shape: (1785,)
  X_val shape: (893, 256, 256)
  y_val shape: (893,)


Training fold 2: 100%|██████████| 20/20 [13:05<00:00, 39.26s/it]


  Results for fold 2:
    Accuracy: 0.6326987681970885
    Precision: 0.5910991021901985
    Recall: 0.587881746767289
    F1 Score: 0.5890612373170513
Fold 3:
  X_train shape: (1786, 256, 256)
  y_train shape: (1786,)
  X_val shape: (892, 256, 256)
  y_val shape: (892,)


Training fold 3: 100%|██████████| 20/20 [13:04<00:00, 39.24s/it]


  Results for fold 3:
    Accuracy: 0.6591928251121076
    Precision: 0.6208405136149744
    Recall: 0.6149194104631208
    F1 Score: 0.6170203150068923
Average results across all folds:
  Accuracy: 0.6531016023983248
  Precision: 0.6168471509788073
  Recall: 0.5958993293457536
  F1 Score: 0.5941472335819608


(CNNModel(
   (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   (fc1): Linear(in_features=131072, out_features=128, bias=True)
   (dropout): Dropout(p=0.5, inplace=False)
   (fc2): Linear(in_features=128, out_features=2, bias=True)
 ),
 [(0.6674132138857782,
   0.638601837131249,
   0.5848968308068513,
   0.5763601484219387),
  (0.6326987681970885,
   0.5910991021901985,
   0.587881746767289,
   0.5890612373170513),
  (0.6591928251121076,
   0.6208405136149744,
   0.6149194104631208,
   0.6170203150068923)])

In [None]:
# BLOCK 7

# Search the hyperparameter space, maximizing the mean F1 returned by `objective`.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

best_trial = study.best_trial
print(f"Best trial: {best_trial.values}")
print(f"Best hyperparameters: {best_trial.params}")

# BLOCK 8

# Retrain on the full training set with the winning hyperparameters.
best_hyperparameters = best_trial.params
best_model, results = train_and_evaluate(X_train_full, y_train_full, fold_count=7, **best_hyperparameters)

avg_results = np.mean(results, axis=0)
print("Final training with best hyperparameters:")
print(f"  Accuracy: {avg_results[0]}")
print(f"  Precision: {avg_results[1]}")
print(f"  Recall: {avg_results[2]}")
print(f"  F1 Score: {avg_results[3]}")

now = datetime.now()
timestamp = now.strftime("%Y%m%d_%H%M%S")
filename = f"best_simple_images_cnn_model_{timestamp}.pth"
print(f"Model will be saved as: {filename}")

if best_model is not None:
    # Save only the weights (state_dict). The test cell rebuilds CNNModel and
    # calls load_state_dict() on this file, which requires a state_dict and
    # not a pickled module object.
    torch.save(best_model.state_dict(), filename)
    print(f"Best model saved to '{filename}'.")
else:
    print("No model was selected during cross-validation; nothing was saved.")

In [8]:
# BLOCK 9 - Testing the model on the test set

def evaluate_model_on_test_set(model, X_test, y_test):
    """Score ``model`` on a held-out set and print the metrics.

    Args:
        model: Network to evaluate; it is switched to eval mode here.
        X_test: Input tensor, batched along dim 0 and fed to the model as is.
        y_test: Tensor of integer class labels aligned with ``X_test``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; precision, recall and
        F1 are macro-averaged.
    """
    model.eval()
    test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=16, shuffle=False)

    test_predictions, test_true = [], []
    with torch.no_grad():
        for batch_inputs, batch_labels in test_loader:
            batch_outputs = model(batch_inputs)
            # Index 1 of torch.max(..., dim) holds the argmax class per row.
            predicted = torch.max(batch_outputs.data, 1)[1]
            test_predictions.extend(predicted.numpy())
            test_true.extend(batch_labels.numpy())

    # Metrics are kept as (label, value) pairs so printing and returning
    # share a single ordering.
    scores = (
        ("Accuracy", accuracy_score(test_true, test_predictions)),
        ("Precision", precision_score(test_true, test_predictions, average='macro')),
        ("Recall", recall_score(test_true, test_predictions, average='macro')),
        ("F1 Score", f1_score(test_true, test_predictions, average='macro')),
    )

    print("Test set evaluation results:")
    for label, value in scores:
        print(f"  {label}: {value}")

    return tuple(value for _, value in scores)

# Prepare the test data
# Insert the channel axis at dim 1, exactly as train_and_evaluate does, so the
# tensor is (N, 1, H, W) as Conv2d(1, ...) requires. The previous
# permute(0, 2, 1).unsqueeze(3) put the channel axis last and transposed the
# images relative to training.
X_test_tensor = torch.tensor(X_test_full, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test_full, dtype=torch.long)

# Load the best model
model = CNNModel(**best_hyperparameters)
checkpoint = torch.load(filename)
# The file may hold either a state_dict or a whole pickled CNNModel;
# load_state_dict() only accepts the former, so unwrap a module if needed.
# NOTE(review): recent PyTorch releases restrict torch.load to weights by
# default, so a fully pickled model may need re-saving as a state_dict.
if isinstance(checkpoint, nn.Module):
    checkpoint = checkpoint.state_dict()
model.load_state_dict(checkpoint)

# Evaluate the model on the test set
evaluate_model_on_test_set(model, X_test_tensor, y_test_tensor)


Test set evaluation results:
  Accuracy: 0.917797717571217
  Precision: 0.9134568367293017
  Recall: 0.9055249910166071
  F1 Score: 0.9092475604292138


(0.917797717571217, 0.9134568367293017, 0.9055249910166071, 0.9092475604292138)