## Pacakge & Data Import

In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, RandomSampler
import torch.optim as optim
from tqdm.notebook import tqdm
from torchsummary import summary
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
import random

In [2]:
# import labels for gudhi shapes
gudhi_shape_labels = np.genfromtxt('Gudhi Shape Dataset/shape_labels.csv', delimiter=',', skip_header=1)
gudhi_shape_labels = gudhi_shape_labels.astype(int)[:,2]
print(len(gudhi_shape_labels))

2000


In [None]:
num_samples = 2000 # currently set to full dataset

# Generate random indices
random_indices = np.random.choice(len(gudhi_shape_labels), size=num_samples, replace=False)
base = 'Gudhi Shape Dataset/'
# Select the corresponding data and labels
gudhi_laplacians = []
gudhi_vr_persistence_images = []
gudhi_abstract_persistence_images = []
gudhi_selected_labels = []

for i in random_indices:
    gudhi_laplacians.append(np.genfromtxt(f'{base}/shape_{i}_laplacian.csv', delimiter=',', skip_header=0))
    gudhi_vr_persistence_images.append(np.genfromtxt(f'{base}/shape_{i}_vr_persistence_image.csv', delimiter=',', skip_header=0))
    gudhi_abstract_persistence_images.append(np.genfromtxt(f'{base}/shape_{i}_abstract_persistence_image.csv', delimiter=',', skip_header=0))
    gudhi_selected_labels.append(gudhi_shape_labels[i])

# Convert selected labels to NumPy array
gudhi_selected_labels = np.array(gudhi_selected_labels)

# Print summary
print(f"Randomly selected {num_samples} samples.")
print(f"Shape of laplacians: {np.array(gudhi_laplacians).shape}")
print(f"Shape of VR persistence images: {np.array(gudhi_vr_persistence_images).shape}")
print(f"Shape of abstract persistence images: {np.array(gudhi_abstract_persistence_images).shape}")
print(f"Shape of selected labels: {gudhi_selected_labels.shape}")

Randomly selected 2000 samples.
Shape of laplacians: (2000, 1000, 1000)
Shape of VR persistence images: (2000, 100, 100)
Shape of abstract persistence images: (2000, 100, 100)
Shape of selected labels: (2000,)


In [4]:
# import labels for medical shapes
medical_shape_labels = np.genfromtxt('Medical Dataset/shape_labels.csv', delimiter=',', skip_header=1)
medical_shape_labels = medical_shape_labels.astype(int)[:,2]
print(len(medical_shape_labels))

162


In [None]:
num_samples = 162 # currently set to full dataset

# Generate random indices
random_indices = np.random.choice(len(medical_shape_labels), size=num_samples, replace=False)
base = 'Medical Dataset/'
# Select the corresponding data and labels
medical_laplacians = []
medical_vr_persistence_images = []
medical_abstract_persistence_images = []
medical_selected_labels = []

for i in random_indices:
    medical_laplacians.append(np.genfromtxt(f'{base}/shape_{i}_laplacian.csv', delimiter=',', skip_header=0))
    medical_vr_persistence_images.append(np.genfromtxt(f'{base}/shape_{i}_vr_persistence_image.csv', delimiter=',', skip_header=0))
    medical_abstract_persistence_images.append(np.genfromtxt(f'{base}/shape_{i}_abstract_persistence_image.csv', delimiter=',', skip_header=0))
    medical_selected_labels.append(medical_shape_labels[i])

# Convert selected labels to NumPy array
medical_selected_labels = np.array(medical_selected_labels)

# Print summary
print(f"Randomly selected {num_samples} samples.")
print(f"Shape of laplacians: {np.array(medical_laplacians).shape}")
print(f"Shape of VR persistence images: {np.array(medical_vr_persistence_images).shape}")
print(f"Shape of abstract persistence images: {np.array(medical_abstract_persistence_images).shape}")
print(f"Shape of selected labels: {medical_selected_labels.shape}")

Randomly selected 162 samples.
Shape of laplacians: (162, 1000, 1000)
Shape of VR persistence images: (162, 100, 100)
Shape of abstract persistence images: (162, 100, 100)
Shape of selected labels: (162,)


## Define data class

In [6]:
class ShapeDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = [torch.tensor(d, dtype=torch.float32).unsqueeze(0) for d in data]
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]


## Data Splitting

In [7]:
train_ratio = 0.8
valid_ratio = 0.1
test_ratio = 0.1

# Split Gudhi Laplacians
train_data_gudhi_laplacians, test_data_gudhi_laplacians, train_labels_gudhi, test_labels_gudhi = train_test_split(
    gudhi_laplacians, gudhi_selected_labels, test_size=(1 - train_ratio), random_state=42
)
valid_data_gudhi_laplacians, test_data_gudhi_laplacians, valid_labels_gudhi, test_labels_gudhi = train_test_split(
    test_data_gudhi_laplacians, test_labels_gudhi, test_size=(test_ratio / (valid_ratio + test_ratio)), random_state=42
)

# Split Medical Laplacians (valid/test split only, since all are used for validation/testing)
valid_data_medical_laplacians, test_data_medical_laplacians, valid_labels_medical, test_labels_medical = train_test_split(
    medical_laplacians, medical_selected_labels, test_size=0.5, random_state=42
)

# Combine validation sets for Laplacians and labels
valid_laplacians = valid_data_gudhi_laplacians + valid_data_medical_laplacians
valid_labels = np.concatenate((valid_labels_gudhi, valid_labels_medical))

# Combine test sets for completeness
test_laplacians = test_data_gudhi_laplacians + test_data_medical_laplacians
test_labels = np.concatenate((test_labels_gudhi, test_labels_medical))

# Print a summary
print(f"Laplacians Train data size: {len(train_data_gudhi_laplacians)}")
print(f"Laplacians Validation data size: {len(valid_laplacians)}")
print(f"Laplacians Test data size: {len(test_laplacians)}")


# Split VR Persistence Images
train_data_gudhi_vr_persistence_images, test_data_gudhi_vr_persistence_images, train_labels_gudhi_check, test_labels_gudhi_check = train_test_split(
    gudhi_vr_persistence_images, gudhi_selected_labels, test_size=(1 - train_ratio), random_state=42
)
valid_data_gudhi_vr_persistence_images, test_data_gudhi_vr_persistence_images, valid_labels_gudhi_check, test_labels_gudhi_check = train_test_split(
    test_data_gudhi_vr_persistence_images, test_labels_gudhi_check, test_size=(test_ratio / (valid_ratio + test_ratio)), random_state=42
)

# Sanity check: Ensure indices for labels are consistent
assert np.array_equal(train_labels_gudhi, train_labels_gudhi_check), "Train labels do not match for Laplacians and VR persistence images!"
assert np.array_equal(test_labels_gudhi, test_labels_gudhi_check), "Test labels do not match for Laplacians and VR persistence images!"
assert np.array_equal(valid_labels_gudhi, valid_labels_gudhi_check), "Validation labels do not match for Laplacians and VR persistence images!"

# Split Medical VR Persistence Images (valid/test split only, since all are used for validation/testing)
valid_data_medical_vr_persistence_images, test_data_medical_vr_persistence_images, valid_labels_medical_check, test_labels_medical_check = train_test_split(
    medical_vr_persistence_images, medical_selected_labels, test_size=0.5, random_state=42
)

# Sanity check: Ensure medical label splits are consistent
assert np.array_equal(valid_labels_medical, valid_labels_medical_check), "Validation labels do not match for Medical Laplacians and VR persistence images!"
assert np.array_equal(test_labels_medical, test_labels_medical_check), "Test labels do not match for Medical Laplacians and VR persistence images!"


# Combine validation sets for VR Persistence Images and labels
valid_vr_persistence_images = valid_data_gudhi_vr_persistence_images + valid_data_medical_vr_persistence_images
valid_vr_labels = np.concatenate((valid_labels_gudhi, valid_labels_medical))

# Combine test sets for VR Persistence Images and labels
test_vr_persistence_images = test_data_gudhi_vr_persistence_images + test_data_medical_vr_persistence_images
test_vr_labels = np.concatenate((test_labels_gudhi, test_labels_medical))


# Split Abstract Persistence Images
train_data_gudhi_abstract_persistence_images, test_data_gudhi_abstract_persistence_images, train_labels_gudhi_check, test_labels_gudhi_check = train_test_split(
    gudhi_abstract_persistence_images, gudhi_selected_labels, test_size=(1 - train_ratio), random_state=42
)
valid_data_gudhi_abstract_persistence_images, test_data_gudhi_abstract_persistence_images, valid_labels_gudhi_check, test_labels_gudhi_check = train_test_split(
    test_data_gudhi_abstract_persistence_images, test_labels_gudhi_check, test_size=(test_ratio / (valid_ratio + test_ratio)), random_state=42
)

# Sanity check: Ensure indices for labels are consistent
assert np.array_equal(train_labels_gudhi, train_labels_gudhi_check), "Train labels do not match for Laplacians and Abstract persistence images!"
assert np.array_equal(test_labels_gudhi, test_labels_gudhi_check), "Test labels do not match for Laplacians and Abstract persistence images!"
assert np.array_equal(valid_labels_gudhi, valid_labels_gudhi_check), "Validation labels do not match for Laplacians and Abstract persistence images!"

# Split Medical Abstract Persistence Images (valid/test split only, since all are used for validation/testing)
valid_data_medical_abstract_persistence_images, test_data_medical_abstract_persistence_images, valid_labels_medical_check, test_labels_medical_check = train_test_split(
    medical_abstract_persistence_images, medical_selected_labels, test_size=0.5, random_state=42
)

# Sanity check: Ensure medical label splits are consistent
assert np.array_equal(valid_labels_medical, valid_labels_medical_check), "Validation labels do not match for Medical Laplacians and Abstract persistence images!"
assert np.array_equal(test_labels_medical, test_labels_medical_check), "Test labels do not match for Medical Laplacians and Abstract persistence images!"


# Combine validation sets for Abstract Persistence Images and labels
valid_abstract_persistence_images = valid_data_gudhi_abstract_persistence_images + valid_data_medical_abstract_persistence_images
valid_abstract_labels = np.concatenate((valid_labels_gudhi, valid_labels_medical))

# Combine test sets for Abstract Persistence Images and labels
test_abstract_persistence_images = test_data_gudhi_abstract_persistence_images + test_data_medical_abstract_persistence_images
test_abstract_labels = np.concatenate((test_labels_gudhi, test_labels_medical))


Laplacians Train data size: 1600
Laplacians Validation data size: 281
Laplacians Test data size: 281


## CNN Definitions

In [8]:
class CNN(nn.Module):
    def __init__(self, input_shape, num_classes=2):
        super(CNN, self).__init__()
        # Convolutional Layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        
        # Pooling Layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Adaptive Pooling to resize to 100x100
        self.adaptive_pool = nn.AdaptiveAvgPool2d((100, 100))
        
        # Dynamically calculate input size to fc1
        self.feature_size = self._get_feature_size(input_shape)
        
        # Fully Connected Layers
        self.fc1 = nn.Linear(self.feature_size, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def _get_feature_size(self, input_shape):
        # Create a dummy input to calculate size after conv and pooling
        dummy_input = torch.zeros(1, 1, *input_shape)
        x = self.pool(F.relu(self.conv1(dummy_input)))
        x = self.pool(F.relu(self.conv2(x)))
        
        # Apply adaptive pooling to get 100x100 size
        x = self.adaptive_pool(x)
        return x.numel()  # Number of elements after flattening

    def forward(self, x):
        # Apply convolutional layers with pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        
        # Apply adaptive pooling to resize to 100x100
        x = self.adaptive_pool(x)
        
        # Flatten and pass through fully connected layers
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


In [9]:
class DualInputCNN(nn.Module):
    def __init__(self, input_shape1, input_shape2, num_classes=2):
        super(DualInputCNN, self).__init__()

        # Laplacian input path with additional pooling to reduce to 100x100
        self.conv1_lap = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2_lap = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool_lap = nn.MaxPool2d(2, 2)  # Reduce spatial dimensions
        self.adaptive_pool_lap = nn.AdaptiveAvgPool2d((100, 100))  # Resize to 100x100
        
        # Persistence image input path (no pooling)
        self.conv1_pers = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2_pers = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.adaptive_pool_pers = nn.AdaptiveAvgPool2d((100, 100))  # Resize to 100x100
        
        # Fully connected layers
        self.fc1 = nn.Linear(32 * 100 * 100 + 32 * 100 * 100, 128)  # Adjusted for 100x100 input
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x1, x2):
        # Laplacians path (downsampling to 100x100)
        x1 = F.relu(self.conv1_lap(x1))
        x1 = self.pool_lap(x1)  # First pool: 250x250 -> 125x125
        x1 = F.relu(self.conv2_lap(x1))
        x1 = self.pool_lap(x1)  # Second pool: 125x125 -> 62x62
        x1 = self.adaptive_pool_lap(x1)  # Resize to 100x100
        
        # Persistence images path (no pooling)
        x2 = F.relu(self.conv1_pers(x2))
        x2 = F.relu(self.conv2_pers(x2))
        x2 = self.adaptive_pool_pers(x2)  # Ensure persistence images are 100x100
        
        # Concatenate along dim=1 (channels)
        x = torch.cat((x1, x2), dim=1)  # Concatenates the outputs along the channel axis

        # Flatten for fully connected layer
        x = torch.flatten(x, start_dim=1)
        
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        
        return F.log_softmax(x, dim=1)


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training on {device}")

Training on cpu


## Data Preparation

In [11]:
# Convert data to PyTorch Datasets
train_dataset_laplacians = ShapeDataset(train_data_gudhi_laplacians, train_labels_gudhi)
valid_dataset_laplacians = ShapeDataset(valid_laplacians, valid_labels)
test_dataset_laplacians = ShapeDataset(test_laplacians, test_labels)

train_dataset_vr = ShapeDataset(train_data_gudhi_vr_persistence_images, train_labels_gudhi)
valid_dataset_vr = ShapeDataset(valid_vr_persistence_images, valid_vr_labels)
test_dataset_vr = ShapeDataset(test_vr_persistence_images, test_vr_labels)

train_dataset_abstract = ShapeDataset(train_data_gudhi_abstract_persistence_images, train_labels_gudhi)
valid_dataset_abstract = ShapeDataset(valid_abstract_persistence_images, valid_abstract_labels)
test_dataset_abstract = ShapeDataset(test_abstract_persistence_images, test_abstract_labels)

# Define DataLoaders
batch_size = 32

train_loader_laplacians = torch.utils.data.DataLoader(train_dataset_laplacians, batch_size=batch_size, shuffle=False)
valid_loader_laplacians = torch.utils.data.DataLoader(valid_dataset_laplacians, batch_size=batch_size, shuffle=False)
test_loader_laplacians = torch.utils.data.DataLoader(test_dataset_laplacians, batch_size=batch_size, shuffle=False)

train_loader_vr = torch.utils.data.DataLoader(train_dataset_vr, batch_size=batch_size, shuffle=False)
valid_loader_vr = torch.utils.data.DataLoader(valid_dataset_vr, batch_size=batch_size, shuffle=False)
test_loader_vr = torch.utils.data.DataLoader(test_dataset_vr, batch_size=batch_size, shuffle=False)

train_loader_abstract = torch.utils.data.DataLoader(train_dataset_abstract, batch_size=batch_size, shuffle=False)
valid_loader_abstract = torch.utils.data.DataLoader(valid_dataset_abstract, batch_size=batch_size, shuffle=False)
test_loader_abstract = torch.utils.data.DataLoader(test_dataset_abstract, batch_size=batch_size, shuffle=False)

## Model Instantiation

In [12]:
# For single-input CNN (Laplacians)
input_shape = train_data_gudhi_laplacians[0].shape
num_classes = 2 # binary classification

model_single_laplacians = CNN(input_shape, num_classes)

# For dual-input CNNs (Laplacians + VR Persistence Images, Laplacians + Abstract Persistence Images)
input_shape1 = train_data_gudhi_laplacians[0].shape
input_shape2 = train_data_gudhi_vr_persistence_images[0].shape
input_shape3 = train_data_gudhi_abstract_persistence_images[0].shape

model_dual_lap_vr = DualInputCNN(input_shape1, input_shape2, num_classes)
model_dual_lap_abstract = DualInputCNN(input_shape1, input_shape3, num_classes)

## Training, Validation, and Testing Functions

In [None]:
def train_single_input(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    correct = 0
    progress_bar = tqdm(dataloader, desc="Training", leave=False)
    
    for inputs, labels in progress_bar:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == labels).sum().item()
        
        progress_bar.set_postfix(loss=loss.item())
    
    accuracy = correct / len(dataloader.dataset)
    return total_loss / len(dataloader.dataset), accuracy


def validate_single_input(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    progress_bar = tqdm(dataloader, desc="Validating", leave=False)
    
    with torch.no_grad():
        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            
            progress_bar.set_postfix(loss=loss.item())
    
    accuracy = correct / len(dataloader.dataset)
    return total_loss / len(dataloader.dataset), accuracy


def train_dual_input(model, dataloader1, dataloader2, optimizer, criterion, device):
    model.train()
    total_loss = 0
    correct = 0
    progress_bar = tqdm(zip(dataloader1, dataloader2), desc="Training (Dual Input)", leave=False, total=min(len(dataloader1), len(dataloader2)))
    
    for (inputs1, labels1), (inputs2, labels2) in progress_bar:
        inputs1, labels1 = inputs1.to(device), labels1.to(device)
        inputs2, labels2 = inputs2.to(device), labels2.to(device)
        
        if not torch.equal(labels1, labels2):
            print("Labels mismatch in dual-input training! Skipping batch.")
            continue
        
        optimizer.zero_grad()
        outputs = model(inputs1, inputs2)
        loss = criterion(outputs, labels1)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == labels1).sum().item()
        
        progress_bar.set_postfix(loss=loss.item())
    
    accuracy = correct / len(dataloader1.dataset)
    return total_loss / len(dataloader1.dataset), accuracy


def validate_dual_input(model, valid_loader_laplacians, valid_loader_vr, criterion, device='cuda'):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for (inputs1, _), (inputs2, targets) in zip(valid_loader_laplacians, valid_loader_vr):
            inputs1, inputs2, targets = inputs1.to(device), inputs2.to(device), targets.to(device)
            
            outputs = model(inputs1, inputs2)
            loss = criterion(outputs, targets)
            running_loss += loss.item()
            
            if outputs.shape[-1] > 1:
                _, predicted = torch.max(outputs, 1)
            else:  # Binary classification
                predicted = (outputs > 0.5).float()
            
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    avg_loss = running_loss / len(valid_loader_laplacians)
    accuracy = correct / total

    return avg_loss, accuracy

def test_single_input(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    progress_bar = tqdm(dataloader, desc="Testing", leave=False)

    with torch.no_grad():
        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            progress_bar.set_postfix(loss=loss.item())

    avg_loss = total_loss / len(dataloader.dataset)
    accuracy = correct / total
    precision = precision_score(all_labels, all_preds, average='binary')
    recall = recall_score(all_labels, all_preds, average='binary')
    f1 = f1_score(all_labels, all_preds, average='binary')

    metrics_df = pd.DataFrame({
        'Loss': [avg_loss],
        'Accuracy': [accuracy],
        'Precision': [precision],
        'Recall': [recall],
        'F1 Score': [f1]
    })

    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")
    return metrics_df


def test_dual_input(model, dataloader1, dataloader2, criterion, device='cuda'):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []
    progress_bar = tqdm(zip(dataloader1, dataloader2), desc="Testing (Dual Input)", leave=False, total=min(len(dataloader1), len(dataloader2)))

    with torch.no_grad():
        for (inputs1, _), (inputs2, targets) in progress_bar:
            inputs1, inputs2, targets = inputs1.to(device), inputs2.to(device), targets.to(device)

            outputs = model(inputs1, inputs2)
            loss = criterion(outputs, targets)
            running_loss += loss.item()

            if outputs.shape[-1] > 1:
                _, predicted = torch.max(outputs, 1)
            else:  # Binary classification
                predicted = (outputs > 0.5).float()

            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

    avg_loss = running_loss / len(dataloader1.dataset)
    accuracy = correct / total
    precision = precision_score(all_labels, all_preds, average='binary')
    recall = recall_score(all_labels, all_preds, average='binary')
    f1 = f1_score(all_labels, all_preds, average='binary')

    metrics_df = pd.DataFrame({
        'Loss': [avg_loss],
        'Accuracy': [accuracy],
        'Precision': [precision],
        'Recall': [recall],
        'F1 Score': [f1]
    })

    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")
    return metrics_df

## Training Loops

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_single_laplacians.to(device)
model_dual_lap_vr.to(device)
model_dual_lap_abstract.to(device)

DualInputCNN(
  (conv1_lap): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_lap): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool_lap): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (adaptive_pool_lap): AdaptiveAvgPool2d(output_size=(100, 100))
  (conv1_pers): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_pers): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (adaptive_pool_pers): AdaptiveAvgPool2d(output_size=(100, 100))
  (fc1): Linear(in_features=640000, out_features=128, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)

In [15]:
criterion = nn.CrossEntropyLoss()
optimizer_single = optim.Adam(model_single_laplacians.parameters(), lr=0.001)
optimizer_dual_vr = optim.Adam(model_dual_lap_vr.parameters(), lr=0.001)
optimizer_dual_abstract = optim.Adam(model_dual_lap_abstract.parameters(), lr=0.001)

num_epochs = 10

In [16]:
# Initialize empty lists to store all metrics for each epoch
test_metrics_single_list = []
test_metrics_dual_vr_list = []
test_metrics_dual_abstract_list = []

# Training loop for single-input model (Laplacians)
for epoch in range(num_epochs):
    model_single_laplacians.train()  # Ensure the model is in training mode
    train_loss, train_acc = train_single_input(model_single_laplacians, train_loader_laplacians, optimizer_single, criterion, device)
    model_single_laplacians.eval()  # Switch model to evaluation mode after training
    valid_loss, valid_acc = validate_single_input(model_single_laplacians, valid_loader_laplacians, criterion, device)

    # Test the model after each epoch
    test_metrics_single = test_single_input(model_single_laplacians, test_loader_laplacians, criterion, device)
    
    # Combine train, valid, and test metrics for this epoch
    metrics = test_metrics_single.copy()
    metrics['Train Loss'] = train_loss
    metrics['Train Accuracy'] = train_acc
    metrics['Valid Loss'] = valid_loss
    metrics['Valid Accuracy'] = valid_acc
    
    # Append to the list
    test_metrics_single_list.append(metrics)

    print(f"single-input model (Laplacians) Epoch {epoch+1}/{num_epochs}, "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

torch.save(model_single_laplacians.state_dict(), "model_single_laplacians_final.pth")

# Training loop for dual-input model (Laplacians + VR Persistence Images)
for epoch in range(num_epochs):
    model_dual_lap_vr.train()  # Ensure the model is in training mode
    train_loss, train_acc = train_dual_input(model_dual_lap_vr, train_loader_laplacians, train_loader_vr, optimizer_dual_vr, criterion, device)
    model_dual_lap_vr.eval()  # Switch model to evaluation mode after training
    valid_loss, valid_acc = validate_dual_input(model_dual_lap_vr, valid_loader_laplacians, valid_loader_vr, criterion, device)

    # Test the model after each epoch
    test_metrics_dual_vr = test_dual_input(model_dual_lap_vr, test_loader_laplacians, test_loader_vr, criterion, device)
    
    # Combine train, valid, and test metrics for this epoch
    metrics = test_metrics_dual_vr.copy()
    metrics['Train Loss'] = train_loss
    metrics['Train Accuracy'] = train_acc
    metrics['Valid Loss'] = valid_loss
    metrics['Valid Accuracy'] = valid_acc
    
    # Append to the list
    test_metrics_dual_vr_list.append(metrics)

    print(f"dual-input model (Laplacians + VR Persistence Images) Epoch {epoch+1}/{num_epochs}, "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

torch.save(model_dual_lap_vr.state_dict(), "model_dual_lap_vr_final.pth")

# Training loop for dual-input model (Laplacians + Abstract Persistence Images)
for epoch in range(num_epochs):
    model_dual_lap_abstract.train()  # Ensure the model is in training mode
    train_loss, train_acc = train_dual_input(model_dual_lap_abstract, train_loader_laplacians, train_loader_abstract, optimizer_dual_abstract, criterion, device)
    model_dual_lap_abstract.eval()  # Switch model to evaluation mode after training
    valid_loss, valid_acc = validate_dual_input(model_dual_lap_abstract, valid_loader_laplacians, valid_loader_abstract, criterion, device)

    # Test the model after each epoch
    test_metrics_dual_abstract = test_dual_input(model_dual_lap_abstract, test_loader_laplacians, test_loader_abstract, criterion, device)
    
    # Combine train, valid, and test metrics for this epoch
    metrics = test_metrics_dual_abstract.copy()
    metrics['Train Loss'] = train_loss
    metrics['Train Accuracy'] = train_acc
    metrics['Valid Loss'] = valid_loss
    metrics['Valid Accuracy'] = valid_acc
    
    # Append to the list
    test_metrics_dual_abstract_list.append(metrics)

    print(f"dual-input model (Laplacians + Abstract Persistence Images) Epoch {epoch+1}/{num_epochs}, "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Valid Loss: {valid_loss:.4f}, Valid Acc: {valid_acc:.4f}")

torch.save(model_dual_lap_abstract.state_dict(), "model_dual_lap_abstract_final.pth")


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0540, Test Accuracy: 0.8897, Precision: 0.9439, Recall: 0.8016, F1 Score: 0.8670
single-input model (Laplacians) Epoch 1/10, Train Loss: 0.0531, Train Acc: 0.8444, Valid Loss: 0.0560, Valid Acc: 0.8612


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1029, Test Accuracy: 0.8754, Precision: 0.9099, Recall: 0.8016, F1 Score: 0.8523
single-input model (Laplacians) Epoch 2/10, Train Loss: 0.0066, Train Acc: 0.9413, Valid Loss: 0.1055, Valid Acc: 0.8719


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1255, Test Accuracy: 0.8968, Precision: 0.9619, Recall: 0.8016, F1 Score: 0.8745
single-input model (Laplacians) Epoch 3/10, Train Loss: 0.0067, Train Acc: 0.9437, Valid Loss: 0.1289, Valid Acc: 0.8754


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1473, Test Accuracy: 0.8790, Precision: 0.9182, Recall: 0.8016, F1 Score: 0.8559
single-input model (Laplacians) Epoch 4/10, Train Loss: 0.0052, Train Acc: 0.9513, Valid Loss: 0.1510, Valid Acc: 0.8754


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.2099, Test Accuracy: 0.8897, Precision: 0.9612, Recall: 0.7857, F1 Score: 0.8646
single-input model (Laplacians) Epoch 5/10, Train Loss: 0.0053, Train Acc: 0.9481, Valid Loss: 0.2151, Valid Acc: 0.8612


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1965, Test Accuracy: 0.8968, Precision: 0.9619, Recall: 0.8016, F1 Score: 0.8745
single-input model (Laplacians) Epoch 6/10, Train Loss: 0.0051, Train Acc: 0.9537, Valid Loss: 0.2021, Valid Acc: 0.8754


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.2267, Test Accuracy: 0.8754, Precision: 0.9252, Recall: 0.7857, F1 Score: 0.8498
single-input model (Laplacians) Epoch 7/10, Train Loss: 0.0051, Train Acc: 0.9569, Valid Loss: 0.2326, Valid Acc: 0.8612


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.2153, Test Accuracy: 0.8968, Precision: 0.9619, Recall: 0.8016, F1 Score: 0.8745
single-input model (Laplacians) Epoch 8/10, Train Loss: 0.0055, Train Acc: 0.9544, Valid Loss: 0.2209, Valid Acc: 0.8754


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1998, Test Accuracy: 0.8968, Precision: 0.9619, Recall: 0.8016, F1 Score: 0.8745
single-input model (Laplacians) Epoch 9/10, Train Loss: 0.0059, Train Acc: 0.9475, Valid Loss: 0.2051, Valid Acc: 0.8719


Training:   0%|          | 0/50 [00:00<?, ?it/s]

Validating:   0%|          | 0/9 [00:00<?, ?it/s]

Testing:   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.2066, Test Accuracy: 0.8968, Precision: 0.9619, Recall: 0.8016, F1 Score: 0.8745
single-input model (Laplacians) Epoch 10/10, Train Loss: 0.0052, Train Acc: 0.9569, Valid Loss: 0.2121, Valid Acc: 0.8719


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0786, Test Accuracy: 0.8185, Precision: 0.7168, Recall: 0.9841, F1 Score: 0.8294
dual-input model (Laplacians + VR Persistence Images) Epoch 1/10, Train Loss: 0.9655, Train Acc: 0.8569, Valid Loss: 2.1300, Valid Acc: 0.8327


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0524, Test Accuracy: 0.8114, Precision: 0.7135, Recall: 0.9683, F1 Score: 0.8215
dual-input model (Laplacians + VR Persistence Images) Epoch 2/10, Train Loss: 0.0028, Train Acc: 0.9838, Valid Loss: 1.3202, Valid Acc: 0.8292


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0434, Test Accuracy: 0.8185, Precision: 0.7219, Recall: 0.9683, F1 Score: 0.8271
dual-input model (Laplacians + VR Persistence Images) Epoch 3/10, Train Loss: 0.0017, Train Acc: 0.9888, Valid Loss: 1.0549, Valid Acc: 0.8292


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0348, Test Accuracy: 0.8292, Precision: 0.7378, Recall: 0.9603, F1 Score: 0.8345
dual-input model (Laplacians + VR Persistence Images) Epoch 4/10, Train Loss: 0.0013, Train Acc: 0.9856, Valid Loss: 0.7646, Valid Acc: 0.8434


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0365, Test Accuracy: 0.8256, Precision: 0.7362, Recall: 0.9524, F1 Score: 0.8304
dual-input model (Laplacians + VR Persistence Images) Epoch 5/10, Train Loss: 0.0007, Train Acc: 0.9906, Valid Loss: 0.8004, Valid Acc: 0.8399


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0319, Test Accuracy: 0.8185, Precision: 0.7389, Recall: 0.9206, F1 Score: 0.8198
dual-input model (Laplacians + VR Persistence Images) Epoch 6/10, Train Loss: 0.0032, Train Acc: 0.9819, Valid Loss: 0.5420, Valid Acc: 0.8612


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0382, Test Accuracy: 0.8114, Precision: 0.7160, Recall: 0.9603, F1 Score: 0.8203
dual-input model (Laplacians + VR Persistence Images) Epoch 7/10, Train Loss: 0.0039, Train Acc: 0.9719, Valid Loss: 0.8732, Valid Acc: 0.8363


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0379, Test Accuracy: 0.8185, Precision: 0.7358, Recall: 0.9286, F1 Score: 0.8211
dual-input model (Laplacians + VR Persistence Images) Epoch 8/10, Train Loss: 0.0011, Train Acc: 0.9862, Valid Loss: 0.8149, Valid Acc: 0.8434


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0487, Test Accuracy: 0.8292, Precision: 0.7378, Recall: 0.9603, F1 Score: 0.8345
dual-input model (Laplacians + VR Persistence Images) Epoch 9/10, Train Loss: 0.0007, Train Acc: 0.9938, Valid Loss: 1.1236, Valid Acc: 0.8399


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.0625, Test Accuracy: 0.8078, Precision: 0.7069, Recall: 0.9762, F1 Score: 0.8200
dual-input model (Laplacians + VR Persistence Images) Epoch 10/10, Train Loss: 0.0010, Train Acc: 0.9925, Valid Loss: 1.5710, Valid Acc: 0.8292


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1512, Test Accuracy: 0.8897, Precision: 1.0000, Recall: 0.7540, F1 Score: 0.8597
dual-input model (Laplacians + Abstract Persistence Images) Epoch 1/10, Train Loss: 0.2715, Train Acc: 0.8725, Valid Loss: 4.7810, Valid Acc: 0.8861


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1626, Test Accuracy: 0.8968, Precision: 0.9802, Recall: 0.7857, F1 Score: 0.8722
dual-input model (Laplacians + Abstract Persistence Images) Epoch 2/10, Train Loss: 0.0017, Train Acc: 0.9781, Valid Loss: 5.0932, Valid Acc: 0.9039


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1393, Test Accuracy: 0.8968, Precision: 0.9802, Recall: 0.7857, F1 Score: 0.8722
dual-input model (Laplacians + Abstract Persistence Images) Epoch 3/10, Train Loss: 0.0013, Train Acc: 0.9844, Valid Loss: 4.3413, Valid Acc: 0.9039


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1481, Test Accuracy: 0.8968, Precision: 0.9802, Recall: 0.7857, F1 Score: 0.8722
dual-input model (Laplacians + Abstract Persistence Images) Epoch 4/10, Train Loss: 0.0014, Train Acc: 0.9862, Valid Loss: 4.6294, Valid Acc: 0.9039


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1698, Test Accuracy: 0.8968, Precision: 0.9709, Recall: 0.7937, F1 Score: 0.8734
dual-input model (Laplacians + Abstract Persistence Images) Epoch 5/10, Train Loss: 0.0011, Train Acc: 0.9881, Valid Loss: 5.2995, Valid Acc: 0.9004


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1681, Test Accuracy: 0.9004, Precision: 0.9804, Recall: 0.7937, F1 Score: 0.8772
dual-input model (Laplacians + Abstract Persistence Images) Epoch 6/10, Train Loss: 0.0010, Train Acc: 0.9900, Valid Loss: 5.2732, Valid Acc: 0.9039


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1760, Test Accuracy: 0.9004, Precision: 0.9804, Recall: 0.7937, F1 Score: 0.8772
dual-input model (Laplacians + Abstract Persistence Images) Epoch 7/10, Train Loss: 0.0007, Train Acc: 0.9912, Valid Loss: 5.5024, Valid Acc: 0.9039


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1699, Test Accuracy: 0.8968, Precision: 0.9709, Recall: 0.7937, F1 Score: 0.8734
dual-input model (Laplacians + Abstract Persistence Images) Epoch 8/10, Train Loss: 0.0008, Train Acc: 0.9925, Valid Loss: 5.2558, Valid Acc: 0.9039


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1653, Test Accuracy: 0.9004, Precision: 0.9804, Recall: 0.7937, F1 Score: 0.8772
dual-input model (Laplacians + Abstract Persistence Images) Epoch 9/10, Train Loss: 0.0008, Train Acc: 0.9919, Valid Loss: 5.0739, Valid Acc: 0.9004


Training (Dual Input):   0%|          | 0/50 [00:00<?, ?it/s]

Testing (Dual Input):   0%|          | 0/9 [00:00<?, ?it/s]

Test Loss: 0.1545, Test Accuracy: 0.8932, Precision: 0.9528, Recall: 0.8016, F1 Score: 0.8707
dual-input model (Laplacians + Abstract Persistence Images) Epoch 10/10, Train Loss: 0.0007, Train Acc: 0.9900, Valid Loss: 4.5546, Valid Acc: 0.9004


In [None]:
# Convert the lists of metrics into DataFrames with epoch as the index
test_metrics_single_df = pd.concat(test_metrics_single_list, ignore_index=True)
test_metrics_dual_vr_df = pd.concat(test_metrics_dual_vr_list, ignore_index=True)
test_metrics_dual_abstract_df = pd.concat(test_metrics_dual_abstract_list, ignore_index=True)

# Add 'Epoch' as new column
epochs = list(range(1, num_epochs + 1))

test_metrics_single_df['Epoch'] = epochs
test_metrics_dual_vr_df['Epoch'] = epochs
test_metrics_dual_abstract_df['Epoch'] = epochs

# Set 'Epoch' as index
test_metrics_single_df.set_index('Epoch', inplace=True)
test_metrics_dual_vr_df.set_index('Epoch', inplace=True)
test_metrics_dual_abstract_df.set_index('Epoch', inplace=True)

# Rename first two columns to 'Test Loss' and 'Test Accuracy'
test_metrics_single_df.rename(columns={'Loss': 'Test Loss', 'Accuracy': 'Test Accuracy'}, inplace=True)
test_metrics_dual_vr_df.rename(columns={'Loss': 'Test Loss', 'Accuracy': 'Test Accuracy'}, inplace=True)
test_metrics_dual_abstract_df.rename(columns={'Loss': 'Test Loss', 'Accuracy': 'Test Accuracy'}, inplace=True)

# Print final test metrics for all models
print("\nSingle-input model (Laplacians) Test Metrics:")
print(test_metrics_single_df)

print("\nDual-input model (Laplacians + VR Persistence Images) Test Metrics:")
print(test_metrics_dual_vr_df)

print("\nDual-input model (Laplacians + Abstract Persistence Images) Test Metrics:")
print(test_metrics_dual_abstract_df)



Single-input model (Laplacians) Test Metrics:
       Test Loss  Test Accuracy  Precision    Recall  F1 Score  Train Loss  \
Epoch                                                                        
1       0.053981       0.889680   0.943925  0.801587  0.866953    0.053081   
2       0.102899       0.875445   0.909910  0.801587  0.852321    0.006592   
3       0.125530       0.896797   0.961905  0.801587  0.874459    0.006663   
4       0.147281       0.879004   0.918182  0.801587  0.855932    0.005214   
5       0.209872       0.889680   0.961165  0.785714  0.864629    0.005348   
6       0.196451       0.896797   0.961905  0.801587  0.874459    0.005096   
7       0.226710       0.875445   0.925234  0.785714  0.849785    0.005122   
8       0.215337       0.896797   0.961905  0.801587  0.874459    0.005464   
9       0.199815       0.896797   0.961905  0.801587  0.874459    0.005937   
10      0.206587       0.896797   0.961905  0.801587  0.874459    0.005184   

       Train Acc

In [None]:
# Export test metrics to CSV files

test_metrics_single_df.to_csv('test_metrics_single_laplacian_gudhi_medical.csv')
test_metrics_dual_vr_df.to_csv('test_metrics_dual_lap_vr_gudhi_medical.csv')
test_metrics_dual_abstract_df.to_csv('test_metrics_dual_lap_abstract_gudhi_medical.csv')