In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm.notebook import tqdm
from torchsummary import summary
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# import Gudhi Shape Dataset files
points = []
laplacians = []
vr_persistence_images = []
abstract_persistence_images = []

for i in range(0,10):
    points.append(np.genfromtxt('Gudhi Shape Dataset/shape_'+str(i)+'_points.csv', delimiter=',', skip_header=0))
    laplacians.append(np.genfromtxt('Gudhi Shape Dataset/shape_'+str(i)+'_laplacian.csv', delimiter=',', skip_header=0))
    vr_persistence_images.append(np.genfromtxt('Gudhi Shape Dataset/shape_'+str(i)+'_vr_persistence_image.csv', delimiter=',', skip_header=0))
    abstract_persistence_images.append(np.genfromtxt('Gudhi Shape Dataset/shape_'+str(i)+'_abstract_persistence_image.csv', delimiter=',', skip_header=0))


# import labels for the shapes
shape_labels = np.genfromtxt('Gudhi Shape Dataset/shape_labels.csv', delimiter=',', skip_header=1)
shape_labels = shape_labels.astype(int)[:,2]
print(shape_labels)

[1 1 1 1 1 0 0 0 0 0]


In [3]:
class ShapeDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = [torch.tensor(d, dtype=torch.float32).unsqueeze(0) for d in data]
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [4]:
class CNN(nn.Module):
    def __init__(self, input_shape, num_classes=2):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)  # Conv layer 1
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)  # Conv layer 2
        self.dropout = nn.Dropout(0.5)  # Dropout layer
        self.fc1 = nn.Linear(32 * input_shape[0] * input_shape[1], 128)  # Fully connected layer 1
        self.fc2 = nn.Linear(128, num_classes)  # Fully connected layer 2

    def forward(self, x):
        x = F.relu(self.conv1(x))  # Apply ReLU after conv1
        x = F.relu(self.conv2(x))  # Apply ReLU after conv2
        x = torch.flatten(x, start_dim=1)  # Flatten for FC layers
        x = self.dropout(x)  # Apply dropout
        x = F.relu(self.fc1(x))  # FC layer with ReLU
        x = self.fc2(x)  # Output layer
        return F.log_softmax(x, dim=1)  # Log-Softmax for classification


In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Training on {device}")

Training on cpu


In [9]:
def train_and_test_for_data_type(data_type, data, labels, input_shape):
    # Create dataset and split into train/test sets
    dataset = ShapeDataset(data, labels)
    train_data, test_data = train_test_split(dataset.data, test_size=0.2, random_state=42)
    train_labels, test_labels = train_test_split(dataset.labels, test_size=0.2, random_state=42)

    # Convert to custom Dataset format for train and test sets
    train_dataset = torch.utils.data.TensorDataset(torch.stack(train_data), torch.tensor(train_labels))
    test_dataset = torch.utils.data.TensorDataset(torch.stack(test_data), torch.tensor(test_labels))

    # Create DataLoaders
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Define model
    model = CNN(input_shape=input_shape, num_classes=2).to(device)

    # Define optimizer and loss function
    learning_rate = 0.001
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()  # Cross-Entropy Loss for multi-class classification

    epoch_results = []

    # Training loop
    num_epochs = 10
    for epoch in range(1, num_epochs + 1):
        print(f"Training model for {data_type} - Epoch {epoch}/{num_epochs}")
        train_loss = train(model, device, train_dataloader, optimizer, criterion, epoch)
        test_loss, accuracy = test(model, device, test_dataloader, criterion)
        
        # Store results
        epoch_results.append({
            'Epoch': epoch,
            'Train Loss': train_loss,
            'Test Loss': test_loss,
            'Test Accuracy (%)': accuracy
        })

    # Convert results to DataFrame for tabular output
    epoch_results_df = pd.DataFrame(epoch_results)
    print(f"\nTraining and testing results for {data_type}:")
    print(epoch_results_df)

In [10]:
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    tk0 = tqdm(train_loader, total=len(train_loader))
    train_loss = 0
    for batch_idx, (data, target) in enumerate(tk0):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        tk0.set_postfix(loss=loss.item())
    return train_loss / len(train_loader)


def test(model, device, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, accuracy

In [None]:
# Train and test for Laplacians (1000x1000 input)
train_and_test_for_data_type("Laplacians", laplacians, shape_labels, input_shape=(1000, 1000))

# Train and test for VR Persistence Images (100x100 input)
train_and_test_for_data_type("VR Persistence Images", vr_persistence_images, shape_labels, input_shape=(100, 100))

# Train and test for Abstract Persistence Images (100x100 input)
train_and_test_for_data_type("Abstract Persistence Images", abstract_persistence_images, shape_labels, input_shape=(100, 100))


  train_dataset = torch.utils.data.TensorDataset(torch.stack(train_data), torch.tensor(train_labels))
  test_dataset = torch.utils.data.TensorDataset(torch.stack(test_data), torch.tensor(test_labels))


Training model for Laplacians - Epoch 1/10


  0%|          | 0/1 [00:00<?, ?it/s]