### Neural Network Model for Comparison
In comparison to state of the art models, we train a neural network on the normalized data including the raw data. 
Since our dataset comparises image data, a natural thing to do is compare with a CNN model. However, we do not compare with a CNN model because the resolution of the data is small. The data are 10x10 images. We trained CNN models, though but did not get any better results.

In [30]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

class DynamicBinaryClassifier(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(DynamicBinaryClassifier, self).__init__()
        layers = []
        prev_size = input_size
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.ReLU())
            prev_size = hidden_size
        self.hidden_layers = nn.Sequential(*layers)
        self.output_layer = nn.Linear(prev_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.hidden_layers(x)
        x = self.output_layer(x)
        x = self.sigmoid(x)
        return x

class BinaryClassifierTrainer:
    def __init__(self, model, criterion, optimizer, device="cuda" if torch.cuda.is_available() else "cpu"):
        self.model = model.to(device)
        self.criterion = criterion
        self.optimizer = optimizer
        self.device = device

    def _to_tensor(self, data):
        if not torch.is_tensor(data):
            data = torch.tensor(data, dtype=torch.float32)
        return data.to(self.device)

    def train(self, input_data, labels, num_epochs=100, batch_size=32):
        input_data, labels = self._to_tensor(input_data), self._to_tensor(labels)

        for epoch in range(num_epochs):
            total_loss = 0.0

            # Forward pass and calculate loss
            for i in range(0, len(input_data), batch_size):
                batch_input = input_data[i:i + batch_size]
                batch_labels = labels[i:i + batch_size]

                outputs = self.model(batch_input)
                loss = self.criterion(outputs, batch_labels)

                # Backward pass and optimization
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                total_loss += loss.item()

            average_loss = total_loss / (len(input_data) / batch_size)

            # Print the average loss every 100 epochs
            if (epoch + 1) % 100 == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Average Loss: {average_loss:.8f}')

    def evaluate(self, input_data, labels):
        input_data, labels = self._to_tensor(input_data), self._to_tensor(labels)

        with torch.no_grad():
            outputs = self.model(input_data)
            predictions = (outputs > 0.5).float()

        accuracy = accuracy_score(labels.cpu().numpy(), predictions.cpu().numpy())
        return accuracy
    
from glob import glob

# Example usage:
input_size = 100
hidden_sizes = [70,64, 32]
output_size = 1

model = DynamicBinaryClassifier(input_size, hidden_sizes, output_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

trainer = BinaryClassifierTrainer(model, criterion, optimizer)


# read data
batch_size = 128
files = glob('../../data/data-norm/max-only/*.csv')
file_names = [f.split('/')[-1] for f in files]
results = {name:[] for name in file_names}
for name in results:
    for _ in range(3):
        print(name)
        dat = pd.read_csv(f'../../data/data-norm/max-only/{name}')
        data = dat.iloc[:, 1:].values
        labels = dat.iloc[:, 0].values.reshape(-1, 1)
        # Split the data into training and test sets
        input_train, input_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.2, random_state=42)
        # Training
        trainer.train(input_train, labels_train, num_epochs=1000, batch_size=batch_size)
        # Evaluation
        test_accuracy = trainer.evaluate(input_test, labels_test)
        print(f'Test Accuracy: {test_accuracy:.4f}')
        results[name].append(test_accuracy)
data_accuracies = pd.DataFrame(results)
data_accuracies.to_csv('../../data/data-norm/accuracies.csv', index=False)

# dat = pd.read_csv('../../data/data-norm/max-only/raw_image_data.csv')
# data = dat.iloc[:, 1:].values
# labels = dat.iloc[:, 0].values.reshape(-1, 1)

# # Split the data into training and test sets
# input_train, input_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# # Training
# trainer.train(input_train, labels_train, num_epochs=1000, batch_size=32)

# # Evaluation
# test_accuracy = trainer.evaluate(input_test, labels_test)
# print(f'Test Accuracy: {test_accuracy:.4f}')


nthroot_mm0.3448.csv
Epoch [100/1000], Average Loss: 0.52492991
Epoch [200/1000], Average Loss: 0.49444794
Epoch [300/1000], Average Loss: 0.47711120
Epoch [400/1000], Average Loss: 0.46212206
Epoch [500/1000], Average Loss: 0.44825004
Epoch [600/1000], Average Loss: 0.43483786
Epoch [700/1000], Average Loss: 0.42215778
Epoch [800/1000], Average Loss: 0.40958082
Epoch [900/1000], Average Loss: 0.39741642
Epoch [1000/1000], Average Loss: 0.38509817
Test Accuracy: 0.7417
nthroot_mm0.3448.csv
Epoch [100/1000], Average Loss: 0.37263244
Epoch [200/1000], Average Loss: 0.36068836
Epoch [300/1000], Average Loss: 0.34939292
Epoch [400/1000], Average Loss: 0.33820136
Epoch [500/1000], Average Loss: 0.32753672
Epoch [600/1000], Average Loss: 0.31760112
Epoch [700/1000], Average Loss: 0.30822178
Epoch [800/1000], Average Loss: 0.30515712
Epoch [900/1000], Average Loss: 0.29098924
Epoch [1000/1000], Average Loss: 0.28197785
Test Accuracy: 0.7389
nthroot_mm0.3448.csv
Epoch [100/1000], Average Loss:

In [2]:
import pandas as pd
data_accuracies = pd.read_csv('../../data/data-norm/accuracies.csv')

In [5]:
data_accuracies.mean(axis=0).nlargest(20)

nthroot_mm0.5517.csv    0.760778
nthroot_0.5862.csv      0.750076
nthroot_0.4828.csv      0.749465
nthroot_0.5172.csv      0.747997
nthroot_mm0.7586.csv    0.746958
nthroot_0.7931.csv      0.746102
nthroot_0.3793.csv      0.744023
nthroot_mm0.1034.csv    0.743472
nthroot_mm0.3103.csv    0.741760
nthroot_mm0.5172.csv    0.741699
nthroot_mm0.3448.csv    0.740659
nthroot_mm0.6552.csv    0.740598
nthroot_mm0.4483.csv    0.739864
nthroot_mm0.8276.csv    0.737724
nthroot_mm0.4828.csv    0.737602
nthroot_mm0.1724.csv    0.736623
nthroot_mm0.3793.csv    0.736379
nthroot_1.0.csv         0.736012
norm_1.csv              0.734666
nthroot_mm0.931.csv     0.734116
dtype: float64

Of all normalizations as well as the raw data, the $r^{th}$ with min-max over each image performs the best at an average of $76\%$ accuracy on $20\%$ test data for neural nets models.