# Section 3

## Import libraries and define functions

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from warnings import simplefilter
#simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

from sklearn.model_selection import train_test_split

import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
#from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

#from sage.all import primes_first_n
import torch
import torch.nn as nn
import torch.optim as optim

#from sage.all import is_prime
import gc
import pickle
import ast
import math

import warnings
warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

In [None]:
def is_prime(n):
    """Return True if ``n`` is a prime number, False otherwise.

    Uses trial division by every integer from 2 up to the integer square
    root of ``n``.  The bound is computed with ``math.isqrt`` so it is exact
    for arbitrarily large integers (a floating-point square root can round
    the bound down for large ``n``), and the candidate divisors come from a
    lazy ``range`` instead of a freshly allocated NumPy array.
    """
    if n < 2:
        return False
    # A composite n always has a divisor no larger than floor(sqrt(n)).
    for divisor in range(2, math.isqrt(int(n)) + 1):
        if n % divisor == 0:
            return False
    return True

# Column labels: the decimal string of every prime p with 1 <= p <= 1000.
PRIME_COLS_BIG = [str(candidate) for candidate in range(1, 1001) if is_prime(candidate)]
# Type tags searched for inside the raw `instance_types` strings.
LfunctionTypes = ['Artin', 'BMF', 'CMF', 'DIR', 'ECNF', 'ECQ', 'G2Q', 'HMF']
# Does 'NF' really occur? Yes as the Riemann zeta, which we possibly want to remove

   
def build_lfunctions_df_big(filename='lfun_rat_withap.txt'):
    """Load the rational L-functions table and normalise ``instance_types``.

    Parameters
    ----------
    filename : str, optional
        Path of the colon-delimited text file to read.  Defaults to
        ``'lfun_rat_withap.txt'`` (the file downloaded from Zenodo), which is
        the path the function always read before this became a parameter.

    Returns
    -------
    pandas.DataFrame
        The table as parsed by ``pd.read_table``, with every raw
        ``instance_types`` value replaced by a tuple holding the entries of
        ``LfunctionTypes`` that occur in it as substrings, in
        ``LfunctionTypes`` order.
    """
    DF = pd.read_table(filename, delimiter=":", header='infer', low_memory=False)

    # Translate each distinct raw value once and look rows up in that table,
    # instead of re-scanning the type names for every row with a row-wise
    # DataFrame.apply.
    raw_to_types = {
        raw: tuple(Ltype for Ltype in LfunctionTypes if Ltype in raw)
        for raw in DF['instance_types'].unique()
    }
    DF['instance_types'] = DF['instance_types'].map(raw_to_types.__getitem__)
    return DF

def write_to_int(an_list):
    """Parse a bracketed, comma-separated string of integers into a list.

    Parameters
    ----------
    an_list : str
        Text such as ``'[1, -2, 3]'``.  Every ``[`` and ``]`` character is
        discarded before the remainder is split on commas.

    Returns
    -------
    list of int
        The parsed integers in their original order; an empty list when the
        string holds no entries (for example ``'[]'``).

    Raises
    ------
    ValueError
        If a non-empty entry is not a valid integer literal.
    """
    stripped = an_list.replace('[', '').replace(']', '').strip()
    # ''.split(',') yields [''] and int('') raises, so the empty case has to
    # be handled before splitting.
    if not stripped:
        return []
    return [int(an) for an in stripped.split(',')]

def write_to_hasse_normalized_primes_big(ap_list, w, d = 1):
    """Parse a coefficient string and scale each entry by its prime.

    ``ap_list`` is parsed with ``write_to_int`` and paired, in order, with
    the labels in ``PRIME_COLS_BIG``.  The coefficient paired with prime
    ``p`` is multiplied by ``(d * p**(w/2))**(-1)``, rounded to five
    decimals and stored as ``np.float32``.

    Returns the list of scaled values only; pairing stops at the shorter of
    the two sequences.
    """
    coefficients = write_to_int(ap_list)
    primes = (int(label) for label in PRIME_COLS_BIG)
    return [
        np.float32(round(coeff * (d * prime ** (w / 2)) ** (-1), 5))
        for prime, coeff in zip(primes, coefficients)
        if is_prime(prime)  # same guard as before: skip any non-prime label
    ]


def build_hasse_ap_df_big(DF):
    """Return a new frame with ``ap`` expanded into one column per prime.

    Parameters
    ----------
    DF : pandas.DataFrame
        Must contain the columns ``'ap'``, ``'motivic_weight'`` and
        ``'degree'``; each row's ``ap`` string is normalised with
        ``write_to_hasse_normalized_primes_big``.

    Returns
    -------
    pandas.DataFrame
        Every column of ``DF`` except ``'ap'``, followed by one column per
        label in ``PRIME_COLS_BIG``, on the same index as ``DF``.  ``DF``
        itself is not modified.
    """
    normalized_rows = [
        write_to_hasse_normalized_primes_big(a, w, d)
        for w, a, d in zip(DF['motivic_weight'], DF['ap'], DF['degree'])
    ]
    # The rows are produced by zipping against PRIME_COLS_BIG, so that is the
    # list that must label them (not a constant defined in a later cell).
    prime_block = pd.DataFrame(normalized_rows, columns=PRIME_COLS_BIG, index=DF.index)
    # A single concat attaches all prime columns at once rather than growing
    # the frame column by column.
    return pd.concat([DF.drop(columns='ap'), prime_block], axis=1)

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
def is_prime(n):
    """Return True if ``n`` is a prime number, False otherwise.

    Uses trial division by every integer from 2 up to the integer square
    root of ``n``.  The bound is computed with ``math.isqrt`` so it is exact
    for arbitrarily large integers (a floating-point square root can round
    the bound down for large ``n``), and the candidate divisors come from a
    lazy ``range`` instead of a freshly allocated NumPy array.
    """
    if n < 2:
        return False
    # A composite n always has a divisor no larger than floor(sqrt(n)).
    for divisor in range(2, math.isqrt(int(n)) + 1):
        if n % divisor == 0:
            return False
    return True
    
# Upper bound (inclusive) of the integers scanned for primes.
NUM_ANS = 1000
# Decimal-string labels of the primes in 1..NUM_ANS, used as column names.
PRIME_COLUMNS = [str(k) for k in range(1, NUM_ANS + 1) if is_prime(k)]

In [None]:
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of samples whose top-scoring class equals the label.

    Parameters
    ----------
    y_true : torch.Tensor
        Class labels; the size of its first dimension is the sample count.
    y_pred : torch.Tensor
        Per-class scores; the predicted class is the index of the maximum
        along dimension 1.

    Returns
    -------
    float
        ``correct / total``.  ``nan`` is returned for an empty batch (for
        example a type subset with no test rows) instead of raising
        ``ZeroDivisionError``.
    """
    total = y_true.size(0)
    if total == 0:
        return float('nan')
    predicted = torch.max(y_pred, 1).indices
    correct = (predicted == y_true).sum().item()
    return correct / total

In [None]:
#seed = 11167297796775735125  # Set your desired seed value
# One seed drives both libraries, so changing it here changes both RNGs.
# NOTE: np.random.seed only accepts values in [0, 2**32 - 1].
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

## CNN on PRAT dataset

In [None]:
# Read the colon-delimited table; column names are inferred from the file.
DF = pd.read_table('lfun_rat_withap.txt',delimiter=":",header='infer', low_memory=False)

In [None]:
# Keep only rows that are primitive, have motivic weight 1, degree 4 and
# order of vanishing at most 3.
DF = DF[(DF['primitive'] == True)&(DF['motivic_weight']==1) & (DF['degree']==4) & (DF['order_of_vanishing']<=3)]

In [None]:
# Expand the `ap` strings into normalised per-prime columns, then drop the
# raw frame and force a garbage-collection pass to release its memory.
DF_ap = build_hasse_ap_df_big(DF)
del DF
gc.collect()

In [None]:
from matplotlib.ticker import ScalarFormatter
# Conductors as a plain integer array for plotting.
conductors = DF_ap['conductor'].to_numpy().astype(int)

plt.figure(figsize=(10, 6))

# Create a histogram with 100 bins
sns.histplot(conductors, bins=100)

plt.title('Distribution of Conductor')
plt.xlabel('Conductor')
plt.ylabel('Frequency')
# Show conductor tick labels as plain numbers rather than scientific notation.
plt.gca().xaxis.set_major_formatter(ScalarFormatter())
plt.gca().xaxis.get_major_formatter().set_scientific(False)
plt.tight_layout()
plt.show()

In [None]:
from collections import Counter

def compute_instance_type_stats(dataframe, column_name, instance_types):
    """Print, for each requested type, how many rows list it.

    Every value of ``dataframe[column_name]`` is a string that
    ``ast.literal_eval`` turns into a collection of type names.  A type is
    counted at most once per row, and one ``"<type>: <count>"`` line is
    printed per entry of ``instance_types`` (0 for types never seen).
    Nothing is returned.
    """
    tally = Counter()
    for raw_value in dataframe[column_name]:
        # set() makes a type repeated within one row count only once.
        tally.update(set(ast.literal_eval(raw_value)))

    for wanted in instance_types:
        print(f"{wanted}: {tally[wanted]}")

#possible_instance_types = ['CMF', 'ECQ', 'Artin', 'ECNF', 'BMF', 'HMF', 'DIR', 'G2Q']
# Types that are counted here and tracked separately in the cells below.
possible_instance_types = ['ECNF', 'BMF', 'HMF', 'G2Q']

# Print how many rows of the full dataset list each of these types
compute_instance_type_stats(DF_ap, 'instance_types', possible_instance_types)

In [None]:
# 80/20 train/test split of the full dataset with a fixed random_state.
DF_train, DF_test = train_test_split(DF_ap, test_size=0.2, random_state=0)

In [None]:
# Per-type row counts in the training split.
compute_instance_type_stats(DF_train, 'instance_types', possible_instance_types)

In [None]:
# Per-type row counts in the test split, to compare against the training
# split counts printed above.
compute_instance_type_stats(DF_test, 'instance_types', possible_instance_types)

In [None]:
# Boolean row masks over the test set, one per type: True where the row's
# instance_types contains that type.
# The stringified lists are parsed once and reused for every type instead of
# re-running ast.literal_eval over the whole column per type.
parsed_test_types = DF_test['instance_types'].apply(ast.literal_eval)
type_indices = {
    instance_type: parsed_test_types.apply(lambda x: instance_type in x).values
    for instance_type in possible_instance_types
}

In [None]:
batch_size = 3000
# First 168 prime-labelled columns are used as the model inputs.
feature_columns = PRIME_COLUMNS[:168]

# Extract the input matrix and the classification target for each split
X_train = DF_train[feature_columns].values  # inputs: one column per prime label
y_train = DF_train['order_of_vanishing'].values  # target: order of vanishing
X_test = DF_test[feature_columns].values  # inputs: one column per prime label
y_test = DF_test['order_of_vanishing'].values  # target: order of vanishing

#del DF_train, DF_test, DF_ap
#gc.collect()

# Scale the data: the scaler is fitted on the training rows only and the
# same fitted transform is then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for CNN: (n_samples, channels, sequence_length)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])  # 1 channel
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])  # 1 channel

# Convert data to PyTorch tensors (float32 inputs, int64 class labels)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Mini-batches are drawn from the training tensors in shuffled order.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
max_epochs = 25  # number of passes over the training loader
saliency_value_list = []  # receives one averaged saliency vector per run
test_accuracy_list = []  # receives one per-epoch test-accuracy list per run

class CNN(nn.Module):
    """1-D convolutional classifier: three conv+pool stages, then three dense layers.

    Parameters
    ----------
    num_classes : int
        Output size of the final linear layer (one score per class).
    input_length : int, optional
        Size of the last dimension of the inputs.  When omitted it is read
        from the notebook-level ``X_train`` tensor, which is what the
        constructor always did before this parameter existed.
    """

    def __init__(self, num_classes, input_length=None):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, padding=1)

        if input_length is None:
            input_length = X_train.shape[-1]
        # Push an all-zero sample through the conv stack to find out how many
        # features reach the first dense layer.
        dummy_input = torch.zeros(1, 1, input_length)
        self.flattened_size = self._get_flattened_size(dummy_input)

        self.fc1 = nn.Linear(self.flattened_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, num_classes)  # Final output layer for classification

        self.dropout = nn.Dropout(0.5)

    def _conv_features(self, x):
        """Apply the three conv -> ReLU -> max-pool stages to ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(torch.relu(conv(x)))
        return x

    def _get_flattened_size(self, x):
        """Return the flattened feature count the conv stack yields for ``x``."""
        # Shape probing only, so no autograd graph needs to be recorded.
        with torch.no_grad():
            return self._conv_features(x).view(1, -1).size(1)

    def forward(self, x):
        """Return the class scores for a batch shaped (batch, 1, length)."""
        x = self._conv_features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.dropout(x)  # Dropout on the flattened conv features
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)  # Raw scores; CrossEntropyLoss applies the softmax

# Initialize model, loss function, and optimizer
# Size the output layer from the largest label in either split: counting the
# distinct labels of the test split alone undersizes it whenever the test
# split happens to miss a class that the training split contains.
num_classes = int(max(y_train.max().item(), y_test.max().item())) + 1
model = CNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()  # Loss function for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Lists to store accuracy values
train_accuracies = []
test_accuracies = []
test_accuracies_type = {instance_type : [] for instance_type in possible_instance_types}

# Baseline: per-type test accuracy of the untrained model (entry 0 of each list)
model.eval()
with torch.no_grad():
    for instance_type in possible_instance_types:
        type_mask = type_indices[instance_type]
        type_outputs = model(X_test[type_mask].to(device))
        type_accuracy = calculate_accuracy(y_test[type_mask].to(device), type_outputs)
        test_accuracies_type[instance_type].append(type_accuracy)

# Training the model
epochs = max_epochs
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    with torch.no_grad():
        # Train accuracy: measured on the last mini-batch of the epoch only,
        # using the outputs computed in training mode (dropout active).
        #train_outputs = model(X_train)
        #train_accuracy = calculate_accuracy(y_train, train_outputs)
        train_accuracy = calculate_accuracy(labels, outputs)
        train_accuracies.append(train_accuracy)

        # Test accuracy on the whole test set
        test_outputs = model(X_test.to(device))
        test_accuracy = calculate_accuracy(y_test.to(device), test_outputs)
        test_accuracies.append(test_accuracy)

        # Test accuracies for different types.  Separate names are used so
        # the overall test_accuracy logged below is not overwritten by the
        # accuracy of the last type in the loop.
        for instance_type in possible_instance_types:
            type_mask = type_indices[instance_type]
            type_outputs = model(X_test[type_mask].to(device))
            type_accuracy = calculate_accuracy(y_test[type_mask].to(device), type_outputs)
            test_accuracies_type[instance_type].append(type_accuracy)

    # Print every epoch (raise the modulus to log less often)
    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

test_accuracy_list.append(test_accuracies)
#torch.save(model.state_dict(), f'Conductor_models/CNN_{conductor_min}_to_{conductor_min}.pth')

    
# Saliency Map Calculation
model.eval()  # Set the model to evaluation mode

# Draw up to 3000 random test rows; the copy is a fresh leaf tensor that
# records gradients with respect to the inputs.
input_data = X_test[torch.randperm(X_test.size(0))[:3000]].clone().detach().requires_grad_(True)

# Forward pass
output = model(input_data.to(device))
_, predicted_class = torch.max(output, 1)  # Get the predicted class index

model.zero_grad()

# One-hot mask that selects, for every sample, the score of its predicted class
one_hot = torch.zeros_like(output)
one_hot[torch.arange(output.size(0)), predicted_class] = 1

# Single backward pass for the whole batch: differentiates the sum of the
# selected (predicted-class) scores with respect to the inputs
output.backward(gradient=one_hot, retain_graph=True)

# Input gradients for every sampled row
saliency = input_data.grad  # Saliency map for each sample

# Average the absolute gradients over the sampled test rows
saliency = saliency.abs().mean(dim=0).squeeze().detach().cpu().numpy()

saliency_value_list.append(saliency)

In [None]:
# Same saliency computation as for the whole test set, restricted to the
# test rows of each type; results are stored per type.
saliency_type_dict = dict.fromkeys(possible_instance_types, None)
for instance_type in possible_instance_types:

    # Draw up to 3000 random test rows of this type as a gradient-tracking leaf
    X_test_type = X_test[type_indices[instance_type]]
    input_data = X_test_type[torch.randperm(X_test_type.size(0))[:3000]].clone().detach().requires_grad_(True)

    # Forward pass
    output = model(input_data.to(device))
    _, predicted_class = torch.max(output, 1)  # Get the predicted class index

    model.zero_grad()

    # One-hot mask that selects each sample's predicted-class score
    one_hot = torch.zeros_like(output)
    one_hot[torch.arange(output.size(0)), predicted_class] = 1

    # Single backward pass for the whole batch
    output.backward(gradient=one_hot, retain_graph=True)

    # Input gradients for every sampled row
    saliency = input_data.grad  # Saliency map for each sample

    # Average the absolute gradients over the sampled rows of this type
    saliency = saliency.abs().mean(dim=0).squeeze().detach().cpu().numpy()

    saliency_type_dict[instance_type] = saliency

In [None]:
# Plot overall and per-type test accuracy against the epoch number
plt.figure(figsize=(10, 6))
i = 0
# The overall curve starts at epoch 1; the per-type curves start at epoch 0
# because they also hold the accuracy measured before training.
plt.plot(range(1, epochs+1), test_accuracy_list[i], label='Total')
for instance_type in possible_instance_types:
    plt.plot(range(0, epochs+1), test_accuracies_type[instance_type], label=f'{instance_type}')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Test Accuracy over Epochs for Different Types')
plt.legend()
plt.grid(True)
plt.show()

# Raw saliency per prime, overall and per type
plt.figure(figsize=(10, 6))
prime_numbers = [float(num) for num in feature_columns]
i = 0
plt.scatter(prime_numbers, saliency_value_list[i], label='Total', s=1)
for instance_type in possible_instance_types:
    plt.scatter(prime_numbers, saliency_type_dict[instance_type], label=f'{instance_type}', s=1)
#plt.scatter(prime_numbers, np.log(prime_numbers) / prime_numbers / (np.log(10000)), label='log(p)/p', s=1)
plt.title('Saliency Map for Feature Importance')
plt.ylabel('Saliency (Gradient Magnitude)')  # Label for y-axis
plt.xlabel('p')  # Label for x-axis
plt.legend()
plt.grid(True)
plt.show()

# Saliency divided by its own maximum, compared with log(p)/p rescaled so
# that its value at p = 2 is 1
plt.figure(figsize=(10, 6))
prime_numbers = [float(num) for num in feature_columns]
i = 0
plt.scatter(prime_numbers, saliency_value_list[i]/np.max(saliency_value_list[i]), label='Total', s=1)
plt.scatter(prime_numbers, np.log(prime_numbers) / prime_numbers / (np.log(2)/2), label='log(p)/p', s=1)
for instance_type in possible_instance_types:
    plt.scatter(prime_numbers, saliency_type_dict[instance_type]/np.max(saliency_type_dict[instance_type]),
                label=f'{instance_type}', s=1)
plt.title('Saliency Map for Feature Importance')
plt.ylabel('Saliency (normalized by max value)')  # Label for y-axis
plt.xlabel('p')  # Label for x-axis
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Last recorded (final-epoch) test accuracy for each type.
for instance_type in possible_instance_types:
    print(instance_type, f'{test_accuracies_type[instance_type][-1]:.4f}')

### Transfer learning between ECNF and G2Q

In [None]:
#seed = 11167297796775735125  # Set your desired seed value
# One seed drives both libraries, so changing it here changes both RNGs.
# NOTE: np.random.seed only accepts values in [0, 2**32 - 1].
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

#### G2Q

In [None]:
# Boolean mask of the rows whose instance_types contains 'G2Q'.  It is
# computed once and reused for both subsets instead of parsing the whole
# column twice.
is_G2Q = DF_ap['instance_types'].apply(ast.literal_eval).apply(lambda x: 'G2Q' in x).values
DF_G2Q = DF_ap[is_G2Q]
DF_no_G2Q = DF_ap[~is_G2Q]
# Train on 80% of the non-G2Q rows; evaluate on 90% of the G2Q rows.
DF_train, _ = train_test_split(DF_no_G2Q, test_size=0.2, random_state=0)
_, DF_test = train_test_split(DF_G2Q, test_size=0.9, random_state=0)

In [None]:
batch_size = 4000
# First 168 prime-labelled columns are used as the model inputs.
feature_columns = PRIME_COLUMNS[:168]

# Extract the input matrix and the classification target for each split
X_train = DF_train[feature_columns].values  # inputs: one column per prime label
y_train = DF_train['order_of_vanishing'].values  # target: order of vanishing
X_test = DF_test[feature_columns].values  # inputs: one column per prime label
y_test = DF_test['order_of_vanishing'].values  # target: order of vanishing

#del DF_train, DF_test, DF_ap
#gc.collect()

# Scale the data: the scaler is fitted on the training rows only and the
# same fitted transform is then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for CNN: (n_samples, channels, sequence_length)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])  # 1 channel
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])  # 1 channel

# Convert data to PyTorch tensors (float32 inputs, int64 class labels)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Mini-batches are drawn from the training tensors in shuffled order.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
max_epochs = 25  # number of passes over the training loader
saliency_value_list = []  # receives one averaged saliency vector per run
test_accuracy_list = []  # receives one per-epoch test-accuracy list per run

class CNN(nn.Module):
    """1-D convolutional classifier: three conv+pool stages, then three dense layers.

    Parameters
    ----------
    num_classes : int
        Output size of the final linear layer (one score per class).
    input_length : int, optional
        Size of the last dimension of the inputs.  When omitted it is read
        from the notebook-level ``X_train`` tensor, which is what the
        constructor always did before this parameter existed.
    """

    def __init__(self, num_classes, input_length=None):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, padding=1)

        if input_length is None:
            input_length = X_train.shape[-1]
        # Push an all-zero sample through the conv stack to find out how many
        # features reach the first dense layer.
        dummy_input = torch.zeros(1, 1, input_length)
        self.flattened_size = self._get_flattened_size(dummy_input)

        self.fc1 = nn.Linear(self.flattened_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, num_classes)  # Final output layer for classification

        self.dropout = nn.Dropout(0.5)

    def _conv_features(self, x):
        """Apply the three conv -> ReLU -> max-pool stages to ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(torch.relu(conv(x)))
        return x

    def _get_flattened_size(self, x):
        """Return the flattened feature count the conv stack yields for ``x``."""
        # Shape probing only, so no autograd graph needs to be recorded.
        with torch.no_grad():
            return self._conv_features(x).view(1, -1).size(1)

    def forward(self, x):
        """Return the class scores for a batch shaped (batch, 1, length)."""
        x = self._conv_features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.dropout(x)  # Dropout on the flattened conv features
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)  # Raw scores; CrossEntropyLoss applies the softmax

# Initialize model, loss function, and optimizer
num_classes = 5  # fixed size of the output layer for this run
model = CNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()  # Loss function for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Lists to store accuracy values
train_accuracies = []
test_accuracies = []

# Baseline: test accuracy of the untrained model (entry 0 of test_accuracies)
model.eval()
with torch.no_grad():  
    test_outputs = model(X_test.to(device))
    test_accuracy = calculate_accuracy(y_test.to(device), test_outputs)
    test_accuracies.append(test_accuracy)
    
# Training the model
epochs = max_epochs
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    step = 0  # index of the mini-batch within the current epoch
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Entry 0 of train_accuracies: accuracy of the very first mini-batch.
        # `outputs` was computed before this step's weight update and in
        # training mode, so the eval()/train() toggle does not change it.
        if epoch == 0 and step == 0:
            model.eval()
            with torch.no_grad():
                train_accuracy = calculate_accuracy(labels, outputs)
                train_accuracies.append(train_accuracy)
            model.train()
        step += 1

    model.eval()
    with torch.no_grad():
        # Train accuracy: measured on the last mini-batch of the epoch only,
        # using the outputs computed in training mode (dropout active).
        #train_outputs = model(X_train)
        #train_accuracy = calculate_accuracy(y_train, train_outputs)
        train_accuracy = calculate_accuracy(labels, outputs)
        train_accuracies.append(train_accuracy)

        # Test accuracy on the whole test set
        test_outputs = model(X_test.to(device))
        test_accuracy = calculate_accuracy(y_test.to(device), test_outputs)
        test_accuracies.append(test_accuracy)

    # Print every epoch (raise the modulus to log less often)
    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

test_accuracy_list.append(test_accuracies)
#torch.save(model.state_dict(), f'Conductor_models/CNN_{conductor_min}_to_{conductor_min}.pth')

    
# Saliency Map Calculation
model.eval()  # Set the model to evaluation mode

# Draw up to 3000 random test rows; the copy is a fresh leaf tensor that
# records gradients with respect to the inputs.
input_data = X_test[torch.randperm(X_test.size(0))[:3000]].clone().detach().requires_grad_(True)

# Forward pass
output = model(input_data.to(device))
_, predicted_class = torch.max(output, 1)  # Get the predicted class index

model.zero_grad()

# One-hot mask that selects, for every sample, the score of its predicted class
one_hot = torch.zeros_like(output)
one_hot[torch.arange(output.size(0)), predicted_class] = 1

# Single backward pass for the whole batch: differentiates the sum of the
# selected (predicted-class) scores with respect to the inputs
output.backward(gradient=one_hot, retain_graph=True)

# Input gradients for every sampled row
saliency = input_data.grad  # Saliency map for each sample

# Average the absolute gradients over the sampled test rows
saliency = saliency.abs().mean(dim=0).squeeze().detach().cpu().numpy()

saliency_value_list.append(saliency)

In [None]:
# Number of mini-batches needed to cover the training tensor once.
step_per_epoch = math.ceil(len(X_train) / batch_size)
print(f'{step_per_epoch} steps per epoch')
plt.figure(figsize=(10, 6))
i = 0
# Both curves have epochs + 1 points: index 0 is the value recorded before /
# at the first training step, indices 1..epochs are the end-of-epoch values.
# NOTE(review): the training frame here is every row without 'G2Q', while
# the legend says 'ECNF' - confirm the intended label.
plt.plot(range(0, epochs+1) , train_accuracies, label='Training with ECNF')
plt.plot(range(0, epochs+1) , test_accuracy_list[i], label='Test with G2Q')
#plt.plot([epoch*step_per_epoch/10 for epoch in range(1, epochs+1)] , test_accuracy_list[i], label='ECQ')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()
plt.grid(True)
plt.show()

#### ECNF

In [None]:
# Boolean mask of the rows whose instance_types contains 'ECNF'.  It is
# computed once and reused for both subsets instead of parsing the whole
# column twice.
is_ECNF = DF_ap['instance_types'].apply(ast.literal_eval).apply(lambda x: 'ECNF' in x).values
DF_ECNF = DF_ap[is_ECNF]
DF_no_ECNF = DF_ap[~is_ECNF]
# Train on 80% of the non-ECNF rows; evaluate on 40% of the ECNF rows.
DF_train, _ = train_test_split(DF_no_ECNF, test_size=0.2, random_state=0)
_, DF_test = train_test_split(DF_ECNF, test_size=0.4, random_state=0)

In [None]:
batch_size = 3000
# First 168 prime-labelled columns are used as the model inputs.
feature_columns = PRIME_COLUMNS[:168]

# Extract the input matrix and the classification target for each split
X_train = DF_train[feature_columns].values  # inputs: one column per prime label
y_train = DF_train['order_of_vanishing'].values  # target: order of vanishing
X_test = DF_test[feature_columns].values  # inputs: one column per prime label
y_test = DF_test['order_of_vanishing'].values  # target: order of vanishing

#del DF_train, DF_test, DF_ap
#gc.collect()

# Scale the data: the scaler is fitted on the training rows only and the
# same fitted transform is then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for CNN: (n_samples, channels, sequence_length)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])  # 1 channel
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])  # 1 channel

# Convert data to PyTorch tensors (float32 inputs, int64 class labels)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Mini-batches are drawn from the training tensors in shuffled order.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
max_epochs = 25  # number of passes over the training loader
saliency_value_list = []  # receives one averaged saliency vector per run
test_accuracy_list = []  # receives one per-epoch test-accuracy list per run

class CNN(nn.Module):
    """1-D convolutional classifier: three conv+pool stages, then three dense layers.

    Parameters
    ----------
    num_classes : int
        Output size of the final linear layer (one score per class).
    input_length : int, optional
        Size of the last dimension of the inputs.  When omitted it is read
        from the notebook-level ``X_train`` tensor, which is what the
        constructor always did before this parameter existed.
    """

    def __init__(self, num_classes, input_length=None):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, padding=1)

        if input_length is None:
            input_length = X_train.shape[-1]
        # Push an all-zero sample through the conv stack to find out how many
        # features reach the first dense layer.
        dummy_input = torch.zeros(1, 1, input_length)
        self.flattened_size = self._get_flattened_size(dummy_input)

        self.fc1 = nn.Linear(self.flattened_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, num_classes)  # Final output layer for classification

        self.dropout = nn.Dropout(0.5)

    def _conv_features(self, x):
        """Apply the three conv -> ReLU -> max-pool stages to ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(torch.relu(conv(x)))
        return x

    def _get_flattened_size(self, x):
        """Return the flattened feature count the conv stack yields for ``x``."""
        # Shape probing only, so no autograd graph needs to be recorded.
        with torch.no_grad():
            return self._conv_features(x).view(1, -1).size(1)

    def forward(self, x):
        """Return the class scores for a batch shaped (batch, 1, length)."""
        x = self._conv_features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.dropout(x)  # Dropout on the flattened conv features
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)  # Raw scores; CrossEntropyLoss applies the softmax

# Initialize model, loss function, and optimizer
num_classes = 5  # fixed size of the output layer for this run
model = CNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()  # Loss function for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Lists to store accuracy values
train_accuracies = []
test_accuracies = []

# Baseline: test accuracy of the untrained model (entry 0 of test_accuracies)
model.eval()
with torch.no_grad():  
    test_outputs = model(X_test.to(device))
    test_accuracy = calculate_accuracy(y_test.to(device), test_outputs)
    test_accuracies.append(test_accuracy)
    
# Training the model
epochs = max_epochs
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    step = 0  # index of the mini-batch within the current epoch
    
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Entry 0 of train_accuracies: accuracy of the very first mini-batch.
        # `outputs` was computed before this step's weight update and in
        # training mode, so the eval()/train() toggle does not change it.
        if epoch == 0 and step == 0:
            model.eval()
            with torch.no_grad():
                train_accuracy = calculate_accuracy(labels, outputs)
                train_accuracies.append(train_accuracy)
            model.train()
        step += 1

    model.eval()
    with torch.no_grad():
        # Train accuracy: measured on the last mini-batch of the epoch only,
        # using the outputs computed in training mode (dropout active).
        #train_outputs = model(X_train)
        #train_accuracy = calculate_accuracy(y_train, train_outputs)
        train_accuracy = calculate_accuracy(labels, outputs)
        train_accuracies.append(train_accuracy)

        # Test accuracy on the whole test set
        test_outputs = model(X_test.to(device))
        test_accuracy = calculate_accuracy(y_test.to(device), test_outputs)
        test_accuracies.append(test_accuracy)

    # Print every epoch (raise the modulus to log less often)
    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

test_accuracy_list.append(test_accuracies)
#torch.save(model.state_dict(), f'Conductor_models/CNN_{conductor_min}_to_{conductor_min}.pth')

    
# Saliency Map Calculation
model.eval()  # Set the model to evaluation mode

# Draw up to 3000 random test rows; the copy is a fresh leaf tensor that
# records gradients with respect to the inputs.
input_data = X_test[torch.randperm(X_test.size(0))[:3000]].clone().detach().requires_grad_(True)

# Forward pass
output = model(input_data.to(device))
_, predicted_class = torch.max(output, 1)  # Get the predicted class index

model.zero_grad()

# One-hot mask that selects, for every sample, the score of its predicted class
one_hot = torch.zeros_like(output)
one_hot[torch.arange(output.size(0)), predicted_class] = 1

# Single backward pass for the whole batch: differentiates the sum of the
# selected (predicted-class) scores with respect to the inputs
output.backward(gradient=one_hot, retain_graph=True)

# Input gradients for every sampled row
saliency = input_data.grad  # Saliency map for each sample

# Average the absolute gradients over the sampled test rows
saliency = saliency.abs().mean(dim=0).squeeze().detach().cpu().numpy()

saliency_value_list.append(saliency)

In [None]:
# Number of mini-batches needed to cover the training tensor once.
step_per_epoch = math.ceil(len(X_train) / batch_size)
print(f'{step_per_epoch} steps per epoch')
plt.figure(figsize=(10, 6))
i = 0
# Both curves have epochs + 1 points: index 0 is the value recorded before /
# at the first training step, indices 1..epochs are the end-of-epoch values.
# NOTE(review): the training frame here is every row without 'ECNF', while
# the legend says 'G2Q' - confirm the intended label.
plt.plot(range(0, epochs+1) , train_accuracies, label='Training with G2Q')
plt.plot(range(0, epochs+1) , test_accuracy_list[i], label='Test with ECNF')
#plt.plot([epoch*step_per_epoch/10 for epoch in range(1, epochs+1)] , test_accuracy_list[i], label='ECQ')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()
plt.grid(True)
plt.show()

## Train with principal components

In [None]:
# Project every prime-labelled column of the full dataset onto principal
# components.  Note that the PCA is fitted on all rows, before the
# train/test split performed in a later cell.
DF_PCA = DF_ap
feature_columns = PRIME_COLUMNS
X = DF_PCA[feature_columns]
y = DF_PCA['label']
pca = PCA(n_components=168)
principal_components = pca.fit_transform(X)

# Name the columns after the components actually returned, so the frame
# stays consistent if n_components and the feature count ever diverge.
pca_df = pd.DataFrame(data=principal_components, columns=[f'PC{i+1}' for i in range(principal_components.shape[1])])
pca_df['label'] = y.values

col = 'order_of_vanishing'
vals = DF_PCA[col].value_counts()

# Point colours: the order of vanishing of each row, in row order.
color = DF_PCA[col]

plt.figure(figsize=(8, 6))
plt.scatter(pca_df['PC1'], pca_df['PC2'], c=color, alpha=0.7, s=10, cmap='viridis')

# Add labels and title
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.title(f'2D PCA colored by {col}')

# Add color bar
plt.colorbar(label=col)

# Show the plot
plt.show()

In [None]:
# pca_df was built row-for-row from DF_PCA, so the extra columns are attached
# positionally.  Unlike a merge on 'label', this cannot duplicate rows when a
# label repeats and gives the same result if the cell is run again.
pca_df['instance_types'] = DF_PCA['instance_types'].values
pca_df['order_of_vanishing'] = DF_PCA['order_of_vanishing'].values

In [None]:
# 80/20 train/test split of the principal-component frame with a fixed random_state.
DF_train, DF_test = train_test_split(pca_df, test_size=0.2, random_state=0)

In [None]:
batch_size = 3000
feature_columns = PRIME_COLUMNS[:168]
# One principal-component column name per entry of feature_columns.
PC_columns = [f'PC{i+1}' for i in range(len(feature_columns))]

# Extract the input matrix and the classification target for each split
X_train = DF_train[PC_columns].values  # inputs: the principal-component columns
y_train = DF_train['order_of_vanishing'].values  # target: order of vanishing
X_test = DF_test[PC_columns].values  # inputs: the principal-component columns
y_test = DF_test['order_of_vanishing'].values  # target: order of vanishing

#del DF_train, DF_test, DF_ap
#gc.collect()

# Scale the data: the scaler is fitted on the training rows only and the
# same fitted transform is then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for CNN: (n_samples, channels, sequence_length)
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])  # 1 channel
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])  # 1 channel

# Convert data to PyTorch tensors (float32 inputs, int64 class labels)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Mini-batches are drawn from the training tensors in shuffled order.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
max_epochs = 25  # epoch budget for the run configured in this cell
saliency_value_list = []  # reset the saliency results from earlier runs
test_accuracy_list = []  # reset the test-accuracy results from earlier runs

class CNN(nn.Module):
    """1-D convolutional classifier.

    Three Conv1d -> ReLU -> MaxPool1d stages (16, 32 and 64 channels) are
    followed by dropout and three fully connected layers that produce one
    logit per class.

    Args:
        num_classes: Number of logits produced by the final linear layer.
        input_length: Length of the last dimension of the ``(batch, 1, length)``
            input. When omitted it is read from the global ``X_train`` tensor,
            which is how the network was sized before this parameter existed.
    """

    def __init__(self, num_classes, input_length=None):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, padding=1)

        # Push a dummy sample through the conv stack to learn how many values
        # reach the first fully connected layer.
        if input_length is None:
            input_length = X_train.shape[-1]
        dummy_input = torch.zeros(1, 1, input_length)  # (batch_size, channels, length)
        self.flattened_size = self._get_flattened_size(dummy_input)

        self.fc1 = nn.Linear(self.flattened_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, num_classes)  # Final output layer for classification

        self.dropout = nn.Dropout(0.5)

    def _features(self, x):
        """Apply the three conv + ReLU + pooling stages shared by sizing and forward."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        return x

    def _get_flattened_size(self, x):
        """Return the number of features the conv stack yields for the single sample ``x``."""
        # Only the shape is needed, so skip building an autograd graph.
        with torch.no_grad():
            return self._features(x).view(1, -1).size(1)

    def forward(self, x):
        """Map a ``(batch, 1, length)`` tensor to ``(batch, num_classes)`` logits."""
        x = self._features(x)

        x = x.view(x.size(0), -1)  # Flatten
        x = self.dropout(x)  # Apply dropout
        x = torch.relu(self.fc1(x))  # Fully connected layer
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)  # Output layer
        return x

# Initialize model, loss function, and optimizer.
# CrossEntropyLoss requires every target to lie in [0, num_classes). Counting the
# distinct test labels undercounts whenever a class is missing from the test
# split, so size the output layer from the largest label in either split.
num_classes = int(torch.cat([y_train, y_test]).max()) + 1
model = CNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()  # Loss function for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Lists to store accuracy values
train_accuracies = []
test_accuracies = []
test_accuracies_type = {instance_type : [] for instance_type in possible_instance_types}

# Epoch-0 baseline: per-type test accuracy of the untrained network.
model.eval()
with torch.no_grad():
    for instance_type in possible_instance_types:
        type_outputs = model(X_test[type_indices[instance_type]].to(device))
        type_accuracy = calculate_accuracy(y_test[type_indices[instance_type]].to(device), type_outputs)
        test_accuracies_type[instance_type].append(type_accuracy)

# Training the model
epochs = max_epochs
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    with torch.no_grad():
        # Train accuracy is estimated from the last mini-batch only, reusing the
        # outputs that were computed in training mode (dropout active).
        train_accuracy = calculate_accuracy(labels, outputs)
        train_accuracies.append(train_accuracy)

        # Test accuracy on the full test split
        test_outputs = model(X_test.to(device))
        test_accuracy = calculate_accuracy(y_test.to(device), test_outputs)
        test_accuracies.append(test_accuracy)

        # Test accuracies for different types. These use their own names so the
        # overall `test_accuracy` reported below is not overwritten by the
        # accuracy of the last instance type.
        for instance_type in possible_instance_types:
            type_outputs = model(X_test[type_indices[instance_type]].to(device))
            type_accuracy = calculate_accuracy(y_test[type_indices[instance_type]].to(device), type_outputs)
            test_accuracies_type[instance_type].append(type_accuracy)

    # Report progress every epoch
    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

test_accuracy_list.append(test_accuracies)
#torch.save(model.state_dict(), f'Conductor_models/CNN_{conductor_min}_to_{conductor_min}.pth')


# Saliency Map Calculation
model.eval()  # Set the model to evaluation mode

# Draw up to 3000 random test samples for the saliency calculation
input_data = X_test[torch.randperm(X_test.size(0))[:3000]].clone().detach().requires_grad_(True)

# Forward pass
output = model(input_data.to(device))
_, predicted_class = torch.max(output, 1)  # Get the predicted class index

model.zero_grad()

# One-hot mask selecting each sample's predicted-class logit
one_hot = torch.zeros_like(output)
one_hot[torch.arange(output.size(0)), predicted_class] = 1

# Perform a single backward pass for the entire batch
output.backward(gradient=one_hot, retain_graph=True)

# Gradient of the predicted-class logit with respect to every input value
saliency = input_data.grad  # Saliency map for each sample

# Average the absolute gradients over the sampled test inputs
saliency = saliency.abs().mean(dim=0).squeeze().detach().cpu().numpy()

saliency_value_list.append(saliency)

In [None]:
# Mean absolute input gradient (saliency) of the trained model, computed
# separately for the test samples of each instance type.
saliency_type_dict = {instance_type: None for instance_type in possible_instance_types}
for instance_type in possible_instance_types:

    # Restrict the test set to this type and draw up to 3000 random samples.
    X_test_type = X_test[type_indices[instance_type]]
    chosen_rows = torch.randperm(X_test_type.size(0))[:3000]
    input_data = X_test_type[chosen_rows].clone().detach().requires_grad_(True)

    # Forward pass; the class with the largest logit is the prediction.
    output = model(input_data.to(device))
    _, predicted_class = output.max(dim=1)

    model.zero_grad()

    # Mask with a 1 at each sample's predicted class and 0 elsewhere.
    one_hot = torch.zeros_like(output).scatter_(1, predicted_class.unsqueeze(1), 1.0)

    # One backward pass gives the gradient of every predicted-class logit
    # with respect to its own input sample.
    output.backward(gradient=one_hot, retain_graph=True)
    saliency = input_data.grad

    # Average the absolute gradients over the sampled inputs.
    saliency = saliency.abs().mean(dim=0).squeeze().detach().cpu().numpy()

    saliency_type_dict[instance_type] = saliency

In [None]:
# Test accuracy per epoch: the overall curve plus one curve per instance type.
i = 0
trained_epochs = range(1, epochs + 1)
# The per-type lists also hold the evaluation made before training (epoch 0),
# so they have one more point than the overall curve.
all_epochs = range(epochs + 1)

plt.figure(figsize=(10, 6))
plt.plot(trained_epochs, test_accuracy_list[i], label='Total')
for instance_type in possible_instance_types:
    plt.plot(all_epochs, test_accuracies_type[instance_type], label=f'{instance_type}')
plt.title('Test Accuracy over Epochs for Different Types (Trained with all PCs)')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid(True)
plt.legend()
plt.show()

# Saliency of the leading principal components.
# The network is trained on the columns PC1, PC2, ..., so entry k of a saliency
# vector belongs to PC(k+1). Plot against that component index; plotting against
# the k-th prime (and labelling the axis 'p') misattributes the values.
i = 0
n_shown = min(10, len(saliency_value_list[i]))
pc_index = np.arange(1, n_shown + 1)

# Raw gradient magnitudes
plt.figure(figsize=(10, 6))
plt.scatter(pc_index, saliency_value_list[i][:n_shown], label='Total', s=3)
for instance_type in possible_instance_types:
    plt.scatter(pc_index, saliency_type_dict[instance_type][:n_shown], label=f'{instance_type}', s=3)
plt.title('Saliency Map for Feature Importance')
plt.ylabel('Saliency (Gradient Magnitude)')  # Label for y-axis
plt.xlabel('Principal component')  # Label for x-axis
plt.xticks(pc_index)
plt.legend()
plt.grid(True)
plt.show()

# Same data, each curve divided by its own maximum so the shapes are comparable
plt.figure(figsize=(10, 6))
total_saliency = saliency_value_list[i][:n_shown]
plt.scatter(pc_index, total_saliency/np.max(total_saliency), label='Total', s=5)
for instance_type in possible_instance_types:
    type_saliency = saliency_type_dict[instance_type][:n_shown]
    plt.scatter(pc_index, type_saliency/np.max(type_saliency),
                label=f'{instance_type}', s=5)
plt.title('Saliency Map for Feature Importance')
plt.ylabel('Saliency (normalized by max value)')  # Label for y-axis
plt.xlabel('Principal component')  # Label for x-axis
plt.xticks(pc_index)
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Final-epoch test accuracy for each instance type, to four decimals.
for instance_type in possible_instance_types:
    final_accuracy = test_accuracies_type[instance_type][-1]
    print(instance_type, f'{final_accuracy:.4f}')

### Trained with PC1 and PC2

In [None]:
batch_size = 3000
feature_columns = PRIME_COLUMNS[:2]
# One principal-component column per selected prime column: here PC1 and PC2.
PC_columns = [f'PC{k}' for k in range(1, len(feature_columns) + 1)]

#del DF_train, DF_test, DF_ap
#gc.collect()

# Standardise the PC inputs. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(DF_train[PC_columns].values)
X_test = scaler.transform(DF_test[PC_columns].values)

# Insert a singleton channel axis, giving (n_samples, 1, n_components), and
# convert to float32 tensors.
X_train = torch.tensor(X_train[:, np.newaxis, :], dtype=torch.float32)
X_test = torch.tensor(X_test[:, np.newaxis, :], dtype=torch.float32)

# Targets: the order of vanishing, stored as int64 tensors.
y_train = torch.tensor(DF_train['order_of_vanishing'].values, dtype=torch.long)
y_test = torch.tensor(DF_test['order_of_vanishing'].values, dtype=torch.long)

# Shuffled mini-batches over the training split.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

In [None]:
# Number of training epochs, and the collectors that the rest of this cell
# appends one entry to (saliency vector and per-epoch test accuracies).
max_epochs = 25
saliency_value_list, test_accuracy_list = [], []

class CNN(nn.Module):
    """1-D convolutional classifier.

    Three Conv1d -> ReLU -> MaxPool1d stages (16, 32 and 64 channels) are
    followed by dropout and three fully connected layers that produce one
    logit per class.

    Args:
        num_classes: Number of logits produced by the final linear layer.
        input_length: Length of the last dimension of the ``(batch, 1, length)``
            input. When omitted it is read from the global ``X_train`` tensor,
            which is how the network was sized before this parameter existed.
    """

    def __init__(self, num_classes, input_length=None):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, padding=1)

        # Push a dummy sample through the conv stack to learn how many values
        # reach the first fully connected layer.
        if input_length is None:
            input_length = X_train.shape[-1]
        dummy_input = torch.zeros(1, 1, input_length)  # (batch_size, channels, length)
        self.flattened_size = self._get_flattened_size(dummy_input)

        self.fc1 = nn.Linear(self.flattened_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, num_classes)  # Final output layer for classification

        self.dropout = nn.Dropout(0.5)

    def _features(self, x):
        """Apply the three conv + ReLU + pooling stages shared by sizing and forward."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        return x

    def _get_flattened_size(self, x):
        """Return the number of features the conv stack yields for the single sample ``x``."""
        # Only the shape is needed, so skip building an autograd graph.
        with torch.no_grad():
            return self._features(x).view(1, -1).size(1)

    def forward(self, x):
        """Map a ``(batch, 1, length)`` tensor to ``(batch, num_classes)`` logits."""
        x = self._features(x)

        x = x.view(x.size(0), -1)  # Flatten
        x = self.dropout(x)  # Apply dropout
        x = torch.relu(self.fc1(x))  # Fully connected layer
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)  # Output layer
        return x

# Initialize model, loss function, and optimizer.
# CrossEntropyLoss requires every target to lie in [0, num_classes). Counting the
# distinct test labels undercounts whenever a class is missing from the test
# split, so size the output layer from the largest label in either split.
num_classes = int(torch.cat([y_train, y_test]).max()) + 1
model = CNN(num_classes).to(device)
criterion = nn.CrossEntropyLoss()  # Loss function for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Lists to store accuracy values
train_accuracies = []
test_accuracies = []
test_accuracies_type = {instance_type : [] for instance_type in possible_instance_types}

# Epoch-0 baseline: per-type test accuracy of the untrained network.
model.eval()
with torch.no_grad():
    for instance_type in possible_instance_types:
        type_outputs = model(X_test[type_indices[instance_type]].to(device))
        type_accuracy = calculate_accuracy(y_test[type_indices[instance_type]].to(device), type_outputs)
        test_accuracies_type[instance_type].append(type_accuracy)

# Training the model
epochs = max_epochs
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    with torch.no_grad():
        # Train accuracy is estimated from the last mini-batch only, reusing the
        # outputs that were computed in training mode (dropout active).
        train_accuracy = calculate_accuracy(labels, outputs)
        train_accuracies.append(train_accuracy)

        # Test accuracy on the full test split
        test_outputs = model(X_test.to(device))
        test_accuracy = calculate_accuracy(y_test.to(device), test_outputs)
        test_accuracies.append(test_accuracy)

        # Test accuracies for different types. These use their own names so the
        # overall `test_accuracy` reported below is not overwritten by the
        # accuracy of the last instance type.
        for instance_type in possible_instance_types:
            type_outputs = model(X_test[type_indices[instance_type]].to(device))
            type_accuracy = calculate_accuracy(y_test[type_indices[instance_type]].to(device), type_outputs)
            test_accuracies_type[instance_type].append(type_accuracy)

    # Report progress every epoch
    if (epoch + 1) % 1 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, '
              f'Train Accuracy: {train_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}')

test_accuracy_list.append(test_accuracies)
#torch.save(model.state_dict(), f'Conductor_models/CNN_{conductor_min}_to_{conductor_min}.pth')


# Saliency Map Calculation
model.eval()  # Set the model to evaluation mode

# Draw up to 3000 random test samples for the saliency calculation
input_data = X_test[torch.randperm(X_test.size(0))[:3000]].clone().detach().requires_grad_(True)

# Forward pass
output = model(input_data.to(device))
_, predicted_class = torch.max(output, 1)  # Get the predicted class index

model.zero_grad()

# One-hot mask selecting each sample's predicted-class logit
one_hot = torch.zeros_like(output)
one_hot[torch.arange(output.size(0)), predicted_class] = 1

# Perform a single backward pass for the entire batch
output.backward(gradient=one_hot, retain_graph=True)

# Gradient of the predicted-class logit with respect to every input value
saliency = input_data.grad  # Saliency map for each sample

# Average the absolute gradients over the sampled test inputs
saliency = saliency.abs().mean(dim=0).squeeze().detach().cpu().numpy()

saliency_value_list.append(saliency)

In [None]:
# Loadings of the fitted PCA: row k holds the weights that component PC(k+1)
# puts on each original feature.
loadings = pca.components_

# Extract PC1 and PC2 weights
pc1_loadings = loadings[0]
pc2_loadings = loadings[1]
#pc3_loadings = loadings[2]

# Feature indices for plotting, taken from the loadings themselves so the plot
# keeps working if the PCA is refitted on a different number of features.
feature_indices = np.arange(loadings.shape[1])

# Plot PC1 and PC2 weights against the feature index
plt.figure(figsize=(8, 6))
plt.scatter(feature_indices, pc1_loadings, alpha=0.7, label="PC1 Loadings",s=3)
plt.scatter(feature_indices, pc2_loadings, alpha=0.7, label="PC2 Loadings", s=3)
#plt.scatter(feature_indices, pc3_loadings, alpha=0.7, label="PC3 Loadings", s=2)
plt.axhline(0, color='gray', linestyle='--', linewidth=1)
plt.xlabel("Feature Index")
plt.ylabel("Loading Value")
plt.title("PCA Loadings for PC1 and PC2")
plt.legend()
plt.show()

In [None]:
# Test accuracy per epoch: the overall curve plus one curve per instance type.
i = 0
trained_epochs = range(1, epochs + 1)
# The per-type lists also hold the evaluation made before training (epoch 0),
# so they have one more point than the overall curve.
all_epochs = range(epochs + 1)

plt.figure(figsize=(10, 6))
plt.plot(trained_epochs, test_accuracy_list[i], label='Total')
for instance_type in possible_instance_types:
    plt.plot(all_epochs, test_accuracies_type[instance_type], label=f'{instance_type}')
plt.title('Test Accuracy over Epochs for Different Types (Trained with PC1 and PC2)')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Final-epoch test accuracy for each instance type, to four decimals.
for instance_type in possible_instance_types:
    final_accuracy = test_accuracies_type[instance_type][-1]
    print(instance_type, f'{final_accuracy:.4f}')