In [None]:
# file-handling
import os 

# user status updates
import time
from time import gmtime, strftime
from tqdm import tqdm
from IPython.display import display, Latex
from datetime import datetime

# the holy trinity of python data science
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd

# scipy
import scipy
from scipy import sparse, linalg, fft
from scipy.linalg import expm, sinm, cosm
import scipy.integrate as integrate
from scipy.integrate import quad

# torch 
import torch
import torch.nn as nn
import torch.nn.functional as F

# parallelization, memory management
from joblib import Parallel, delayed
from numba import jit, njit, prange
import copy

# itertools
import itertools

############ Macros ###############
# Seed both global RNGs up front so every stochastic step below is repeatable.
NP_RANDOM_SEED = 0
TORCH_RANDOM_SEED = 0
np.random.seed(NP_RANDOM_SEED)
torch.manual_seed(TORCH_RANDOM_SEED)

# --- Physical constants ---
N = 8                   # number of fermions
J = 100                 # ~"energy scale" of couplings
Q_COUPLING = 3          # order of coupling ('Q' itself is reserved for the supercharge)
N_DIM = 1 << N          # Hilbert space dimension, 2**N
FIRST_NONZERO = 0.15    # eigenvalues below this threshold are treated as zero-energy

# --- Compute settings ---
N_SAMPLES = 300             # number of samples to generate
N_JOBS = 20                 # number of jobs to run in parallel
MC_SIMULATED = False        # flipped to True once the Monte-Carlo baseline has been generated
MC_RANDOM_SIMULATED = False # same flag, for the randomly-trained model's baseline

# --- Model / training settings ---
LR = 1e-5           # learning rate
BATCH_SIZE = 50     # batch size for training the neural network
N_EPOCHS = 5        # number of epochs to train the neural network
R_TRAIN = 0.5       # desired ratio of zero eigenvalues to total eigenvalues IN TRAINING SET
TEST_RANDOM = True  # whether to train and test the randomly-trained model

# --- Directories ---
N2_SUSY_DIR = os.path.join("Excel", "N2_SUSY_SYK")
_RUN_TAG = f"N{N}_J{J}"  # sub-folder name shared by the input and output directories
RESULT_DIR = os.path.join(N2_SUSY_DIR, "Simulated Hamiltonians", _RUN_TAG)
CNN_DIR = os.path.join(N2_SUSY_DIR, "CNN", _RUN_TAG)
os.makedirs(CNN_DIR, exist_ok=True)  # only the output directory is created here

# 1. Data

##       1.1 Load data

In [None]:
# Total number of eigen(value, vector) pairs: N_DIM per sampled Hamiltonian.
N_IV = N_SAMPLES * N_DIM
ivals_all = np.zeros((N_IV,), dtype=np.float64)
ivecs_all = np.zeros((N_IV, N_DIM), dtype=np.complex128)

# Copy each sample's arrays into its own contiguous block of N_DIM rows.
# NOTE(review): rows of ivecs_{k}.npy are treated as the eigenvectors -- confirm
# against the script that wrote these files.
for sample_idx in range(N_SAMPLES):
    block = slice(sample_idx * N_DIM, (sample_idx + 1) * N_DIM)
    ivals_all[block] = np.load(os.path.join(RESULT_DIR, f"ivals_{sample_idx}.npy"))
    ivecs_all[block, :] = np.load(os.path.join(RESULT_DIR, f"ivecs_{sample_idx}.npy"))

# zero-energy eigenvectors labeled with 0, finite-energy eigenvectors labeled with 1
labels_all = np.greater_equal(ivals_all, FIRST_NONZERO).astype(int)

for _name, _arr in (("ivals_all", ivals_all), ("ivecs_all", ivecs_all), ("labels_all", labels_all)):
    print(f"{_name}.shape: {_arr.shape}")

## 1.2 Convert (1xN_DIM) complex into (2xN_DIM) real

Torch doesn't currently have handling for complex numbers

In [None]:
# Split every complex eigenvector into two real channels so the real-valued
# network can consume it: channel 0 holds the real part, channel 1 the imaginary part.
# (The previous version wrote the real part into both channels, discarding the
# imaginary part entirely.)
# The ndim guard makes the cell safe to re-run: once converted, ivecs_all is 3-D.
if ivecs_all.ndim == 2:
    ivecs_all = np.stack((ivecs_all.real, ivecs_all.imag), axis=1).astype(np.float64, copy=False)

print(f"ivecs_all.shape = {ivecs_all.shape}")

## 1.3 Train-test-validate split

In [None]:
# Shuffle samples and labels with one shared permutation before slicing.
shuffler = np.random.permutation(N_IV)
ivecs_all, labels_all = ivecs_all[shuffler], labels_all[shuffler]

# Split fractions and the corresponding sample counts.
p_train, p_val, p_test = 0.7, 0.2, 0.1
n_train = int(p_train*N_IV)
n_val = int(p_val*N_IV)
n_test = int(p_test*N_IV)  # informational only: the test split takes whatever remains

_val_start = n_train
_test_start = n_train + n_val

X_train, y_train = ivecs_all[:_val_start], labels_all[:_val_start]
X_val, y_val = ivecs_all[_val_start:_test_start], labels_all[_val_start:_test_start]
X_test, y_test = ivecs_all[_test_start:], labels_all[_test_start:]

## 1.4 Load extra zero-energy data to balance training dataset, if specified

In [None]:
LOAD_EXTRA_ZEROS = True
N_EXTRA_SAMPLES = 100 # number of additional sample files (indices N_SAMPLES .. N_SAMPLES+N_EXTRA_SAMPLES-1) scanned for zero-energy eigenvectors
if LOAD_EXTRA_ZEROS:

    # Collect only the zero-energy (eigenvalue < FIRST_NONZERO) rows of each extra sample.
    ivals_chunks = []
    ivecs_chunks = []
    for i in range(N_SAMPLES, N_SAMPLES+N_EXTRA_SAMPLES):
        ivals_i = np.load(os.path.join(RESULT_DIR, f"ivals_{i}.npy"))
        ivecs_i = np.load(os.path.join(RESULT_DIR, f"ivecs_{i}.npy"))

        zero_mask = ivals_i < FIRST_NONZERO
        ivals_chunks.append(ivals_i[zero_mask])
        ivecs_chunks.append(ivecs_i[zero_mask, :])

    # Concatenating (k_i, N_DIM) chunks keeps a well-formed (0, N_DIM) array even
    # if no zero-energy eigenvector was found.
    ivals_all_0 = np.concatenate(ivals_chunks)
    ivecs_all_0 = np.concatenate(ivecs_chunks, axis=0)

    # Reshape complex 1xN_DIM into real 2xN_DIM: channel 0 = real part, channel 1 = imaginary part.
    # (Previously the real part was written to both channels.)
    ivecs_all_0 = np.stack((ivecs_all_0.real, ivecs_all_0.imag), axis=1).astype(np.float64, copy=False)
    print(f"ivecs_all_0.shape = {ivecs_all_0.shape}")

    X_train_0 = ivecs_all_0
    # Use the same labelling rule as the main dataset: zero-energy -> 0, finite-energy -> 1.
    # (The previous `ivals_all_0 < FIRST_NONZERO` gave these zero-energy vectors label 1.)
    y_train_0 = (ivals_all_0 >= FIRST_NONZERO).astype(np.int64)
    X_train = np.concatenate((X_train, X_train_0), axis=0)
    y_train = np.concatenate((y_train, y_train_0), axis=0)

    print(f"X_train.shape = {X_train.shape}")
    print(f"y_train.shape = {y_train.shape}")

## 1.5 Balance training set

$r_{train} = $ desired proportion of zero-energy eigenvectors

$N_0 =$ number of zero-energy eigenvectors

$N_f =$ number of finite-energy eigenvectors

$N_{add} =$ number of zero-energy eigenvectors we must add to achieve $r_{train}$



To find $N_{add}$, we start from the definition of $r_{train}$ after the extra vectors have been added:

$r_{train} \equiv \frac{N_0+N_{add}}{N_0+N_f+N_{add}} $

$N_0+N_{add} = r_{train} (N_0+N_f+N_{add})$

$N_0+N_{add} = r_{train}*N_0+r_{train}*N_f+r_{train}*N_{add}$

$N_{add}(1-r_{train}) = N_0(r_{train}-1)+r_{train}*N_f$


And finally,

$N_{add} = \frac{N_0(r_{train}-1)+N_f*r_{train}}{1-r_{train}}$

In [None]:
# Oversample (with replacement) zero-energy eigenvectors until they make up
# a fraction R_TRAIN of the training set.
if not 0 <= R_TRAIN < 1:
    raise ValueError(f"R_TRAIN must be in [0, 1), got {R_TRAIN}")

N_0 = int(np.count_nonzero(y_train == 0))  # zero-energy count
N_f = int(np.count_nonzero(y_train == 1))  # finite-energy count
N_add = (N_0*(R_TRAIN-1)+N_f*R_TRAIN)/(1-R_TRAIN)
# N_add is negative when the set already holds more than R_TRAIN zero-energy
# vectors; in that case nothing is added rather than crashing in np.random.choice.
n_add = max(0, int(N_add))

X_train_0 = X_train[y_train==0]
if n_add > 0 and X_train_0.shape[0] == 0:
    raise ValueError("Cannot balance the training set: it contains no zero-energy eigenvectors to clone.")
X_train_0_clone_idx = np.random.choice(X_train_0.shape[0], size=n_add, replace=True)
X_train_0_clone = X_train_0[X_train_0_clone_idx]  # fancy indexing already returns a copy
y_train_0_clone = np.zeros(shape=(X_train_0_clone.shape[0],), dtype=y_train.dtype)

X_train = np.concatenate((X_train, X_train_0_clone), axis=0)
y_train = np.concatenate((y_train, y_train_0_clone), axis=0)

print(f"X_train.shape: {X_train.shape}")
print(f"Fraction of zero-energy eigenvectors in training set: {np.count_nonzero(y_train==0)/len(y_train)}")


## 1.6 Convert to Torch tensors that CNN can read

In [None]:
def _as_cuda_float(array):
    """Wrap a NumPy array as a float32 tensor and move it to the GPU."""
    return torch.from_numpy(array).float().cuda()

X_train, y_train = _as_cuda_float(X_train), _as_cuda_float(y_train)
X_val, y_val = _as_cuda_float(X_val), _as_cuda_float(y_val)
X_test, y_test = _as_cuda_float(X_test), _as_cuda_float(y_test)

# 2. CNN

In [None]:
# Three-sample mini-batch used to probe the output shape of each layer below.
X_i = X_train[:3]
print(f"X_i.shape: {X_i.shape}")

In [None]:
# Shape probe: 2 input channels -> 1 output channel, kernel width N_DIM//16.
conv1 = nn.Conv1d(in_channels=2, out_channels=1, kernel_size=N_DIM//16, stride=1).cuda()
X1 = conv1(X_i)
print(f"X1.shape: {X1.shape}")

In [None]:
# Shape probe: stride-1 max-pooling with window N_DIM//64 over the conv output.
mp1 = nn.MaxPool1d(kernel_size=N_DIM//64, stride=1).cuda()
X2 = mp1(X1)
print(f"X2.shape = {X2.shape}")

In [None]:
# Shape probe: dropout leaves the tensor shape unchanged.
drop1 = nn.Dropout(p=0.5).cuda()
X3 = drop1(X2)
print(f"X3.shape = {X3.shape}")

In [None]:
class BinaryClassifierCNN(nn.Module):
    """1-D CNN mapping a 2-channel real vector to a probability in [0, 1].

    Architecture: Conv1d(2 -> 1) -> ReLU -> MaxPool1d -> Dropout ->
    Linear -> ReLU -> Linear -> ReLU -> Linear -> sigmoid.

    Args:
        n_dim: length of each input channel. Defaults to the notebook-level
            ``N_DIM``. The convolution kernel is ``n_dim // 16`` wide, the
            pooling window ``n_dim // 64`` wide, and the first linear layer's
            input width is derived from those sizes (238 for ``n_dim = 256``)
            instead of being hard-coded.

    Raises:
        ValueError: if ``n_dim < 64`` (the pooling window would be empty).
    """

    def __init__(self, n_dim=None):
        super().__init__()

        if n_dim is None:
            n_dim = N_DIM
        conv_kernel = n_dim // 16
        pool_kernel = n_dim // 64
        if pool_kernel < 1:
            raise ValueError(f"n_dim must be at least 64, got {n_dim}")

        self.conv1 = nn.Conv1d(2, 1, conv_kernel, stride=1)
        self.mp1 = nn.MaxPool1d(kernel_size=pool_kernel, stride=1)
        self.drop1 = nn.Dropout(p=0.5)

        # Stride-1, unpadded conv/pool each shorten the sequence by (kernel - 1).
        conv_len = n_dim - conv_kernel + 1
        pool_len = conv_len - pool_kernel + 1
        self.fc1 = nn.Linear(pool_len, 100)
        self.fc2 = nn.Linear(100, 20)
        self.fc3 = nn.Linear(20, 1)

    # Function to pass data forward through network.
    def forward(self, x):
        """Run a batch of shape (batch, 2, n_dim) through the network.

        Returns:
            Tensor of shape (batch, 1) holding sigmoid outputs.
        """
        out1 = F.relu(self.conv1(x))
        out2 = self.drop1(self.mp1(out1))
        out3 = F.relu(self.fc1(out2))
        out4 = F.relu(self.fc2(out3))
        # torch.sigmoid replaces the deprecated F.sigmoid.
        out5 = torch.sigmoid(self.fc3(out4))
        # out5 is (batch, 1, 1); drop the leftover channel axis.
        return torch.squeeze(out5, dim=1)

# Build the real model first, then the random-label baseline, both on the GPU.
model = BinaryClassifierCNN().cuda()
model_random = BinaryClassifierCNN().cuda()


# Book-keeping for training runs. Re-instantiating the models means these
# counters, dicts and history lists must be reset too, which this cell does.
RUN_COUNTER = 0
RUNS_DICT, RUNS_DICT_RANDOM = {}, {}

# History of the real model
mean_train_loss_list, train_loss_list, val_acc_list, pos_pred_list = [], [], [], []

# History of the randomly-trained model
mean_train_loss_list_random, train_loss_list_random = [], []
val_acc_list_random, pos_pred_list_random = [], []

In [None]:
# Sanity check: push the probe batch through the full model and report the output shape.
X_out = model.forward(X_i) if False else model(X_i)
print(f"X_out.shape = {X_out.shape}")

# 3. Train

## 3.1 Define training function

In [None]:
def train(model, optimizer, loss_func, X_train, y_train, X_val, y_val, batch_size, n_epochs):
    """Train `model` with mini-batches and track validation statistics after every step.

    Args:
        model: module called as ``model(X)``; its output is squeezed before the loss.
        optimizer: optimizer over ``model``'s parameters.
        loss_func: callable ``loss_func(output, target)`` returning a scalar tensor.
        X_train, y_train: training inputs / targets, indexed along axis 0.
        X_val, y_val: validation inputs / targets, evaluated in full after every batch.
        batch_size: samples per optimisation step; a trailing partial batch is dropped.
        n_epochs: number of passes over the (reshuffled) training data.

    Returns:
        Tuple ``(mean_train_loss_list, train_loss_list, validation_acc_list, pos_pred_list)``:
        the per-epoch mean training loss, followed by per-batch training loss,
        validation accuracy and fraction of positive (rounded == 1) validation predictions.

    The model is returned in the train/eval mode it had on entry.
    """
    mean_train_loss_list = []
    train_loss_list = []
    validation_acc_list = []
    pos_pred_list = []

    n_samples = X_train.shape[0]
    n_batch = n_samples//batch_size

    # Loop-invariant validation targets: move to host once instead of once per batch.
    y_val_np = y_val.detach().cpu().numpy()
    n_val = y_val.shape[0]

    was_training = model.training

    tic = time.time()
    for i in tqdm(range(n_epochs)):
        # shuffle samples
        shuffler = np.random.permutation(n_samples)
        X_train = X_train[shuffler]
        y_train = y_train[shuffler]

        train_loss_i = []
        val_acc_i = []
        pos_pred_i = []

        for j in range(n_batch):
            X_i = X_train[j*batch_size:(j+1)*batch_size]
            y_i = y_train[j*batch_size:(j+1)*batch_size]

            # Training step. Gradients must be cleared before every backward pass;
            # clearing them only once per epoch made each step use the running sum
            # of all previous batch gradients in that epoch.
            model.train()
            optimizer.zero_grad()
            output = torch.squeeze(model(X_i))
            loss_ij = loss_func(output, y_i)
            train_loss_i.append(loss_ij.item())

            loss_ij.backward()
            optimizer.step()

            # Validation in eval mode so dropout does not randomise the predictions.
            model.eval()
            with torch.no_grad():
                output_val = torch.round(torch.squeeze(model(X_val))).detach().cpu().numpy()

            val_acc_ij = (output_val == y_val_np).sum()/n_val
            val_acc_i.append(val_acc_ij)

            pos_pred_ij = float(output_val.mean())
            pos_pred_i.append(pos_pred_ij)

        mean_train_loss_list.append(np.mean(np.array(train_loss_i)))
        train_loss_list.extend(train_loss_i)
        validation_acc_list.extend(val_acc_i)
        pos_pred_list.extend(pos_pred_i)

    model.train(was_training)

    duration = time.time() - tic
    print(f"Training: {duration//60} minutes, {duration%60} seconds")
    return mean_train_loss_list, train_loss_list, validation_acc_list, pos_pred_list

## 3.2 Train model

In [None]:
# Hyper-parameters for this run; edit here to run a further round with different settings.
lr_run, n_ep_run, bs_run = LR, N_EPOCHS, BATCH_SIZE

optimizer = torch.optim.Adam(model.parameters(), lr=lr_run)
loss_func = nn.BCELoss()
_run_history = train(model, optimizer, loss_func, X_train, y_train, X_val, y_val, bs_run, n_ep_run)
mean_train_loss_list_run, train_loss_list_run, val_acc_list_run, pos_pred_list_run = _run_history

# Append this run's curves to the cumulative history shared by all runs.
for _history, _new in (
    (mean_train_loss_list, mean_train_loss_list_run),
    (train_loss_list, train_loss_list_run),
    (val_acc_list, val_acc_list_run),
    (pos_pred_list, pos_pred_list_run),
):
    _history.extend(_new)

# Record the settings so the random-label baseline can replay the same schedule.
RUN_COUNTER += 1
RUNS_DICT[RUN_COUNTER] = {"Number of epochs": n_ep_run, "Learning rate": lr_run, "Batch size": bs_run}
for k in RUNS_DICT[RUN_COUNTER]:
    print(f"{k}: {RUNS_DICT[RUN_COUNTER][k]}")

# 4. Evaluation

## 4.1 Plot loss, validation accuracy

In [None]:
# Total epochs across all recorded runs; one x-tick is drawn per epoch boundary.
n_epochs_actual = sum(run_dict["Number of epochs"] for run_dict in RUNS_DICT.values())

_n_steps = len(train_loss_list)
_epoch_ticks = np.linspace(0, _n_steps, n_epochs_actual+1)
_epoch_labels = [f"{i}" for i in range(n_epochs_actual+1)]

# --- Training loss (per batch, plus per-epoch mean) ---
mean_train_xrange = np.linspace(0, _n_steps, len(mean_train_loss_list))
fig, ax = plt.subplots()
ax.scatter(range(_n_steps), train_loss_list, alpha=0.15, label="Raw training loss", c="b")
ax.plot(mean_train_xrange, mean_train_loss_list, label="Epoch-averaged training loss", c="r")
ax.set_xticks(_epoch_ticks)
ax.set_xticklabels(_epoch_labels)
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.legend()
ax.set_title("Training Loss vs. Epochs")
plt.show()
plt.close(fig)

# --- Validation accuracy (recorded after every batch) ---
fig, ax = plt.subplots()
ax.plot(range(len(val_acc_list)), val_acc_list)
ax.set_ylabel("Accuracy")
ax.set_xticks(_epoch_ticks)
ax.set_xticklabels(_epoch_labels)
ax.set_xlabel("Epochs")
ax.set_title("Validation accuracy vs. Epochs")
plt.show()
plt.close(fig)

# --- Fraction of positive predictions, against the class ratios of the datasets ---
_steps = range(len(pos_pred_list))
R_VAL = (y_val.sum()/len(y_val)).item()
fig, ax = plt.subplots()
ax.plot(_steps, pos_pred_list, label="Model predictions")
ax.plot(_steps, [1-R_TRAIN]*len(pos_pred_list), label="Training set")
ax.plot(_steps, [R_VAL]*len(pos_pred_list), label="Validation/Test set")
ax.set_ylabel("Proportion of predictions that are positive")
ax.set_xticks(_epoch_ticks)
ax.set_xticklabels(_epoch_labels)
ax.set_xlabel("Epochs")
ax.set_title("Proportion of positive-predictions (on validation-set) vs. Epochs")
ax.legend()
plt.show()
plt.close(fig)

## 4.2 Evaluate performance on test-set, with Monte-Carlo p-values controlling for the same ratio of 0's to 1's

For the Monte-Carlo p-values, we're basically checking: what are the odds of getting this level of performance by simply guessing 0's or 1's at random at the same proportion as the trained neural network? 

So the null hypothesis is:
- The neural network only learned to predict a certain ratio of 0's-to-1's in order to minimize the loss function. 

And the alternative hypothesis is:
- The neural network learned an actual (latent) structure/relationship within/between the zero-energy and finite-energy eigenvectors. 

If the null hypothesis is true, we should expect that randomly choosing 0's or 1's - at the same ratio as the trained model - should give similar performance to the trained model. Looking at the Monte-Carlo distribution of performance metrics (i.e. accuracy, precision, recall, F1), the performance of the trained model should not be too far from average. 


If the alternative hypothesis is true, randomly choosing 0's or 1's at the same ratio as the trained model will not reproduce the trained model's performance. The neural network's performance metrics should then lie in the upper tail of the Monte-Carlo distribution of performance metrics, rather than near its average.

First, some helper functions

In [None]:
# Computes binary classification performance metrics
def get_metrics(y_true, y_pred):
    """Return (accuracy, precision, recall, F1) for binary labels in {0, 1}.

    Args:
        y_true: array-like of ground-truth labels.
        y_pred: array-like of predicted labels, same length as `y_true`.

    Returns:
        Tuple of four Python floats. A metric whose denominator is zero
        (e.g. precision when nothing is predicted positive) is reported as 0.0
        instead of NaN.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    correct = (y_pred == y_true)
    pred_pos = (y_pred == 1)
    pred_neg = (y_pred == 0)

    # np.count_nonzero runs in C; the builtin sum() iterated element by element,
    # which dominated the runtime of the 1e4-sample Monte-Carlo loop.
    true_pos = int(np.count_nonzero(correct & pred_pos))
    true_neg = int(np.count_nonzero(correct & pred_neg))
    false_pos = int(np.count_nonzero(~correct & pred_pos))
    false_neg = int(np.count_nonzero(~correct & pred_neg))

    def _ratio(num, den):
        # Define x/0 as 0.0 so degenerate predictions do not propagate NaN.
        return num/den if den else 0.0

    acc = _ratio(true_pos+true_neg, true_pos+true_neg+false_pos+false_neg)
    prec = _ratio(true_pos, true_pos+false_pos)
    recall = _ratio(true_pos, true_pos+false_neg)
    f1 = _ratio(2*prec*recall, prec+recall)
    return acc, prec, recall, f1

# Generates random binary monte-carlo predictions, with the given probability of a positive prediction
def monte_carlo_bin_pred(sample_size, n_samples, p_pos_pred, vary_p_pos):
    """Draw `n_samples` random 0/1 prediction vectors of length `sample_size`.

    Each entry is 1 with probability `p_pos_pred`. When `vary_p_pos` is truthy,
    every vector instead uses its own probability, picked from a 100-point grid
    spanning [p_pos_pred - vary_p_pos, p_pos_pred + vary_p_pos] clipped to [0, 1].

    Returns:
        Array of shape (n_samples, sample_size) containing 0.0 / 1.0.
    """
    mc_predictions = np.zeros(shape=(n_samples, sample_size))
    for row in range(n_samples):
        p_row = p_pos_pred
        if vary_p_pos:
            lower = max(0, p_pos_pred-vary_p_pos)
            upper = min(p_pos_pred+vary_p_pos, 1)
            p_row = np.random.choice(np.linspace(lower, upper, 100), size=1)[0]
        mc_predictions[row] = np.random.choice([0, 1], size=sample_size, p=[1-p_row, p_row])
    return mc_predictions

# Computes (distribution of) monte-carlo performance metrics
def monte_carlo_performance(y_truth, mc_preds, plots):
    """Score every Monte-Carlo prediction vector against the ground truth.

    Args:
        y_truth: ground-truth labels.
        mc_preds: 2-D array, one random prediction vector per row.
        plots: if truthy, show a 40-bin histogram of each metric.

    Returns:
        Dict with keys "Accuracy", "Precision", "Recall", "F1", each mapping to
        an array with one value per row of `mc_preds`.
    """
    n_mc_samples = mc_preds.shape[0]

    acc = np.zeros(shape=(n_mc_samples))
    prec = np.zeros(shape=(n_mc_samples))
    recall = np.zeros(shape=(n_mc_samples))
    f1 = np.zeros(shape=(n_mc_samples))
    for i in range(n_mc_samples):
        # get_metrics takes (y_true, y_pred). Passing them the other way round
        # (as before) swaps false positives with false negatives, i.e. swaps
        # precision and recall.
        acc[i], prec[i], recall[i], f1[i] = get_metrics(y_truth, mc_preds[i])

    if plots:
        histograms = (
            (acc, "Monte-Carlo Accuracy"),
            (prec, "Monte-Carlo Precision"),
            (recall, "Monte-Carlo Recall"),
            (f1, "Monte-Carlo F1-Scores"),
        )
        for values, title in histograms:
            plt.figure()
            plt.hist(values, bins=40)
            plt.title(title)
            plt.show()
            plt.close()

    mc_performance_dict = {"Accuracy": acc,
                "Precision": prec,
                "Recall": recall,
                "F1": f1}
    return mc_performance_dict
    
# Computes performance metric of given predictions, and the monte-carlo p-values of those metrics
def get_performance(y_truth, y_pred, mc_performance_dict, plots=False):
    """Compute the model's metrics and their one-sided Monte-Carlo p-values.

    Args:
        y_truth: ground-truth labels.
        y_pred: model predictions.
        mc_performance_dict: dict with keys "Accuracy", "Precision", "Recall",
            "F1", each an array of that metric over the Monte-Carlo samples.
        plots: accepted for interface compatibility; not used here.

    Returns:
        Dict mapping each metric name to ``(value, p_value)`` where
        ``p_value = (#{MC value >= value} + 1) / (n_mc_samples + 1)``.
        Ties count as "at least as good": with discrete metrics such as
        accuracy, a strict comparison would understate the p-value.
    """
    model_metrics = dict(zip(("Accuracy", "Precision", "Recall", "F1"),
                             get_metrics(y_truth, y_pred))) # model performance metrics

    n_mc_samples = len(mc_performance_dict["Accuracy"]) # number of monte carlo samples

    performance_dict = {}
    for name, value in model_metrics.items():
        mc_values = np.asarray(mc_performance_dict[name])
        p_value = (np.sum(mc_values >= value)+1)/(n_mc_samples+1)
        performance_dict[name] = (value, p_value)
    return performance_dict

In [None]:
PLOTS = True # Whether to plot Monte-Carlo distributions
VARY_P_POS = None # Amount by which the Monte-Carlo predictions should vary from the model's prediction of the proportion of positive predictions. If None, then no variation
# TO-DO: If VARY_P_POS is not None, then what distribution should we choose from? Uniform? Gaussian? Beta?

# Generate test predictions.
# Switch to eval mode first: in training mode the dropout layer stays active,
# which makes the test predictions random and understates performance.
# The previous mode is restored afterwards so further training is unaffected.
_was_training = model.training
model.eval()
with torch.no_grad():
    output_test = torch.round(torch.squeeze(model(X_test)))
model.train(_was_training)

p_pos_pred = (output_test==1).float().mean() # Predicted proportion of 1's, use these for Monte-Carlo simulations
p_pos_true = (y_test==1).float().mean() # True proportion of 1's
print(f"Predicted proportion of 1's: {p_pos_pred:.2f}")
print(f"True proportion of 1's: {p_pos_true:.2f}")

# Generate Monte-Carlo predictions and performance metrics.
# NOTE: the baseline is cached behind MC_SIMULATED. After re-training the model,
# reset MC_SIMULATED to False, otherwise the cached baseline (drawn with the old
# p_pos_pred) is reused.
if not MC_SIMULATED:
    MC_PREDICTIONS = monte_carlo_bin_pred(len(y_test), int(1e4), p_pos_pred.item(), vary_p_pos=VARY_P_POS)
    MC_PERFORMANCE_DICT = monte_carlo_performance(y_test.cpu().numpy(), MC_PREDICTIONS, plots=PLOTS)
    MC_SIMULATED = True

# Get model's performance metrics, with p-values based on Monte-Carlo simulations
performance_dict = get_performance(y_test.cpu().numpy(), output_test.cpu().numpy(), MC_PERFORMANCE_DICT, plots=PLOTS)
print(f"Trained model performance:")
for key, value in performance_dict.items():
    print(f"  {key}: ({100*value[0]:.2f}%, p={value[1]:.5f})")

## 4.3 Compare to randomly-trained model

In [None]:
TEST_RANDOM_ACTUAL = False # Allows user to change their mind and not run random model
if TEST_RANDOM:

    # 1. Train random model for same epochs, learning rate as true model
    for run, run_dict in RUNS_DICT.items():
        if run in RUNS_DICT_RANDOM:
            continue  # this run's schedule has already been replayed on the random model

        # Create random labels: same label multiset, but decoupled from the inputs
        shuffler = np.random.permutation(y_train.shape[0])
        y_train_random = y_train[shuffler]

        optimizer_random = torch.optim.Adam(model_random.parameters(), lr=run_dict["Learning rate"])
        loss_func = nn.BCELoss()
        mean_train_loss_list_random_run, train_loss_list_random_run, val_acc_list_random_run, pos_pred_list_random_run = train(model_random, optimizer_random, loss_func, X_train, y_train_random, X_val, y_val, run_dict["Batch size"], run_dict["Number of epochs"])
        mean_train_loss_list_random.extend(mean_train_loss_list_random_run)
        train_loss_list_random.extend(train_loss_list_random_run)
        val_acc_list_random.extend(val_acc_list_random_run)
        pos_pred_list_random.extend(pos_pred_list_random_run)

        RUNS_DICT_RANDOM[run] = RUNS_DICT[run].copy()

    ## 2. Plot loss, validation accuracy
    # Count epochs from the random model's own run log rather than relying on a
    # variable left behind by the plotting cell of the real model.
    n_epochs_random = sum(d["Number of epochs"] for d in RUNS_DICT_RANDOM.values())
    epoch_ticks_random = np.linspace(0, len(train_loss_list_random), n_epochs_random+1)
    epoch_labels_random = [f"{i}" for i in range(n_epochs_random+1)]

    mean_train_xrange_random = np.linspace(0, len(train_loss_list_random), len(mean_train_loss_list_random))
    plt.figure()
    plt.scatter(range(len(train_loss_list_random)), train_loss_list_random, alpha=0.15, c="b", label="Raw training loss")
    plt.plot(mean_train_xrange_random, mean_train_loss_list_random, c="r", label="Epoch-averaged training loss")
    plt.ylabel("Loss")
    plt.xticks(ticks=epoch_ticks_random, labels=epoch_labels_random)
    plt.xlabel("Epochs")
    plt.legend()
    plt.title("Random Model: Training Loss vs. Epochs")
    plt.show()
    plt.close()

    plt.figure()
    plt.plot(range(len(val_acc_list_random)), val_acc_list_random)
    plt.ylabel("Accuracy")
    plt.xticks(ticks=epoch_ticks_random, labels=epoch_labels_random)
    plt.xlabel("Epochs")
    plt.title("Random Model: Validation Accuracy vs. Epochs")
    plt.show()
    plt.close()

    plt.figure()
    plt.plot(range(len(pos_pred_list_random)), pos_pred_list_random, label="Model predictions")
    plt.plot(range(len(pos_pred_list_random)), [1-R_TRAIN for i in pos_pred_list_random], label="Training set")
    R_VAL = (y_val.sum()/len(y_val)).item()
    plt.plot(range(len(pos_pred_list_random)), [R_VAL for i in pos_pred_list_random], label="Validation/Test set")
    plt.ylabel("Proportion of predictions that are positive")
    plt.xticks(ticks=epoch_ticks_random, labels=epoch_labels_random)
    plt.xlabel("Epochs")
    plt.title("Random Model: Proportion of positive-predictions (on validation-set) vs. Epochs")
    plt.legend()
    plt.show()
    plt.close()

    # 3. Evaluate on test-set, with p-values based on Monte-Carlo simulations

    # 3.a. Generate test predictions in eval mode (dropout disabled), then restore
    #      whatever mode the model was in.
    _was_training_random = model_random.training
    model_random.eval()
    with torch.no_grad():
        output_test_random = torch.round(torch.squeeze(model_random(X_test)))
    model_random.train(_was_training_random)
    p_pos_pred_random = (output_test_random==1).float().mean() # Predicted proportion of 1's, use these for Monte-Carlo simulations
    print(f"Predicted proportion of 1's: {p_pos_pred_random:.2f}")
    print(f"True proportion of 1's: {p_pos_true:.2f}")

    # 3.b. Generate Monte-Carlo predictions and performance metrics
    #      (cached behind MC_RANDOM_SIMULATED; reset the flag after re-training)
    if not MC_RANDOM_SIMULATED:
        MC_PREDICTIONS_RANDOM = monte_carlo_bin_pred(len(y_test), int(1e4), p_pos_pred_random.item(), vary_p_pos=VARY_P_POS)
        MC_PERFORMANCE_DICT_RANDOM = monte_carlo_performance(y_test.cpu().numpy(), MC_PREDICTIONS_RANDOM, plots=False)
        MC_RANDOM_SIMULATED = True

    # 3.c. Get model's performance metrics, with p-values based on Monte-Carlo simulations
    performance_dict_random = get_performance(y_test.cpu().numpy(), output_test_random.cpu().numpy(), MC_PERFORMANCE_DICT_RANDOM, plots=PLOTS)
    print(f"Random model performance:")
    for key, value in performance_dict_random.items():
        print(f"  {key}: ({100*value[0]:.2f}%, p={value[1]:.5f})")
    
    TEST_RANDOM_ACTUAL = True

# 5. Save model and about.txt

In [None]:
# Find the highest existing run index among files named exactly "run_<int>.pt".
# Other files in CNN_DIR (no underscore, non-numeric suffix, ".pt" elsewhere in
# the name) are ignored instead of crashing the int() parse.
prev_runs = []
for fname in os.listdir(CNN_DIR):
    stem, ext = os.path.splitext(fname)
    run_id = stem[len("run_"):]
    if ext == ".pt" and stem.startswith("run_") and run_id.isdigit():
        prev_runs.append(int(run_id))
max_run = max(prev_runs, default=0)

RUN_PATH = os.path.join(CNN_DIR, f"run_{max_run+1}.pt")
torch.save(model.state_dict(), RUN_PATH)

# Swap only the file extension; str.replace would also rewrite any ".pt"
# occurring inside a directory name.
RUN_ABOUT_PATH = os.path.splitext(RUN_PATH)[0] + ".txt"
with open(RUN_ABOUT_PATH, "w") as f:
    # Random seeds for reproducibility
    f.write(f"Numpy random seed: {NP_RANDOM_SEED}")
    f.write(f"\nTorch random seed: {TORCH_RANDOM_SEED}")
    
    # Physical constants
    f.write(f"\n\nN: {N}\nJ: {J}\nFIRST_NONZERO: {FIRST_NONZERO}") 

    # Training parameters
    f.write(f"\n\nModel: {model._modules}\nN_SAMPLES: {N_SAMPLES}\nR_TRAIN: {R_TRAIN}\nP_POS_TRUE: {p_pos_true}") # model parameters
    f.write("\nRUNS_DICT")
    for k, v in RUNS_DICT.items():
        f.write(f"\n  {k}: {v}")

    # Model performance
    f.write("\n\nPerformance at " + f"VARY_P_POS = {VARY_P_POS}")
    for k, v in performance_dict.items():
        f.write(f"\n  {k}: ({100*v[0]:.2f}%, p={v[1]})")

    # Random model performance (only if the random-model cell actually ran)
    if TEST_RANDOM and TEST_RANDOM_ACTUAL:
        f.write("\n\nRANDOM MODEL Performance at " + f"VARY_P_POS = {VARY_P_POS}")
        for k, v in performance_dict_random.items():
            f.write(f"\n  {k}: ({100*v[0]:.2f}%, p={v[1]})")