In [None]:
from utils import *
from model import *
from processing import *
from train_paper import *

import argparse
import matplotlib.pyplot as plt
from evaluation import *
from sklearn.model_selection import KFold
from sympy import fu

## Set data paths for loading

In [None]:
# CSV inputs for the three RandomCV folds: every fold has one LR file and one HR file
# stored under the same parent directory.
_RANDOM_CV_DIR = 'RandomCV/Train'

SPLIT_1_LR_PATH = f'{_RANDOM_CV_DIR}/Fold1/lr_split_1.csv'
SPLIT_1_HR_PATH = f'{_RANDOM_CV_DIR}/Fold1/hr_split_1.csv'

SPLIT_2_LR_PATH = f'{_RANDOM_CV_DIR}/Fold2/lr_split_2.csv'
SPLIT_2_HR_PATH = f'{_RANDOM_CV_DIR}/Fold2/hr_split_2.csv'

SPLIT_3_LR_PATH = f'{_RANDOM_CV_DIR}/Fold3/lr_split_3.csv'
SPLIT_3_HR_PATH = f'{_RANDOM_CV_DIR}/Fold3/hr_split_3.csv'

## Set model arguments

In [None]:
class Args(argparse.Namespace):
    """Fixed hyperparameter bundle passed to the model and the training helpers.

    The values are plain class attributes, so any ``Args()`` instance exposes
    them through ordinary attribute access (``args.epochs``, ``args.lr``, ...).
    """

    # Size / architecture style options (grouping inferred from the names)
    lr_dim = 160
    hr_dim = 320
    hidden_dim = 640
    padding = 26
    double_convolution = True
    dropout = 0.1

    # Optimisation style options (grouping inferred from the names)
    epochs = 500
    lr = 1e-4
    lmbda = 0.1
    weight_decay = 0


# Single shared instance used by the cells below
args = Args()

## 3-Fold CV

In [None]:
# Set seeds
SEED = 42
GET_METRICS = True
VAL_SIZE = 20  # trailing entries (along dim 0) of each training split held out for validation
torch.manual_seed(SEED)
np.random.seed(SEED)

# Load Data
# NOTE(review): the trailing 93 is forwarded to load_matrix_data unchanged; presumably
# the number of samples per split -- confirm against load_matrix_data.
split_1_adj, split_1_ground_truth = load_matrix_data(SPLIT_1_LR_PATH, SPLIT_1_HR_PATH, 93)
split_2_adj, split_2_ground_truth = load_matrix_data(SPLIT_2_LR_PATH, SPLIT_2_HR_PATH, 93)
split_3_adj, split_3_ground_truth = load_matrix_data(SPLIT_3_LR_PATH, SPLIT_3_HR_PATH, 93)

# (adj, ground_truth) pairs in fold order so each fold can be selected by index
splits = [
    (split_1_adj, split_1_ground_truth),
    (split_2_adj, split_2_ground_truth),
    (split_3_adj, split_3_ground_truth),
]

print("DATA LOADED")

fold_results = []               # evaluate_all() result for each left-out fold
train_losses_all_with_val = []  # per-fold training losses from the run with validation
val_losses_all_with_val = []    # per-fold validation losses from the same run
train_losses_all_no_val = []    # per-fold training losses from the retraining run

# Run 3-fold CV: split i is the left-out test fold, the other two provide
# the training and validation data.
for i, (test_adj, test_ground_truth) in enumerate(splits):
    print(f"Fold {i+1}:")

    # The two splits that are not the test fold, kept in their original order
    remaining = [pair for j, pair in enumerate(splits) if j != i]

    # Determine train and validation splits: the last VAL_SIZE entries of each
    # remaining split form the validation set, everything before them the training set.
    train_adj = torch.cat([adj[:-VAL_SIZE] for adj, _ in remaining], dim=0)
    train_ground_truth = torch.cat([truth[:-VAL_SIZE] for _, truth in remaining], dim=0)
    val_adj = torch.cat([adj[-VAL_SIZE:] for adj, _ in remaining], dim=0)
    val_ground_truth = torch.cat([truth[-VAL_SIZE:] for _, truth in remaining], dim=0)

    # Initialize model
    model = SuperBLTGraph(args)

    train_labels = create_discrepancy(train_ground_truth, zero_shift=-0.05)

    # Train model with validation; best_epoch and lr_schedule are reused for retraining
    train_losses, val_losses, best_epoch, lr_schedule = train(model, train_adj, train_labels, args, val_adj, val_ground_truth)
    train_losses_all_with_val.append(train_losses)
    val_losses_all_with_val.append(val_losses)

    # Retrain a fresh model on the full training set (train + validation).
    # This step mirrors the ClusterCV cell; without it train_losses_all_no_val
    # stays empty and the "No Validation" plot cell fails with an IndexError.
    full_train_adj = torch.cat((train_adj, val_adj), dim=0)
    full_train_ground_truth = torch.cat((train_ground_truth, val_ground_truth), dim=0)

    model = SuperBLTGraph(args)
    # NOTE(review): as in the ClusterCV cell, these relabelled targets are computed
    # but retrain_model receives full_train_ground_truth -- confirm which targets
    # retrain_model expects.
    train_labels = create_discrepancy(full_train_ground_truth, zero_shift=-0.05)

    train_losses = retrain_model(model, full_train_adj, full_train_ground_truth, args, lr_schedule, best_epoch)
    train_losses_all_no_val.append(train_losses)

    # Get metrics for the left-out fold and keep them instead of discarding them
    test_outputs = compute_output_hr(args, test_adj, model)
    metrics = evaluate_all(test_ground_truth.detach().numpy(), test_outputs)
    fold_results.append(metrics)

### Plot loss

In [None]:
# One figure per fold: training and validation loss curves from the run with validation
for fold_idx in range(3):
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(train_losses_all_with_val[fold_idx], label='Training Loss')
    ax.plot(val_losses_all_with_val[fold_idx], label='Validation Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'Fold {fold_idx+1} - Training and Validation Loss')
    ax.legend()
    ax.grid()
    fig.tight_layout()
    plt.show()

In [None]:
# One figure per fold: training loss curve recorded in train_losses_all_no_val
for fold_idx in range(3):
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(train_losses_all_no_val[fold_idx], label='Training Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'Fold {fold_idx+1} - Training Loss (No Validation)')
    ax.legend()
    ax.grid()
    fig.tight_layout()
    plt.show()

In [None]:
# Load 'ID-randomCV.csv': the first line supplies the column names and the
# first column becomes the row index.
identity_df = pd.read_csv(
    'ID-randomCV.csv',
    header=0,
    index_col=0,
)

In [None]:
# Show the loaded table as this cell's output
identity_df

In [None]:
# Append per-column 'mean' and 'std' summary rows.
# Both statistics are computed from the data rows only, before either summary row
# is written: assigning 'mean' first and then calling identity_df.std() would fold
# the mean row into the standard deviation. Dropping any summary rows left over
# from a previous run also makes the cell safe to re-execute.
data_rows = identity_df.drop(index=['mean', 'std'], errors='ignore')
column_mean = data_rows.mean()
column_std = data_rows.std()
identity_df.loc['mean'] = column_mean
identity_df.loc['std'] = column_std

In [None]:
# Show the table again, now that the 'mean' and 'std' rows have been added
identity_df

In [None]:
# Write the table, including the added 'mean' and 'std' rows, to a separate CSV file
identity_df.to_csv(path_or_buf='01-randomCV.csv')

# ClusterCV

## Set data paths for loading

In [None]:
# CSV inputs for the three ClusterCV folds (clusters A, B and C): every fold has
# one LR file and one HR file stored under the same parent directory.
_CLUSTER_CV_DIR = 'Cluster-CV'

SPLIT_1_LR_PATH = f'{_CLUSTER_CV_DIR}/Fold1/lr_clusterA.csv'
SPLIT_1_HR_PATH = f'{_CLUSTER_CV_DIR}/Fold1/hr_clusterA.csv'

SPLIT_2_LR_PATH = f'{_CLUSTER_CV_DIR}/Fold2/lr_clusterB.csv'
SPLIT_2_HR_PATH = f'{_CLUSTER_CV_DIR}/Fold2/hr_clusterB.csv'

SPLIT_3_LR_PATH = f'{_CLUSTER_CV_DIR}/Fold3/lr_clusterC.csv'
SPLIT_3_HR_PATH = f'{_CLUSTER_CV_DIR}/Fold3/hr_clusterC.csv'

## Set model arguments

In [None]:
class Args(argparse.Namespace):
    """Fixed hyperparameter bundle passed to the model and the training helpers.

    Same values as the ``Args`` defined earlier in this notebook; repeated here
    so the ClusterCV section carries its own configuration cell. The values are
    plain class attributes, reachable through ordinary attribute access.
    """

    # Size / architecture style options (grouping inferred from the names)
    lr_dim = 160
    hr_dim = 320
    hidden_dim = 640
    padding = 26
    double_convolution = True
    dropout = 0.1

    # Optimisation style options (grouping inferred from the names)
    epochs = 500
    lr = 1e-4
    lmbda = 0.1
    weight_decay = 0


# Single shared instance used by the cells below
args = Args()

## 3-Fold CV

In [None]:
# Set seeds
SEED = 42
GET_METRICS = True
VAL_SIZE = 20  # trailing entries (along dim 0) of each training split held out for validation
torch.manual_seed(SEED)
np.random.seed(SEED)

# Load Data
# NOTE(review): the trailing 103 / 103 / 76 are forwarded to load_matrix_data unchanged;
# presumably the number of samples in each cluster -- confirm against load_matrix_data.
split_1_adj, split_1_ground_truth = load_matrix_data(SPLIT_1_LR_PATH, SPLIT_1_HR_PATH, 103)
split_2_adj, split_2_ground_truth = load_matrix_data(SPLIT_2_LR_PATH, SPLIT_2_HR_PATH, 103)
split_3_adj, split_3_ground_truth = load_matrix_data(SPLIT_3_LR_PATH, SPLIT_3_HR_PATH, 76)

# (adj, ground_truth) pairs in fold order so each fold can be selected by index
splits = [
    (split_1_adj, split_1_ground_truth),
    (split_2_adj, split_2_ground_truth),
    (split_3_adj, split_3_ground_truth),
]

print("DATA LOADED")

fold_results = []               # evaluate_all() result for each left-out fold
train_losses_all_with_val = []  # per-fold training losses from the run with validation
val_losses_all_with_val = []    # per-fold validation losses from the same run
train_losses_all_no_val = []    # per-fold training losses from the retraining run

# Run 3-fold CV: split i is the left-out test fold, the other two provide
# the training and validation data.
for i, (test_adj, test_ground_truth) in enumerate(splits):
    print(f"Fold {i+1}:")

    # The two splits that are not the test fold, kept in their original order
    remaining = [pair for j, pair in enumerate(splits) if j != i]

    # Determine train and validation splits: the last VAL_SIZE entries of each
    # remaining split form the validation set, everything before them the training set.
    train_adj = torch.cat([adj[:-VAL_SIZE] for adj, _ in remaining], dim=0)
    train_ground_truth = torch.cat([truth[:-VAL_SIZE] for _, truth in remaining], dim=0)
    val_adj = torch.cat([adj[-VAL_SIZE:] for adj, _ in remaining], dim=0)
    val_ground_truth = torch.cat([truth[-VAL_SIZE:] for _, truth in remaining], dim=0)

    # Initialize model
    model = SuperBLTGraph(args)

    train_labels = create_discrepancy(train_ground_truth, zero_shift=-0.05)

    # Train model with validation; best_epoch and lr_schedule are reused for retraining
    train_losses, val_losses, best_epoch, lr_schedule = train(model, train_adj, train_labels, args, val_adj, val_ground_truth)
    train_losses_all_with_val.append(train_losses)
    val_losses_all_with_val.append(val_losses)

    # Retrain a fresh model on the full training set (train + validation)
    full_train_adj = torch.cat((train_adj, val_adj), dim=0)
    full_train_ground_truth = torch.cat((train_ground_truth, val_ground_truth), dim=0)

    model = SuperBLTGraph(args)
    # NOTE(review): these relabelled targets are computed but retrain_model below
    # receives full_train_ground_truth instead. Left unchanged to preserve current
    # behaviour -- confirm which targets retrain_model expects.
    train_labels = create_discrepancy(full_train_ground_truth, zero_shift=-0.05)

    train_losses = retrain_model(model, full_train_adj, full_train_ground_truth, args, lr_schedule, best_epoch)
    train_losses_all_no_val.append(train_losses)

    # Get metrics for the left-out fold and keep them instead of discarding them
    test_outputs = compute_output_hr(args, test_adj, model)
    metrics = evaluate_all(test_ground_truth.detach().numpy(), test_outputs)
    fold_results.append(metrics)

### Plot loss

In [None]:
# One figure per fold: training and validation loss curves from the run with validation
for fold_idx in range(3):
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(train_losses_all_with_val[fold_idx], label='Training Loss')
    ax.plot(val_losses_all_with_val[fold_idx], label='Validation Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'Fold {fold_idx+1} - Training and Validation Loss')
    ax.legend()
    ax.grid()
    fig.tight_layout()
    plt.show()

In [None]:
# One figure per fold: training loss curve from the retraining run without validation
for fold_idx in range(3):
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(train_losses_all_no_val[fold_idx], label='Training Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'Fold {fold_idx+1} - Training Loss (No Validation)')
    ax.legend()
    ax.grid()
    fig.tight_layout()
    plt.show()

In [None]:
# Load the table: the first line supplies the column names and the first column
# becomes the row index.
# NOTE(review): this cell belongs to the ClusterCV section and its result is later
# saved as '01-clusterCV.csv', yet it reads 'ID-randomCV.csv' -- confirm that the
# RandomCV file is really the intended input here.
identity_df = pd.read_csv(
    'ID-randomCV.csv',
    header=0,
    index_col=0,
)

In [None]:
# Show the loaded table as this cell's output
identity_df

In [None]:
# Append per-column 'mean' and 'std' summary rows.
# Both statistics are computed from the data rows only, before either summary row
# is written: assigning 'mean' first and then calling identity_df.std() would fold
# the mean row into the standard deviation. Dropping any summary rows left over
# from a previous run also makes the cell safe to re-execute.
data_rows = identity_df.drop(index=['mean', 'std'], errors='ignore')
column_mean = data_rows.mean()
column_std = data_rows.std()
identity_df.loc['mean'] = column_mean
identity_df.loc['std'] = column_std

In [None]:
# Show the table again, now that the 'mean' and 'std' rows have been added
identity_df

In [None]:
# Write the table, including the added 'mean' and 'std' rows, to a separate CSV file
identity_df.to_csv(path_or_buf='01-clusterCV.csv')