In [None]:
import argparse
import numpy as np
from evaluation import evaluate_all
from link_logic.gsr_net import GSRNet, Args, train, test
from MatrixVectorizer import MatrixVectorizer
from torch.optim import AdamW
import torch.optim as optim
import pandas as pd
import torch

def _load_fold(lr_path, hr_path):
    """Read one fold's low-/high-resolution CSV pair and return NumPy arrays.

    Both files are read without a header and with their first row skipped.
    """
    low_res = pd.read_csv(lr_path, header=None, skiprows=1).to_numpy()
    high_res = pd.read_csv(hr_path, header=None, skiprows=1).to_numpy()
    return low_res, high_res


def _to_matrices(vectors, size):
    """Anti-vectorize every row of *vectors* and stack the results."""
    return np.array([MatrixVectorizer.anti_vectorize(v, size) for v in vectors])


def main():
    """Run 3-fold cluster cross-validation of GSRNet and evaluate each fold.

    For every fold, the model is trained on the two other folds, tested on
    the held-out fold, and the metrics returned by ``evaluate_all`` are
    collected.

    Returns:
        list: one ``evaluate_all`` result per fold, in fold order (1, 2, 3).
    """
    import os  # local import: only needed to prepare the output directory

    random_seed = 42
    np.random.seed(random_seed)
    # Seed PyTorch as well; otherwise weight initialisation differs between
    # runs even though the NumPy seed is fixed.
    torch.manual_seed(random_seed)

    lr_nodes = 160  # size passed to anti_vectorize for low-resolution rows
    hr_nodes = 268  # size passed to anti_vectorize for high-resolution rows

    fold_paths = [
        ("data/clusterCV/Fold1/lr_clusterA.csv", "data/clusterCV/Fold1/hr_clusterA.csv"),
        ("data/clusterCV/Fold2/lr_clusterB.csv", "data/clusterCV/Fold2/hr_clusterB.csv"),
        ("data/clusterCV/Fold3/lr_clusterC.csv", "data/clusterCV/Fold3/hr_clusterC.csv"),
    ]
    folds = [_load_fold(lr_path, hr_path) for lr_path, hr_path in fold_paths]

    # Make sure the results directory exists up front, so a missing directory
    # is not discovered only after a full training run.
    output_dir = "cluster_res"
    os.makedirs(output_dir, exist_ok=True)

    # Store the fold results
    fold_results = []

    # Perform cross-validation: each fold is held out once for testing.
    for test_index, (low_res_test, high_res_test) in enumerate(folds):
        current_fold = test_index + 1
        print(f"Fold {current_fold}: ")

        # Training data is the concatenation of the remaining folds,
        # kept in their original fold order.
        low_res_train = np.concatenate(
            [lr for i, (lr, _) in enumerate(folds) if i != test_index]
        )
        high_res_train = np.concatenate(
            [hr for i, (_, hr) in enumerate(folds) if i != test_index]
        )

        # Rebuild matrices from the vectorized rows.
        train_input_matrices = _to_matrices(low_res_train, lr_nodes)
        test_input_matrices = _to_matrices(low_res_test, lr_nodes)
        train_output_matrices = _to_matrices(high_res_train, hr_nodes)
        test_output_matrices = _to_matrices(high_res_test, hr_nodes)
        # No validation split is used in this script.
        val_input_matrices = None
        val_output_matrices = None

        print(f"Train input matrices shape: {train_input_matrices.shape}")
        print(f"Train output matrices shape: {train_output_matrices.shape}")
        print(f"Test input matrices shape: {test_input_matrices.shape}")
        print(f"Test output matrices shape: {test_output_matrices.shape}")

        # Configure the model
        args = Args()
        # NOTE(review): every fold uses the same model_path; if train() writes
        # a checkpoint there, later folds overwrite earlier ones - confirm.
        args.model_path = 'model.pt'
        args.epochs = 34

        # Pooling ratios for the Graph U-Net architecture
        ks = [0.9, 0.7, 0.6, 0.5]

        fold_model = GSRNet(ks, args)
        optimizer = optim.AdamW(fold_model.parameters(), lr=args.lr)

        # Train for the fixed number of epochs; early stopping is disabled
        # because no validation data is supplied.
        train(
            fold_model,
            optimizer,
            train_input_matrices,
            train_output_matrices,
            val_input_matrices,
            val_output_matrices,
            args,
            early_stopping=False,
        )

        # Test the model. The second return value is not needed here:
        # evaluation uses the held-out matrices built above.
        predictions, _ = test(fold_model, test_input_matrices, test_output_matrices, args)

        # Evaluate the model on the test set and log the results
        metrics = evaluate_all(
            test_output_matrices,
            predictions,
            output_path=f'cluster_res/clusterCV_fold{current_fold}.csv',
        )
        fold_results.append(metrics)

    return fold_results

# Run the cross-validation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()