## Example for 002 resolution

In [None]:
import os
import pandas as pd
import numpy as np
import re

# Mapping from numeric region labels to region abbreviations.
# The numbers were taken from the CSV files in the atlas/template folder.
# convert_to_long_format() looks up the first number found in a .netts file name
# in this table to obtain a readable region label.
# In the table below every "CR_" key equals the matching "CL_" key + 500, and
# every "SR_" key equals the matching "SL_" key + 500.
# NOTE(review): prefixes presumably mean C = cortical, S = subcortical and
# L/R = left/right hemisphere -- confirm against the atlas documentation.
number_to_abbreviation = {
    3: "CL_ACC", 11: "CL_MCC", 17: "CL_med_OFC", 25: "CL_lat_OFC", 37: "CL_caudal_OFC",
    51: "CL_area_8A", 54: "CL_dlPFC", 64: "CL_vlPFC", 78: "CL_M1/PM", 87: "CL_SMA/preSMA",
    92: "CL_SI", 95: "CL_SII", 97: "CL_V6/V6A", 102: "CL_area_5", 107: "CL_vm_IPS",
    113: "CL_lat_IPS", 119: "CL_MST", 120: "CL_area_7_in_IPL", 126: "CL_area_7m", 
    127: "CL_PCgG", 148: "CL_paraHipp", 153: "CL_Rh", 165: "CL_TG", 176: "CL_TEO",
    177: "CL_TE", 188: "CL_STSf", 194: "CL_STGr/STSd", 199: "CL_STGc", 205: "CL_belt",
    218: "CL_RTp", 219: "CL_core", 224: "CL_floor_of_ls", 232: "CL_MT", 234: "CL_V4",
    237: "CL_V2-V3", 246: "CL_V1", 503: "CR_ACC", 511: "CR_MCC", 517: "CR_med_OFC",
    525: "CR_lat_OFC", 537: "CR_caudal_OFC", 551: "CR_area_8A", 554: "CR_dlPFC",
    564: "CR_vlPFC", 578: "CR_M1/PM", 587: "CR_SMA/preSMA", 592: "CR_SI", 595: "CR_SII",
    597: "CR_V6/V6A", 602: "CR_area_5", 607: "CR_vm_IPS", 613: "CR_lat_IPS", 619: "CR_MST",
    620: "CR_area_7_in_IPL", 626: "CR_area_7m", 627: "CR_PCgG", 648: "CR_paraHipp",
    653: "CR_Rh", 665: "CR_TG", 676: "CR_TEO", 677: "CR_TE", 688: "CR_STSf", 
    694: "CR_STGr/STSd", 699: "CR_STGc", 705: "CR_belt", 718: "CR_RTp", 719: "CR_core",
    724: "CR_floor_of_ls", 732: "CR_MT", 734: "CR_V4", 737: "CR_V2-V3", 746: "CR_V1", 1652: "SR_APul", 
    1653: "SR_MPul", 1654: "SR_LPul", 1655: "SR_IPul", 1152: "SL_APul", 1153: "SL_MPul", 1154: "SL_LPul", 1155: "SL_IPul"
}

# Function to load the .netts files from a folder
def load_netts_files(folder_path):
    """Load every ``*.netts`` file found directly inside *folder_path*.

    Each file is parsed as a headerless, whitespace-delimited table and stored
    transposed, so the rows of the file become the columns of the DataFrame.

    Args:
        folder_path: Directory that contains the ``.netts`` files.

    Returns:
        dict mapping the file name without its ``.netts`` extension to the
        transposed DataFrame. Entries are inserted in alphabetical file-name
        order. The dict is empty when the folder holds no ``.netts`` files.

    Raises:
        OSError: If *folder_path* cannot be listed or a file cannot be read.
    """
    extension = ".netts"
    data_dict = {}

    # Sort the listing so the result order does not depend on the file system
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(extension):
            continue
        file_path = os.path.join(folder_path, file)
        # sep=r"\s+" is the supported replacement for the deprecated
        # delim_whitespace=True and parses the file the same way
        df = pd.read_csv(file_path, sep=r"\s+", header=None)
        # Remove only the trailing extension; str.replace would also delete
        # ".netts" occurring elsewhere in the file name
        variable_name = file[:-len(extension)]
        data_dict[variable_name] = df.T

    return data_dict

# Function to calculate the correlation matrix
def calculate_correlation_matrix(data_dict):
    """Compute the pairwise correlation between all loaded columns.

    The DataFrames are placed side by side (outer join on the row index), with
    the dict keys as the outer level of the resulting column index. Missing
    values are filled by linear interpolation down each column before
    ``DataFrame.corr()`` is applied.

    Args:
        data_dict: Mapping of name -> DataFrame, e.g. as returned by
            ``load_netts_files``.

    Returns:
        Square DataFrame of correlation coefficients whose index and columns
        are the ``(name, original column label)`` pairs.

    Raises:
        ValueError: If *data_dict* is empty.
    """
    # pd.concat would fail with the unhelpful "No objects to concatenate";
    # raise the same exception type with a message that names the real cause
    if not data_dict:
        raise ValueError(
            "data_dict is empty: no time series were loaded, "
            "so there is nothing to correlate"
        )

    names = list(data_dict.keys())
    frames = list(data_dict.values())
    merged_df = pd.concat(frames, axis=1, keys=names, join='outer')
    # Fill the gaps introduced by the outer join
    merged_df = merged_df.interpolate(method='linear')
    return merged_df.corr()

# Function for converting the correlation matrix into the long format
def convert_to_long_format(correlation_matrix, abbreviations=None):
    """Flatten a correlation matrix into one row per (row, column) pair.

    Every index/column label is turned into a display name: if the label is a
    tuple its first element is used; the first run of digits in that name is
    parsed as an integer and looked up in *abbreviations*. Names that contain
    no digits, or whose number is not in the mapping, are kept unchanged.

    Args:
        correlation_matrix: DataFrame of correlation values, e.g. as returned
            by ``calculate_correlation_matrix``.
        abbreviations: Optional mapping of number -> abbreviation. Defaults to
            the module-level ``number_to_abbreviation`` table.

    Returns:
        DataFrame with the columns ``Variable1``, ``Variable2`` and
        ``Correlation``, ordered row by row. The columns are present even when
        the input matrix is empty.
    """
    if abbreviations is None:
        abbreviations = number_to_abbreviation

    def _display_name(label):
        # Labels are tuples when the matrix has a MultiIndex; the first
        # element carries the file name
        name = label[0] if isinstance(label, tuple) else label
        found = re.search(r'\d+', str(name))
        if found is None:
            # No number to translate: keep the raw name instead of crashing
            return name
        return abbreviations.get(int(found.group()), name)

    # Resolve each label once instead of once per matrix cell
    row_names = [_display_name(label) for label in correlation_matrix.index]
    col_names = [_display_name(label) for label in correlation_matrix.columns]
    # Positional access is unambiguous even if labels repeat
    values = correlation_matrix.to_numpy()

    long_format_data = [
        {
            'Variable1': row_name,
            'Variable2': col_name,
            'Correlation': values[i, j],
        }
        for i, row_name in enumerate(row_names)
        for j, col_name in enumerate(col_names)
    ]

    return pd.DataFrame(
        long_format_data, columns=['Variable1', 'Variable2', 'Correlation']
    )

# Function for saving the long matrix as a CSV file
def save_long_correlation_matrix(long_df, output_file):
    """Write *long_df* to *output_file* as CSV without the index column.

    When *output_file* is a path, its parent directory is created first if it
    does not exist yet. A confirmation message is printed after writing.

    Args:
        long_df: DataFrame to save.
        output_file: Destination path, or any other target accepted by
            ``DataFrame.to_csv``.
    """
    # Only path-like targets have a directory; other targets are passed through
    if isinstance(output_file, (str, os.PathLike)):
        output_dir = os.path.dirname(output_file)
        # dirname is empty for a bare file name, which needs no directory
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    long_df.to_csv(output_file, index=False)
    print(f"The long correlation matrix has been saved to {output_file}.")

# Main process: load the data, compute the correlation matrix, and save the result
def process_netts_folder(folder_path, output_file):
    """Run the full pipeline for one folder of .netts files.

    Loads the files from *folder_path*, computes their correlation matrix,
    converts it to the long format and writes it to *output_file* as CSV.
    """
    series_by_name = load_netts_files(folder_path)
    long_table = convert_to_long_format(
        calculate_correlation_matrix(series_by_name)
    )
    save_long_correlation_matrix(long_table, output_file)

# Example usage: the statements below run the whole pipeline when this cell is executed
folder_path = "netts/Rio/predrug_002"  # Path to the folder containing the .netts files
output_file = "csv_files/002_resolution/long_correlation_matrix_Rio_predrug_002.csv"  # Path of the output CSV file

# Execute the main process
process_netts_folder(folder_path, output_file)
