In [None]:
import os 
import pandas as pd
import numpy as np

In [None]:
# Root of the derivatives tree; every other path in this notebook hangs off it.
derivatives_folder = "/media/jaume/DATA/Data/Urblauna_SFTP/UKB_Cardiac_BIDS/derivatives"

# Sub-folder and the two parquet tables located directly under the root.
data_path, nodes_filename, global_filename = [
    os.path.join(derivatives_folder, leaf)
    for leaf in ('GraphClassification', "nodes_data.parquet", "global_data.parquet")
]

In [None]:
# Read both parquet tables in one pass: first the nodes table, then the global table.
nodes_data, global_data = [
    pd.read_parquet(parquet_path)
    for parquet_path in (nodes_filename, global_filename)
]

In [None]:
def get_basic_biomarkers(node_info, region, subject='test'):
    """Summarise thickness and peak strain of one region as a single-row frame.

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with at least the columns 'Region', 'Thickness_Mean',
        'Strain_Radial' and 'Strain_Circum'.
        NOTE: the three numeric columns are cast to float *in place* on the
        frame that is passed in. This side effect is kept on purpose because
        code that receives the same frame afterwards may rely on it.
    region : str
        Value of the 'Region' column to summarise (e.g. 'LV_Myo').
    subject : hashable, default 'test'
        Label used as the index of the returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``subject`` (index name 'Subject') with six float
        columns, each prefixed by ``region``: max / min / std / mean of
        'Thickness_Mean' and the maxima of 'Strain_Radial' and 'Strain_Circum'.
        All values are NaN when no row matches ``region``.
    """
    for column in ('Strain_Radial', 'Strain_Circum', 'Thickness_Mean'):
        node_info[column] = node_info[column].astype(float)

    # Filter once with a boolean mask: cheaper than repeating the selection for
    # every statistic, and it does not break when `region` contains a quote
    # (which a string-built query expression would).
    region_rows = node_info.loc[node_info['Region'] == region]
    thickness = region_rows['Thickness_Mean']

    biomarkers = {
        f'{region}_Max Thickness': thickness.max(),
        f'{region}_Min Thickness': thickness.min(),
        f'{region}_Std Thickness': thickness.std(),
        f'{region}_Mean Thickness': thickness.mean(),
        f'{region}_Max Radial Strain': region_rows['Strain_Radial'].max(),
        f'{region}_Max Circum Strain': region_rows['Strain_Circum'].max(),
    }

    df_node_data = pd.DataFrame(biomarkers, index=[subject], dtype=float)
    df_node_data.index.name = 'Subject'

    return df_node_data


def _peak_thickness_change_time(node_info, region, es_time, window=3, trim=2):
    """Return the post-ES cycle time where the smoothed thickness change of `region` peaks."""
    thickness = (
        node_info.loc[node_info['Region'] == region]
        .set_index('Cycle')
        .sort_index()['Thickness_Mean']
    )

    # First difference summed over `window` consecutive samples, i.e. the
    # thickness change accumulated over the last `window` steps.
    change = thickness.diff().rolling(window).sum()

    # Keep only the samples strictly after end-systole.
    change = change[change.index > es_time]

    # Drop `trim` samples at each end when enough samples remain. The check is
    # made per region so that a short series is never sliced down to nothing.
    if len(change) >= 2 * trim + 1:
        change = change.iloc[trim:-trim]

    values = change.to_numpy()
    if values.size == 0:
        raise ValueError(f"No '{region}' samples after ES time {es_time}")

    # np.argmax would pick the first NaN (e.g. from the diff/rolling warm-up)
    # as the "maximum"; rank NaNs lowest instead.
    values = np.where(np.isnan(values), -np.inf, values)
    return change.index[int(np.argmax(values))]


def identify_early_relaxation(node_info, global_info):
    """Estimate the early-relaxation time from the myocardial thickness curves.

    For 'LV_Myo' and 'RV_Myo' separately, the thickness change (first
    difference summed over 3 samples) is computed along the cycle, restricted
    to samples after end-systole, and the cycle time of its maximum is taken.
    When a region has at least 5 post-ES samples, the first 2 and last 2 are
    discarded before searching. The result is the mean of the two times.

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with columns 'Region', 'Cycle', 'Thickness_Mean' and
        'Volume_Index'. 'Volume_Index', 'Thickness_Mean' and 'Cycle' are cast
        to float *in place*; the cast is kept because code that receives the
        same frame afterwards may rely on it.
    global_info : pandas.DataFrame
        Table whose first 'es_cycle_time' entry is the end-systolic time.

    Returns
    -------
    float
        Mean of the LV and RV early-relaxation cycle times.

    Raises
    ------
    ValueError
        If a region has no sample after the end-systolic time.
    """
    for column in ('Volume_Index', 'Thickness_Mean', 'Cycle'):
        node_info[column] = node_info[column].astype(float)
    es_time = global_info['es_cycle_time'].astype(float).values[0]

    early_relax_time_lv = _peak_thickness_change_time(node_info, 'LV_Myo', es_time)
    early_relax_time_rv = _peak_thickness_change_time(node_info, 'RV_Myo', es_time)

    return np.mean([early_relax_time_lv, early_relax_time_rv])


def get_relaxation_strain_and_ratios(node_info, global_info, early_relax_time, region):
    """Compute contraction/relaxation strain differences and their ratios for one region.

    All differences are taken relative to the end-systolic (ES) frame:

    * contraction       = ED frame - ES frame
    * relaxation        = frame at ``Cycle == 1`` - ES frame
    * early relaxation  = frame closest to ``early_relax_time`` - ES frame
    * ratio             = relaxation / contraction
    * early ratio       = early relaxation / contraction

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with columns 'Region', 'Cycle', 'Strain_Radial' and
        'Strain_Circum'. 'Cycle' is cast to float *in place*.
    global_info : pandas.DataFrame
        Table whose first 'ed_cycle_time' / 'es_cycle_time' entries give the
        end-diastolic and end-systolic cycle times.
    early_relax_time : float
        Early-relaxation time; the nearest available 'Cycle' value is used.
    region : str
        Value of the 'Region' column to report (e.g. 'LV_Myo').

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``region`` with ten float columns named
        ``{region}_Strain_{Radial|Circum}_{Contraction|Relaxation|
        Early_Relaxation|Ratio|Ratio_Early}``.
    """
    strain_cols = ['Strain_Radial', 'Strain_Circum']
    end_of_cycle = 1  # the end of the cycle is taken to be the frame with Cycle == 1

    # Get contraction and relaxation times
    node_info['Cycle'] = node_info['Cycle'].astype(float)
    ed_time = global_info['ed_cycle_time'].astype(float).values[0]
    es_time = global_info['es_cycle_time'].astype(float).values[0]

    # Find closest Cycle to early relaxation time
    cycle_times = node_info['Cycle'].unique()
    early_relax_time_cycle = cycle_times[np.argmin(np.abs(cycle_times - early_relax_time))]

    def _strain_at(cycle_time):
        # Every frame is cast to float here, so the arithmetic below does not
        # depend on a previous call having converted the strain columns.
        frame = node_info.loc[node_info['Cycle'] == cycle_time]
        return frame.set_index('Region')[strain_cols].astype(float)

    es_data = _strain_at(es_time)
    contraction_data = _strain_at(ed_time) - es_data               # ED -> ES
    relaxation_data = _strain_at(end_of_cycle) - es_data           # End of cycle -> ES
    early_relax_data = _strain_at(early_relax_time_cycle) - es_data  # Early relaxation -> ES

    labelled_frames = [
        (contraction_data, 'Contraction'),
        (relaxation_data, 'Relaxation'),
        (early_relax_data, 'Early_Relaxation'),
        (relaxation_data / contraction_data, 'Ratio'),
        (early_relax_data / contraction_data, 'Ratio_Early'),
    ]

    # Keep only the requested region and tag each strain column with its meaning.
    summary_parts = [
        frame.loc[[region]].rename(
            columns={col: f'{region}_{col}_{suffix}' for col in strain_cols}
        )
        for frame, suffix in labelled_frames
    ]
    summary_strain = pd.concat(summary_parts, axis=1)

    return summary_strain

In [None]:
# From the nodes data
# ==========================
# Get the LV and RV biomarkers
# ==========================
biomarkers_filename = os.path.join(derivatives_folder, "biomarkers.csv")
# When True the biomarkers are recomputed even if the CSV already exists;
# set to False to reuse a previously written file.
redo = True
if not os.path.isfile(biomarkers_filename) or redo:
    subjects = list(nodes_data['Subject'].unique())

    # Collect one single-row frame per subject and concatenate once at the end;
    # growing a DataFrame with concat inside the loop is quadratic.
    subject_frames = []
    for subject in subjects:
        # Boolean masks instead of string-built queries, so the selection does
        # not depend on how the subject label is quoted or typed.
        nodes_file = (
            nodes_data.loc[nodes_data['Subject'] == subject]
            .copy()
            .set_index('Subject')
            .drop(columns=['Label'])
        )
        global_file = (
            global_data.loc[global_data['Subject'] == subject]
            .copy()
            .set_index('Subject')
        )

        df_node_data_lv = get_basic_biomarkers(nodes_file, 'LV_Myo', subject=subject)
        df_node_data_rv = get_basic_biomarkers(nodes_file, 'RV_Myo', subject=subject)
        early_relax_time = identify_early_relaxation(nodes_file, global_file)
        summary_lv = get_relaxation_strain_and_ratios(nodes_file, global_file, early_relax_time, 'LV_Myo')
        summary_rv = get_relaxation_strain_and_ratios(nodes_file, global_file, early_relax_time, 'RV_Myo')

        # The strain summaries come back indexed by region; re-index them by
        # subject so that all pieces align column-wise on the same row.
        summary_lv.index = [subject]
        summary_lv.index.name = 'Subject'

        summary_rv.index = [subject]
        summary_rv.index.name = 'Subject'

        df_node_data = pd.concat([summary_lv, summary_rv, df_node_data_lv, df_node_data_rv, global_file], axis=1)
        subject_frames.append(df_node_data)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    df_data = pd.concat(subject_frames) if subject_frames else pd.DataFrame(data=[])
    df_data.index.name = 'Subject'
    df_data = df_data.reset_index().sort_values('Subject').reset_index(drop=True)
    df_data.to_csv(biomarkers_filename)
else:
    df_data = pd.read_csv(biomarkers_filename, index_col=0)

df_data.head()