In [1]:
import os 
import pandas as pd
import numpy as np

In [2]:
# Root folder holding the derived tables. It defaults to the original workstation
# path; set the ACDC_DERIVATIVES_DIR environment variable to run the notebook on
# another machine without editing this cell.
derivatives_folder = os.environ.get(
    'ACDC_DERIVATIVES_DIR',
    '/media/jaume/DATA/Data/New_ACDC/MIDS/mixed/derivatives',
)
data_path = os.path.join(derivatives_folder, 'GraphClassification')
# Input tables read by the loading cell.
nodes_filename = os.path.join(derivatives_folder, "nodes_data.parquet")
global_filename = os.path.join(derivatives_folder, "global_data.parquet")

In [3]:
# Load the nodes data
nodes_data = pd.read_parquet(nodes_filename)
# Load the global data
global_data = pd.read_parquet(global_filename)

print(nodes_data.columns)
print(global_data.columns)

Index(['Intensity_Mean', 'Intensity_Median', 'Intensity_q1', 'Intensity_q3',
       'Intensity_IQ', 'Volume', 'CM_X', 'CM_Y', 'CM_Z', 'J_Mean', 'J_Median',
       'J_q1', 'J_q3', 'J_IQ', 'Def_X_Mean', 'Def_X_Median', 'Def_X_q1',
       'Def_X_q3', 'Def_X_IQ', 'Def_Y_Mean', 'Def_Y_Median', 'Def_Y_q1',
       'Def_Y_q3', 'Def_Y_IQ', 'Def_Z_Mean', 'Def_Z_Median', 'Def_Z_q1',
       'Def_Z_q3', 'Def_Z_IQ', 'Thickness_Mean', 'Thickness_Median',
       'Thickness_q1', 'Thickness_q3', 'Thickness_IQ', 'Strain_Circum',
       'Strain_Radial', 'Cycle', 'Region', 'Volume_Index', 'Subject',
       'Region_ID', 'Cycle_ID', 'Label', 'Label_ID'],
      dtype='object')
Index(['Subject', 'Group', 'Height', 'Weight', 'BMI', 'BSA', 'Region', 'Cycle',
       'RV_Myo_SV', 'RV_Myo_SVI', 'RV_Myo_EF', 'RV_SV', 'RV_SVI', 'RV_EF',
       'LV_Myo_SV', 'LV_Myo_SVI', 'LV_Myo_EF', 'LV_SV', 'LV_SVI', 'LV_EF',
       'dt', 'ed_cycle_time', 'es_cycle_time', 'ed_frame_idx', 'es_frame_idx'],
      dtype='object')


In [4]:
def get_basic_biomarkers(node_info, region, subject='test'):
    """Summarise thickness and strain of one region into a single-row DataFrame.

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with at least the columns 'Region', 'Thickness_Mean',
        'Strain_Radial' and 'Strain_Circum'. The three numeric columns are
        converted to float **in place** on this frame.
    region : str
        Value of the 'Region' column to summarise (also used as column prefix).
    subject : str, optional
        Label used as the single index entry of the result.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``subject`` (index name 'Subject') with the columns
        '<region>_Max Thickness', '<region>_Min Thickness',
        '<region>_Std Thickness', '<region>_Mean Thickness',
        '<region>_Max Radial Strain' and '<region>_Max Circum Strain'.
    """
    # The casts are written back to the caller's frame, as in the original flow.
    for numeric_col in ('Strain_Radial', 'Strain_Circum', 'Thickness_Mean'):
        node_info[numeric_col] = node_info[numeric_col].astype(float)

    # Filter the requested region once and reuse the selection for every statistic.
    region_rows = node_info.query(f"Region=='{region}'")
    thickness = region_rows['Thickness_Mean']

    stats = [
        ('Max Thickness', thickness.max()),
        ('Min Thickness', thickness.min()),
        ('Std Thickness', thickness.std()),
        ('Mean Thickness', thickness.mean()),
        ('Max Radial Strain', region_rows['Strain_Radial'].max()),
        ('Max Circum Strain', region_rows['Strain_Circum'].max()),
    ]
    labels = [f'{region}_{label}' for label, _ in stats]
    row = np.array([[value for _, value in stats]])

    summary = pd.DataFrame(row, columns=labels, index=[subject])
    summary.index.name = 'Subject'
    return summary


def _peak_thickness_change_time(node_info, region, es_time):
    """Return the Cycle value after ``es_time`` where the 3-frame thickness change of ``region`` peaks."""
    thickness = (
        node_info.query(f"Region=='{region}'")
        .set_index("Cycle")
        .sort_index()['Thickness_Mean']
    )
    # Sum of three consecutive frame-to-frame differences (a smoothed derivative).
    thickness_change = thickness.diff().rolling(3).sum()

    # Keep only frames after end-systole, then drop the first 3 and last 3
    # time-points as a safety margin.
    window = thickness_change[thickness_change.index > es_time].iloc[3:-3]

    # Skip undefined samples: np.argmax would report the first NaN as the maximum.
    window = window.dropna()
    if window.empty:
        raise ValueError(
            f"Not enough valid '{region}' thickness samples after ES time {es_time} "
            "to locate the early relaxation peak."
        )
    return window.idxmax()


def identify_early_relaxation(node_info, global_info):
    """Estimate the early relaxation time from the myocardial thickness curves.

    For 'LV_Myo' and 'RV_Myo' separately, the smoothed thickness change
    (``diff().rolling(3).sum()`` over frames sorted by 'Cycle') is restricted to
    the frames after end-systole, the first and last three of those frames are
    discarded, and the Cycle value of the largest remaining change is taken.
    The result is the mean of the two Cycle values.

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with the columns 'Region', 'Cycle', 'Thickness_Mean' and
        'Volume_Index'. The three numeric columns are converted to float
        **in place**.
    global_info : pandas.DataFrame
        Table whose first 'es_cycle_time' entry is the end-systolic time.

    Returns
    -------
    numpy.float64
        Mean of the LV and RV early relaxation Cycle values.

    Raises
    ------
    ValueError
        If a region has no valid (non-NaN) sample left in the search window.
    """
    # The in-place casts are kept on purpose: the notebook passes the same frame
    # to later steps after this call.
    node_info['Volume_Index'] = node_info['Volume_Index'].astype(float)
    node_info['Thickness_Mean'] = node_info['Thickness_Mean'].astype(float)
    node_info['Cycle'] = node_info['Cycle'].astype(float)
    es_time = global_info['es_cycle_time'].astype(float).values[0]

    early_relax_time_lv = _peak_thickness_change_time(node_info, 'LV_Myo', es_time)
    early_relax_time_rv = _peak_thickness_change_time(node_info, 'RV_Myo', es_time)
    early_relax_time = np.mean([early_relax_time_lv, early_relax_time_rv])

    return early_relax_time


def _strain_by_region(node_info, cycle_time, strain_cols):
    """Return ``strain_cols`` of the rows recorded at ``cycle_time``, indexed by Region, as floats."""
    rows_at_time = node_info.loc[node_info['Cycle'] == cycle_time]
    return rows_at_time.set_index('Region')[strain_cols].astype(float)


def get_relaxation_strain_and_ratios(node_info, global_info, early_relax_time, region):
    """Compute contraction/relaxation strain differences and their ratios for one region.

    All differences are taken with respect to the end-systolic (ES) frame:

    * contraction      = ED  - ES
    * relaxation       = end - ES   (end = the frame with Cycle == 1)
    * early relaxation = early - ES (early = frame whose Cycle is closest to
      ``early_relax_time``)
    * ratio            = relaxation / contraction
    * early ratio      = early relaxation / contraction

    Only 'Strain_Radial' and 'Strain_Circum' are read and cast to float, so other
    (possibly non-numeric) columns of ``node_info`` do not affect the result.

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with the columns 'Region', 'Cycle', 'Strain_Radial' and
        'Strain_Circum'. 'Cycle' is converted to float **in place**.
    global_info : pandas.DataFrame
        Table whose first 'ed_cycle_time' / 'es_cycle_time' entries give the
        end-diastolic and end-systolic times.
    early_relax_time : float
        Target time of early relaxation; snapped to the closest available Cycle.
    region : str
        Region to report; also used as column prefix.

    Returns
    -------
    pandas.DataFrame
        Rows for ``region`` (index 'Region') with ten columns named
        '<region>_Strain_{Radial,Circum}_{Contraction,Relaxation,
        Early_Relaxation,Ratio,Ratio_Early}'.

    Raises
    ------
    KeyError
        If ``region`` is absent from every frame involved in a difference.
    """
    strain_cols = ['Strain_Radial', 'Strain_Circum']

    node_info['Cycle'] = node_info['Cycle'].astype(float)
    ed_time = global_info['ed_cycle_time'].astype(float).values[0]
    es_time = global_info['es_cycle_time'].astype(float).values[0]

    # Snap the requested early relaxation time to the closest recorded Cycle.
    cycle_times = node_info['Cycle'].unique()
    early_relax_time_cycle = cycle_times[np.argmin(np.abs(cycle_times - early_relax_time))]

    ed_data = _strain_by_region(node_info, ed_time, strain_cols)
    es_data = _strain_by_region(node_info, es_time, strain_cols)
    end_data = _strain_by_region(node_info, 1.0, strain_cols)  # last frame: Cycle == 1
    early_data = _strain_by_region(node_info, early_relax_time_cycle, strain_cols)

    # Differences are aligned on the Region index (not normalised by the ED value).
    contraction_data = ed_data - es_data
    relaxation_data = end_data - es_data
    early_relax_data = early_data - es_data

    named_frames = [
        ('Contraction', contraction_data),
        ('Relaxation', relaxation_data),
        ('Early_Relaxation', early_relax_data),
        ('Ratio', relaxation_data / contraction_data),
        ('Ratio_Early', early_relax_data / contraction_data),
    ]

    # Keep the requested region only and tag every column with region and quantity.
    parts = [
        frame.loc[[region]].rename(
            columns={col: f'{region}_{col}_{suffix}' for col in strain_cols}
        )
        for suffix, frame in named_frames
    ]
    summary_strain = pd.concat(parts, axis=1)

    return summary_strain

In [5]:
# From the nodes data
# ==========================
# Get the LV and RV biomarkers
# ==========================
biomarkers_filename = os.path.join(derivatives_folder, "biomarkers.csv")
# When True the biomarkers are always recomputed, even if the CSV already exists.
redo = True
if not os.path.isfile(biomarkers_filename) or redo:
    subjects = list(nodes_data['Subject'].unique())
    # Collect one single-row frame per subject and concatenate once at the end;
    # growing df_data with pd.concat inside the loop copies the table every iteration.
    subject_rows = []
    for subject in subjects:
        nodes_file = nodes_data.query(f"Subject=='{subject}'").copy()
        nodes_file.set_index('Subject', inplace=True)
        nodes_file.drop(columns=['Label'], inplace=True)

        global_file = global_data.query(f"Subject=='{subject}'").copy()
        global_file.set_index('Subject', inplace=True)

        # Basic biomarkers first: they convert the thickness/strain columns of
        # nodes_file to float in place before the later steps read them.
        df_node_data_lv = get_basic_biomarkers(nodes_file, 'LV_Myo', subject=subject)
        df_node_data_rv = get_basic_biomarkers(nodes_file, 'RV_Myo', subject=subject)
        early_relax_time = identify_early_relaxation(nodes_file, global_file)
        summary_lv = get_relaxation_strain_and_ratios(nodes_file, global_file, early_relax_time, 'LV_Myo')
        summary_rv = get_relaxation_strain_and_ratios(nodes_file, global_file, early_relax_time, 'RV_Myo')

        # Re-index the strain summaries (indexed by Region) by subject so that
        # all per-subject pieces align when concatenated column-wise.
        summary_lv.index = [subject]
        summary_lv.index.name = 'Subject'

        summary_rv.index = [subject]
        summary_rv.index.name = 'Subject'

        df_node_data = pd.concat([summary_lv, summary_rv, df_node_data_lv, df_node_data_rv, global_file], axis=1)
        subject_rows.append(df_node_data)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    df_data = pd.concat(subject_rows) if subject_rows else pd.DataFrame(data=[])

    df_data.index.name = 'Subject'
    df_data = (
        df_data.reset_index()
        .sort_values('Subject')
        .reset_index(drop=True)
    )
    df_data.to_csv(biomarkers_filename)
else:
    # Reuse the cached table; the first CSV column is the row index written by to_csv.
    df_data = pd.read_csv(biomarkers_filename, index_col=0)

df_data.head()

Unnamed: 0,Subject,LV_Myo_Strain_Radial_Contraction,LV_Myo_Strain_Circum_Contraction,LV_Myo_Strain_Radial_Relaxation,LV_Myo_Strain_Circum_Relaxation,LV_Myo_Strain_Radial_Early_Relaxation,LV_Myo_Strain_Circum_Early_Relaxation,LV_Myo_Strain_Radial_Ratio,LV_Myo_Strain_Circum_Ratio,LV_Myo_Strain_Radial_Ratio_Early,...,LV_Myo_SVI,LV_Myo_EF,LV_SV,LV_SVI,LV_EF,dt,ed_cycle_time,es_cycle_time,ed_frame_idx,es_frame_idx
0,sub-001,-17.006488,5.33139,-14.889953,5.96635,1.965311,2.669541,0.875545,1.119098,-0.115562,...,-8.165436,-0.109282,66.40625,30.135667,0.225054,0.6,0.0,0.367347,1,11
1,sub-002,-18.106131,5.363983,-16.211021,6.062543,6.748317,1.934615,0.895333,1.130231,-0.372709,...,-15.927598,-0.170119,72.207525,40.937,0.279725,0.6,0.0,0.408163,0,12
2,sub-003,-6.5241,2.485568,-5.910453,3.091729,-4.821624,3.61402,0.905942,1.243872,0.739048,...,-1.962331,-0.019713,29.833984,15.880585,0.107684,0.6,0.0,0.428571,28,13
3,sub-004,-13.391888,2.036483,-11.646865,3.000486,1.897255,1.254742,0.869696,1.473367,-0.141672,...,-11.002333,-0.101267,37.73932,26.476414,0.141377,0.56,0.0,0.571429,27,16
4,sub-005,-18.371355,3.916858,-18.899404,4.801919,1.334369,1.619229,1.028743,1.225962,-0.072633,...,-16.168566,-0.157619,63.28125,33.684513,0.216304,0.6,0.0,0.408163,3,12


In [6]:
# Display the assembled biomarker table (the notebook renders a truncated preview).
df_data

Unnamed: 0,Subject,LV_Myo_Strain_Radial_Contraction,LV_Myo_Strain_Circum_Contraction,LV_Myo_Strain_Radial_Relaxation,LV_Myo_Strain_Circum_Relaxation,LV_Myo_Strain_Radial_Early_Relaxation,LV_Myo_Strain_Circum_Early_Relaxation,LV_Myo_Strain_Radial_Ratio,LV_Myo_Strain_Circum_Ratio,LV_Myo_Strain_Radial_Ratio_Early,...,LV_Myo_SVI,LV_Myo_EF,LV_SV,LV_SVI,LV_EF,dt,ed_cycle_time,es_cycle_time,ed_frame_idx,es_frame_idx
0,sub-001,-17.006488,5.331390,-14.889953,5.966350,1.965311,2.669541,0.875545,1.119098,-0.115562,...,-8.165436,-0.109282,66.406250,30.135667,0.225054,0.60,0.0,0.367347,1,11
1,sub-002,-18.106131,5.363983,-16.211021,6.062543,6.748317,1.934615,0.895333,1.130231,-0.372709,...,-15.927598,-0.170119,72.207525,40.937000,0.279725,0.60,0.0,0.408163,0,12
2,sub-003,-6.524100,2.485568,-5.910453,3.091729,-4.821624,3.614020,0.905942,1.243872,0.739048,...,-1.962331,-0.019713,29.833984,15.880585,0.107684,0.60,0.0,0.428571,28,13
3,sub-004,-13.391888,2.036483,-11.646865,3.000486,1.897255,1.254742,0.869696,1.473367,-0.141672,...,-11.002333,-0.101267,37.739320,26.476414,0.141377,0.56,0.0,0.571429,27,16
4,sub-005,-18.371355,3.916858,-18.899404,4.801919,1.334369,1.619229,1.028743,1.225962,-0.072633,...,-16.168566,-0.157619,63.281250,33.684513,0.216304,0.60,0.0,0.408163,3,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,sub-146,-12.445652,8.949890,-14.595965,9.310555,-8.084695,8.067976,1.172776,1.040298,0.649600,...,-12.903289,-0.304174,86.681127,30.414342,0.686203,0.60,0.0,0.326531,27,10
146,sub-147,-19.239698,11.596488,-19.163430,14.533290,1.104617,3.342547,0.996036,1.253249,-0.057413,...,-13.250804,-0.313249,76.738425,34.425048,0.668241,0.38,0.0,0.428571,18,8
147,sub-148,-12.448667,5.631392,-12.317982,6.022426,-5.792581,2.693265,0.989502,1.069438,0.465317,...,-6.374484,-0.117726,42.018533,23.110534,0.294109,0.70,0.0,0.285714,21,10
148,sub-149,-18.316905,6.255587,-18.323814,7.107434,-0.695980,2.584240,1.000377,1.136174,0.037997,...,-10.329066,-0.166233,61.402588,33.477820,0.333119,0.50,0.0,0.448980,0,11


In [7]:
# List every column available in the biomarker table.
print(df_data.columns)

# Inspect one basic biomarker: the '<region>_Max Thickness' column for region 'LV_Myo'.
df_data['LV_Myo_Max Thickness']

Index(['Subject', 'LV_Myo_Strain_Radial_Contraction',
       'LV_Myo_Strain_Circum_Contraction', 'LV_Myo_Strain_Radial_Relaxation',
       'LV_Myo_Strain_Circum_Relaxation',
       'LV_Myo_Strain_Radial_Early_Relaxation',
       'LV_Myo_Strain_Circum_Early_Relaxation', 'LV_Myo_Strain_Radial_Ratio',
       'LV_Myo_Strain_Circum_Ratio', 'LV_Myo_Strain_Radial_Ratio_Early',
       'LV_Myo_Strain_Circum_Ratio_Early', 'RV_Myo_Strain_Radial_Contraction',
       'RV_Myo_Strain_Circum_Contraction', 'RV_Myo_Strain_Radial_Relaxation',
       'RV_Myo_Strain_Circum_Relaxation',
       'RV_Myo_Strain_Radial_Early_Relaxation',
       'RV_Myo_Strain_Circum_Early_Relaxation', 'RV_Myo_Strain_Radial_Ratio',
       'RV_Myo_Strain_Circum_Ratio', 'RV_Myo_Strain_Radial_Ratio_Early',
       'RV_Myo_Strain_Circum_Ratio_Early', 'LV_Myo_Max Thickness',
       'LV_Myo_Min Thickness', 'LV_Myo_Std Thickness', 'LV_Myo_Mean Thickness',
       'LV_Myo_Max Radial Strain', 'LV_Myo_Max Circum Strain',
       'RV_Myo_Max 

0       9.615237
1      10.203852
2       9.480990
3       8.798091
4      10.740139
         ...    
145    10.688889
146    10.470586
147     8.954178
148     8.792927
149     7.430420
Name: LV_Myo_Max Thickness, Length: 150, dtype: float64