In [6]:
import os 
import pandas as pd
import numpy as np

In [7]:
# Root of the derivatives tree. Defaults to the original workstation path; set the
# UKB_DERIVATIVES_DIR environment variable to run the notebook on another machine
# without editing this cell.
derivatives_folder = os.environ.get(
    "UKB_DERIVATIVES_DIR",
    "/media/jaume/DATA/Data/Urblauna_SFTP/UKB_Cardiac_BIDS/derivatives",
)
# Sub-folder and input tables, all resolved relative to the derivatives root.
data_path = os.path.join(derivatives_folder, 'GraphClassification')
nodes_filename = os.path.join(derivatives_folder, "nodes_data.parquet")
global_filename = os.path.join(derivatives_folder, "global_data.parquet")

In [8]:
# Read the node-level table and the global table from their parquet files.
nodes_data, global_data = (
    pd.read_parquet(parquet_path)
    for parquet_path in (nodes_filename, global_filename)
)

# Print the column index of each table, nodes first.
print(nodes_data.columns, global_data.columns, sep="\n")

Index(['Intensity_Mean', 'Intensity_Median', 'Intensity_q1', 'Intensity_q3',
       'Intensity_IQ', 'Volume', 'CM_X', 'CM_Y', 'CM_Z', 'J_Mean', 'J_Median',
       'J_q1', 'J_q3', 'J_IQ', 'Def_X_Mean', 'Def_X_Median', 'Def_X_q1',
       'Def_X_q3', 'Def_X_IQ', 'Def_Y_Mean', 'Def_Y_Median', 'Def_Y_q1',
       'Def_Y_q3', 'Def_Y_IQ', 'Def_Z_Mean', 'Def_Z_Median', 'Def_Z_q1',
       'Def_Z_q3', 'Def_Z_IQ', 'Thickness_Mean', 'Thickness_Median',
       'Thickness_q1', 'Thickness_q3', 'Thickness_IQ', 'Strain_Circum',
       'Strain_Radial', 'Cycle', 'Region', 'Volume_Index', 'Subject',
       'Region_ID', 'Cycle_ID', 'Label', 'Label_ID'],
      dtype='object')
Index(['Subject', 'Height', 'Weight', 'BMI', 'BSA', 'Region', 'Cycle',
       'RV_Myo_SV', 'RV_Myo_SVI', 'RV_Myo_EF', 'RV_SV', 'RV_SVI', 'RV_EF',
       'LV_Myo_SV', 'LV_Myo_SVI', 'LV_Myo_EF', 'LV_SV', 'LV_SVI', 'LV_EF',
       'dt', 'ed_cycle_time', 'es_cycle_time', 'ed_frame_idx', 'es_frame_idx',
       'Group'],
      dtype='object')

In [9]:
def get_basic_biomarkers(node_info, region, subject='test'):
    """Summarise thickness and strain of one region as a single-row frame.

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with at least the columns 'Region', 'Thickness_Mean',
        'Strain_Radial' and 'Strain_Circum'. The three numeric columns are
        cast to float **in place** on the frame that is passed in.
    region : str
        Value of the 'Region' column whose rows are summarised.
    subject : hashable, default 'test'
        Label used as the index of the returned row.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``subject`` (index name 'Subject') with the columns
        ``f'{region}_Max Thickness'``, ``'..._Min Thickness'``,
        ``'..._Std Thickness'``, ``'..._Mean Thickness'``,
        ``'..._Max Radial Strain'`` and ``'..._Max Circum Strain'``.
        All values are NaN when no row matches ``region``.
    """
    # In-place coercion is kept on purpose: it is a visible side effect on the
    # caller's frame.
    for column in ('Strain_Radial', 'Strain_Circum', 'Thickness_Mean'):
        node_info[column] = node_info[column].astype(float)

    # Filter once instead of re-scanning the whole frame for every statistic.
    region_rows = node_info[node_info['Region'] == region]
    thickness = region_rows['Thickness_Mean']

    # Insertion order defines the column order of the result.
    stats = {
        'Max Thickness': thickness.max(),
        'Min Thickness': thickness.min(),
        'Std Thickness': thickness.std(),
        'Mean Thickness': thickness.mean(),
        'Max Radial Strain': region_rows['Strain_Radial'].max(),
        'Max Circum Strain': region_rows['Strain_Circum'].max(),
    }

    df_node_data = pd.DataFrame(
        [list(stats.values())],
        columns=[f'{region}_{name}' for name in stats],
        index=[subject],
    )
    df_node_data.index.name = 'Subject'

    return df_node_data


def _thickness_peak_time(node_info, region, es_time, margin=2):
    """Return the 'Cycle' value at which the smoothed thickness change of ``region`` peaks after ``es_time``."""
    # First difference of Thickness_Mean ordered by Cycle, summed over a
    # 3-sample rolling window.
    rate = (
        node_info.query(f"Region=='{region}'")
        .set_index("Cycle")
        .sort_index()['Thickness_Mean']
        .diff()
        .rolling(3)
        .sum()
    )
    # Keep only samples strictly after end-systole.
    rate = rate[rate.index > es_time]

    # Drop `margin` samples at each end, but only when at least one sample
    # would remain; shorter series are searched as they are.
    if len(rate) >= 2 * margin + 1:
        rate = rate.iloc[margin:-margin]

    # Positional argmax on the raw values, mapped back to the Cycle label.
    return rate.index[np.argmax(rate.to_numpy())]


def identify_early_relaxation(node_info, global_info):
    """Estimate a relaxation time from the myocardial thickness curves.

    For 'LV_Myo' and 'RV_Myo' separately, the function takes the first
    difference of 'Thickness_Mean' ordered by 'Cycle', sums it over a 3-sample
    rolling window, restricts it to cycles strictly greater than the
    end-systolic time and picks the cycle at which that quantity is largest.
    When a region has at least five samples after end-systole, the first two
    and last two are excluded from the search. The two per-region times are
    averaged.

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with the columns 'Region', 'Cycle', 'Thickness_Mean' and
        'Volume_Index'. 'Volume_Index', 'Thickness_Mean' and 'Cycle' are cast
        to float **in place** on the frame that is passed in.
    global_info : pandas.DataFrame
        Table whose first 'es_cycle_time' entry is used as end-systolic time.

    Returns
    -------
    numpy.float64
        Mean of the LV_Myo and RV_Myo peak times.

    Raises
    ------
    ValueError
        If either region has no sample after the end-systolic time.
    """
    # In-place coercions are kept (including 'Volume_Index', which this
    # function no longer reads) so the caller's frame ends up in the same
    # state as before this rewrite.
    node_info['Volume_Index'] = node_info['Volume_Index'].astype(float)
    node_info['Thickness_Mean'] = node_info['Thickness_Mean'].astype(float)
    node_info['Cycle'] = node_info['Cycle'].astype(float)
    es_time = global_info['es_cycle_time'].astype(float).values[0]

    # Each ventricle decides on its own whether it is long enough to be
    # trimmed; the LV length is no longer used as a proxy for the RV.
    early_relax_time_lv = _thickness_peak_time(node_info, 'LV_Myo', es_time)
    early_relax_time_rv = _thickness_peak_time(node_info, 'RV_Myo', es_time)

    return np.mean([early_relax_time_lv, early_relax_time_rv])


def get_relaxation_strain_and_ratios(node_info, global_info, early_relax_time, region):
    """Build contraction / relaxation strain differences and their ratios for one region.

    Four snapshots of 'Strain_Radial' and 'Strain_Circum' are taken from
    ``node_info``: at the end-diastolic time, at the end-systolic time, at
    ``Cycle == 1`` and at the available 'Cycle' value closest to
    ``early_relax_time``. From these the function derives

    * contraction       = ED snapshot - ES snapshot
    * relaxation        = (Cycle == 1) snapshot - ES snapshot
    * early relaxation  = early snapshot - ES snapshot
    * ratio             = relaxation / contraction
    * early ratio       = early relaxation / contraction

    Parameters
    ----------
    node_info : pandas.DataFrame
        Table with the columns 'Region', 'Cycle', 'Strain_Radial' and
        'Strain_Circum'. 'Cycle' is cast to float **in place** on the frame
        that is passed in. Other columns are ignored, so they no longer have
        to be convertible to float.
    global_info : pandas.DataFrame
        Table whose first 'ed_cycle_time' and 'es_cycle_time' entries give the
        end-diastolic and end-systolic cycle times.
    early_relax_time : float
        Target time; the nearest value present in 'Cycle' is used.
    region : str
        Value of 'Region' for which the row is extracted.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``region`` with ten float columns named
        ``f'{region}_Strain_<Radial|Circum>_<suffix>'`` where suffix is, in
        order, 'Contraction', 'Relaxation', 'Early_Relaxation', 'Ratio',
        'Ratio_Early'.

    Raises
    ------
    KeyError
        If ``region`` is not present in the snapshots being combined.
    """
    strain_cols = ['Strain_Radial', 'Strain_Circum']

    node_info['Cycle'] = node_info['Cycle'].astype(float)
    ed_time = global_info['ed_cycle_time'].astype(float).values[0]
    es_time = global_info['es_cycle_time'].astype(float).values[0]

    # Snap the requested early-relaxation time to the closest sampled cycle.
    cycle_times = node_info['Cycle'].unique()
    early_relax_time_cycle = cycle_times[np.argmin(np.abs(cycle_times - early_relax_time))]

    def strain_at(cycle_time):
        # Rows of one cycle, indexed by region, strain columns only. Every
        # snapshot is cast to float so all arithmetic below is numeric.
        rows = node_info.loc[node_info['Cycle'] == cycle_time]
        return rows.set_index('Region')[strain_cols].astype(float)

    es_data = strain_at(es_time)
    contraction_data = strain_at(ed_time) - es_data
    relaxation_data = strain_at(1) - es_data
    early_relax_data = strain_at(early_relax_time_cycle) - es_data

    # Insertion order defines the column order of the result.
    blocks = {
        'Contraction': contraction_data,
        'Relaxation': relaxation_data,
        'Early_Relaxation': early_relax_data,
        'Ratio': relaxation_data / contraction_data,
        'Ratio_Early': early_relax_data / contraction_data,
    }

    summary_parts = [
        frame.loc[[region]].rename(
            columns={col: f'{region}_{col}_{suffix}' for col in strain_cols}
        )
        for suffix, frame in blocks.items()
    ]

    return pd.concat(summary_parts, axis=1)

In [10]:
# From the nodes data
# ==========================
# Get the LV and RV biomarkers
# ==========================
biomarkers_filename = os.path.join(derivatives_folder, "biomarkers.csv")
# With redo = True the table is always recomputed and the CSV overwritten;
# set it to False to reuse an existing biomarkers.csv.
redo = True
if not os.path.isfile(biomarkers_filename) or redo:
    subjects = list(nodes_data['Subject'].unique())

    # One row-frame per subject, concatenated once after the loop. Growing a
    # DataFrame with pd.concat inside the loop copies all previous rows on
    # every iteration.
    per_subject_frames = []

    # A single groupby pass replaces one full-table query per subject.
    # sort=False keeps first-appearance order; observed=True avoids empty
    # groups should 'Subject' be categorical.
    for subject, subject_nodes in nodes_data.groupby('Subject', sort=False, observed=True):
        # Explicit copy: the helper functions below cast columns in place.
        nodes_file = subject_nodes.set_index('Subject').drop(columns=['Label']).copy()

        global_file = global_data.query(f"Subject=='{subject}'").set_index('Subject')

        df_node_data_lv = get_basic_biomarkers(nodes_file, 'LV_Myo', subject=subject)
        df_node_data_rv = get_basic_biomarkers(nodes_file, 'RV_Myo', subject=subject)
        early_relax_time = identify_early_relaxation(nodes_file, global_file)
        summary_lv = get_relaxation_strain_and_ratios(nodes_file, global_file, early_relax_time, 'LV_Myo')
        summary_rv = get_relaxation_strain_and_ratios(nodes_file, global_file, early_relax_time, 'RV_Myo')

        # The strain summaries come back indexed by region; re-index them by
        # subject so every piece aligns on the same row label.
        summary_lv.index = [subject]
        summary_lv.index.name = 'Subject'

        summary_rv.index = [subject]
        summary_rv.index.name = 'Subject'

        df_node_data = pd.concat([summary_lv, summary_rv, df_node_data_lv, df_node_data_rv, global_file], axis=1)
        per_subject_frames.append(df_node_data)

    # pd.concat rejects an empty list, so fall back to an empty frame.
    df_data = pd.concat(per_subject_frames) if per_subject_frames else pd.DataFrame(data=[])

    df_data.index.name = 'Subject'
    df_data = df_data.reset_index().sort_values('Subject').reset_index(drop=True)
    df_data.to_csv(biomarkers_filename)
else:
    df_data = pd.read_csv(biomarkers_filename, index_col=0)

df_data.head()

KeyboardInterrupt: 

In [None]:
# Display the assembled biomarker table as the cell output.
df_data

Unnamed: 0,Subject,LV_Myo_Strain_Radial_Contraction,LV_Myo_Strain_Circum_Contraction,LV_Myo_Strain_Radial_Relaxation,LV_Myo_Strain_Circum_Relaxation,LV_Myo_Strain_Radial_Early_Relaxation,LV_Myo_Strain_Circum_Early_Relaxation,LV_Myo_Strain_Radial_Ratio,LV_Myo_Strain_Circum_Ratio,LV_Myo_Strain_Radial_Ratio_Early,...,LV_Myo_EF,LV_SV,LV_SVI,LV_EF,dt,ed_cycle_time,es_cycle_time,ed_frame_idx,es_frame_idx,Group
0,sub-1007266,-19.535788,13.316388,-21.236435,13.918658,2.766425,2.444549,1.087053,1.045228,-0.141608,...,-0.142582,59.576758,29.053092,0.529988,0.03156,0.0,0.346939,0,17,0.0
1,sub-1016533,-22.781580,14.212189,-22.519868,15.296291,-24.467670,11.367898,0.988512,1.076280,1.074011,...,-0.246637,57.507478,33.598422,0.535925,0.03156,0.0,0.326531,48,16,1.0
2,sub-1016992,-21.271842,11.889501,-21.338267,12.608935,-6.432232,6.514882,1.003123,1.060510,0.302382,...,-0.027415,64.917031,31.863534,0.429739,0.03156,0.0,0.306122,49,15,1.0
3,sub-1022077,-29.599376,19.356297,-29.760957,19.861341,4.086071,2.668886,1.005459,1.026092,-0.138046,...,0.158562,37.414906,21.760837,0.629068,0.03156,0.0,0.346939,0,17,1.0
4,sub-1046939,-23.595023,11.537086,-24.614183,11.864026,-11.584166,6.127666,1.043194,1.028338,0.490958,...,-0.058013,90.175880,47.441062,0.493359,0.03144,0.0,0.346939,39,17,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,sub-5970004,-22.654461,12.472668,-24.096866,12.481351,-9.013668,5.521982,1.063670,1.000696,0.397876,...,-0.175637,63.096900,30.893745,0.533097,0.03144,0.0,0.346939,0,17,0.0
409,sub-5989569,-26.662580,12.385794,-27.871815,12.486512,-27.207603,12.856404,1.045353,1.008132,1.020442,...,-0.101957,90.020953,38.108146,0.522696,0.03156,0.0,0.346939,45,17,0.0
410,sub-5992219,-18.603539,10.490648,-19.110932,11.503288,2.143907,1.259171,1.027274,1.096528,-0.115242,...,-0.020339,64.883655,36.309129,0.551020,0.03156,0.0,0.448980,48,22,0.0
411,sub-6018688,-21.807866,10.153567,-22.615060,10.637972,0.755314,0.811286,1.037014,1.047708,-0.034635,...,-0.035742,91.017349,43.777202,0.546603,0.03156,0.0,0.367347,2,18,0.0


In [None]:
# Print the column index of the biomarker table, then display one of its columns.
print(df_data.columns)

df_data['LV_Myo_Max Thickness']

Index(['Subject', 'LV_Myo_Strain_Radial_Contraction',
       'LV_Myo_Strain_Circum_Contraction', 'LV_Myo_Strain_Radial_Relaxation',
       'LV_Myo_Strain_Circum_Relaxation',
       'LV_Myo_Strain_Radial_Early_Relaxation',
       'LV_Myo_Strain_Circum_Early_Relaxation', 'LV_Myo_Strain_Radial_Ratio',
       'LV_Myo_Strain_Circum_Ratio', 'LV_Myo_Strain_Radial_Ratio_Early',
       'LV_Myo_Strain_Circum_Ratio_Early', 'RV_Myo_Strain_Radial_Contraction',
       'RV_Myo_Strain_Circum_Contraction', 'RV_Myo_Strain_Radial_Relaxation',
       'RV_Myo_Strain_Circum_Relaxation',
       'RV_Myo_Strain_Radial_Early_Relaxation',
       'RV_Myo_Strain_Circum_Early_Relaxation', 'RV_Myo_Strain_Radial_Ratio',
       'RV_Myo_Strain_Circum_Ratio', 'RV_Myo_Strain_Radial_Ratio_Early',
       'RV_Myo_Strain_Circum_Ratio_Early', 'LV_Myo_Max Thickness',
       'LV_Myo_Min Thickness', 'LV_Myo_Std Thickness', 'LV_Myo_Mean Thickness',
       'LV_Myo_Max Radial Strain', 'LV_Myo_Max Circum Strain',
       'RV_Myo_Max 

0       9.384370
1       8.736912
2       9.601362
3       9.986861
4       8.985480
         ...    
435     8.577734
436     8.926614
437     7.988200
438    11.895388
439     9.091235
Name: LV_Myo_Max Thickness, Length: 440, dtype: float64