# Compare the General Performance of CABG
 - Create a dataframe containing the stenosis severity (from FFR) and whether the territory subtended by the stenosis is ischemic.
 - Add the point-based or voxel-based MBF values to the dataframe (or create a new one?)
 - Violin Plots:
    - Pre/Post MBF against ischemic-grafted, non-ischemic-grafted, non-ischemic nongrafted
    - Pre/Post MBF against severity of stenosis (none, mild, moderate, severe)

In [None]:
import glob
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from utilities import ReadVTUFile, ThresholdInBetween, ThresholdByUpper, LargestConnectedRegion
from vtk.util.numpy_support import vtk_to_numpy, numpy_to_vtk

In [None]:
# Locate the study inputs: one Excel metadata workbook plus the per-patient
# .vtu volumes and .dat territory label files that live in the same folder.
path = "/Users/ana/Documents/AnahitaSeresti/05_PrePostCABG/InterPatientStudy"
# glob returns matches in arbitrary order; sort so that metadata[0] is
# reproducible when the folder holds more than one workbook.
metadata = sorted(glob.glob(f"{path}/*.xlsx"))
VTUFiles = sorted(glob.glob(f"{path}/*.vtu"))
TerritoryLabels = sorted(glob.glob(f"{path}/*.dat"))
print(VTUFiles)

if not metadata:
    # Fail with an explicit message instead of a bare IndexError on metadata[0].
    raise FileNotFoundError(f"No .xlsx metadata file found in {path}")

# NOTE(review): 'column_name' looks like a placeholder -- the dtype mapping has
# no effect unless the sheet really has a column with that name. Confirm which
# column (if any) must be forced to str.
PatientMetadata = pd.read_excel(metadata[0], dtype={'column_name': str})
print(PatientMetadata)

In [None]:
# Unique patient identifiers from the metadata sheet, in sorted order.
patient_ids = sorted({pid for pid in PatientMetadata["patient-id"]})
print(patient_ids)

In [None]:
def ReadLabels(InputLabels, TerritoryTag):
    """Group the territory ids of a label file by graft/stenosis category.

    The file is read as whitespace-separated text. The first line is treated
    as a header and skipped, and blank lines are ignored. On every other line
    the first token is parsed as an integer id and the second token is the
    territory label.

    Args:
        InputLabels: Path of the label file.
        TerritoryTag: Iterable of tag strings. An id is assigned to every tag
            that occurs as a substring of its label, so one id may end up in
            more than one group.

    Returns:
        dict mapping group name to a list of ids. Ids whose label matches no
        tag go to "Nongrafted-Stenosis" when the label contains "NG" and to
        "Nongrafted" otherwise. Groups without ids are omitted, and the
        substring 'post_' is removed from every key.

    Raises:
        ValueError: If a non-blank data line has fewer than two tokens or its
            first token is not an integer.
    """
    # Materialise once: the tags are walked for every line of the file.
    tags = list(TerritoryTag)

    MBF_Labels = {tag: [] for tag in tags}
    MBF_Labels["Nongrafted-Stenosis"] = []
    MBF_Labels["Nongrafted"] = []

    with open(InputLabels, "r") as ifile:
        for line_number, raw_line in enumerate(ifile):
            if line_number == 0:
                continue  # header
            fields = raw_line.split()
            if not fields:
                continue  # blank line, e.g. a trailing newline at end of file
            if len(fields) < 2:
                raise ValueError(
                    f"{InputLabels}: line {line_number + 1}: expected "
                    f"'<id> <label>', got {raw_line!r}"
                )
            label = fields[1]
            id_value = int(fields[0])

            matched_tags = [tag for tag in tags if tag in label]
            for tag in matched_tags:
                MBF_Labels[tag].append(id_value)

            if not matched_tags:
                fallback = "Nongrafted-Stenosis" if "NG" in label else "Nongrafted"
                MBF_Labels[fallback].append(id_value)

    return {k.replace('post_', ''): v for k, v in MBF_Labels.items() if len(v) > 0}

def robust_mode(data, bins=50):
    """Estimate the mode of *data* as the centre of its fullest histogram bin.

    Args:
        data: Array-like of samples, passed to ``np.histogram``.
        bins: Bin specification forwarded to ``np.histogram``.

    Returns:
        Midpoint of the bin with the highest count (the first such bin when
        several bins tie).
    """
    histogram, edges = np.histogram(data, bins=bins)
    peak = np.argmax(histogram)
    lower_edge, upper_edge = edges[peak], edges[peak + 1]
    return (lower_edge + upper_edge) / 2

def Normalize(MBF, method="mode", cutoff = 0):
    """Add an "IndexMBF" point-data array holding the normalised MBF scalars.

    The scalar array is the last point-data array whose name contains
    "scalars" (case-insensitive). A reference region is obtained as
    ``LargestConnectedRegion(ThresholdByUpper(MBF, <array>, cutoff))``
    (presumably the largest connected part above *cutoff* -- confirm against
    ``utilities``), and the scalars of the whole volume are normalised against
    statistics of that region.

    Args:
        MBF: VTK dataset with point data; it is modified in place.
        method: Normalisation rule.
            "mode"   -- divide by the histogram mode of the reference region,
                        with the bin count from the Freedman-Diaconis rule.
            "median" -- divide by the median of the reference region.
            "mean"   -- divide by the mean of the reference region.
            "old"    -- divide by the 75th percentile of the whole volume.
            "zscore" -- (value - median) / IQR using the reference region.
            anything else -- divide by the 75th percentile of the reference
                        region.
        cutoff: Threshold value forwarded to ``ThresholdByUpper``.

    Returns:
        The same *MBF* object, now carrying the "IndexMBF" array.

    Raises:
        ValueError: If no point-data array name contains "scalars", or the
            reference region is empty while the chosen method needs it.
    """
    point_data = MBF.GetPointData()
    ArrayName = None
    for i in range(point_data.GetNumberOfArrays()):
        arrayname_ = point_data.GetArrayName(i)
        if arrayname_ is not None and 'scalars' in arrayname_.lower():
            ArrayName = arrayname_
    if ArrayName is None:
        raise ValueError("Normalize: no point-data array with 'scalars' in its name")

    HealthyMyo = LargestConnectedRegion(ThresholdByUpper(MBF, ArrayName, cutoff))
    ScalarArray_healthy = vtk_to_numpy(HealthyMyo.GetPointData().GetArray(ArrayName))
    ScalarArray = vtk_to_numpy(point_data.GetArray(ArrayName))

    # Every rule except "old" takes its statistics from the reference region.
    if method != "old" and len(ScalarArray_healthy) == 0:
        raise ValueError(f"Normalize: reference region is empty for cutoff={cutoff}")

    if method == "zscore":
        q1, q2, q3 = np.percentile(ScalarArray_healthy, [25, 50, 75])
        IndexMBFArray = (ScalarArray - q2)/(q3 - q1)
    else:
        if method == "mode": # Histogram Analysis
            # Freedman-Diaconis rule: bin width = 2 * IQR / n^(1/3)
            IQR = np.percentile(ScalarArray_healthy, 75) - np.percentile(ScalarArray_healthy, 25)
            bin_width = 2*IQR/(len(ScalarArray_healthy))**(1/3)
            MBF_range = np.max(ScalarArray_healthy) - np.min(ScalarArray_healthy)
            if bin_width > 0:
                # A bin wider than the data range would truncate to 0 bins,
                # which np.histogram rejects.
                bins = max(1, int(MBF_range/bin_width))
                ref_MBF = robust_mode(ScalarArray_healthy, bins)
            else:
                # IQR of zero (or NaN) makes the rule undefined; use the
                # default bin count of robust_mode instead of dividing by 0.
                ref_MBF = robust_mode(ScalarArray_healthy)
        elif method == "median":
            ref_MBF = np.median(ScalarArray_healthy)
        elif method == "mean":
            ref_MBF = np.mean(ScalarArray_healthy)
        elif method == "old":
            ref_MBF = np.percentile(ScalarArray, 75)
        else:
            ref_MBF = np.percentile(ScalarArray_healthy, 75)
        IndexMBFArray = ScalarArray/ref_MBF

    # deep=True: the NumPy array is local to this function, so the VTK array
    # must own a copy of the data rather than point at memory that may be freed.
    IndexMBF = numpy_to_vtk(IndexMBFArray, deep=True)
    IndexMBF.SetName("IndexMBF")
    point_data.AddArray(IndexMBF)

    return MBF


def CollectMBFData(MBF, Labels):
    """Collect absolute and index MBF samples for every label group.

    For each id of each group the dataset is restricted with
    ``ThresholdInBetween(MBF, "TerritoryMaps", id, id)`` (presumably the
    points whose "TerritoryMaps" value equals that id -- confirm against
    ``utilities``) and the scalar and "IndexMBF" values of that subset are
    gathered.

    Args:
        MBF: VTK dataset with point data carrying an array whose name contains
            "scalars" (case-insensitive; the last match is used), plus the
            "TerritoryMaps" and "IndexMBF" arrays.
        Labels: dict mapping group name to an iterable of territory ids.

    Returns:
        Tuple ``(AbsMBFData, IndexMBFData)`` of dicts keyed like *Labels*.
        Each value is a flat float64 array of the samples of all ids in the
        group; a group without ids maps to an empty array.

    Raises:
        ValueError: If no point-data array name contains "scalars".
    """
    point_data = MBF.GetPointData()
    ScalarArray = None
    for i in range(point_data.GetNumberOfArrays()):
        arrayname_ = point_data.GetArrayName(i)
        if arrayname_ is not None and 'scalars' in arrayname_.lower():
            ScalarArray = arrayname_
    if ScalarArray is None:
        raise ValueError("CollectMBFData: no point-data array with 'scalars' in its name")

    AbsMBFData = {}
    IndexMBFData = {}
    for key, territory_ids in Labels.items():
        # Gather the per-territory pieces and join them once; appending to a
        # growing array recopies everything collected so far at every step.
        # The leading empty float64 array keeps the result float64 and makes
        # a group without ids yield an empty array.
        abs_chunks = [np.array([])]
        index_chunks = [np.array([])]
        for i in territory_ids:
            territory_ = ThresholdInBetween(MBF, "TerritoryMaps", i, i)
            territory_data = territory_.GetPointData()
            # Copy out of VTK-owned memory before the subset is released.
            abs_chunks.append(np.array(vtk_to_numpy(territory_data.GetArray(ScalarArray))).ravel())
            index_chunks.append(np.array(vtk_to_numpy(territory_data.GetArray("IndexMBF"))).ravel())
        AbsMBFData[key] = np.concatenate(abs_chunks)
        IndexMBFData[key] = np.concatenate(index_chunks)

    return AbsMBFData, IndexMBFData

def BoxPlot(DataFrame):
    """Show a box plot of index MBF per territory, split by acquisition type.

    Args:
        DataFrame: Long-format frame with the columns 'Territories' (x axis),
            'MBF' (y axis) and 'Type' (hue, with the values 'A' and 'B').
    """
    type_colors = {'A': 'skyblue', 'B': 'salmon'}

    figure = plt.figure(figsize=(8, 5))
    axes = figure.gca()
    sns.boxplot(
        data=DataFrame,
        x='Territories',
        y='MBF',
        hue='Type',
        showfliers=False,
        palette=type_colors,
        ax=axes,
    )

    axes.set_ylabel("Index MBF", fontsize=20)
    axes.tick_params(axis='x', rotation=10, labelsize=15)
    axes.legend(loc='upper left')
    figure.tight_layout()
    plt.show()




In [None]:
# Quick look at the stenosis tags recorded for patient SU03.
is_su03 = PatientMetadata['patient-id'] == 'SU03'
print(PatientMetadata.loc[is_su03, 'stenosis tag'])

In [None]:
# Build one record per (patient, territory group) holding the absolute and
# index MBF samples of the A and B acquisitions.
data = []
normalization_method = "mode"
for patient_id in patient_ids:
    if patient_id == 'SU23':
        continue

    # Filter the metadata once per patient instead of once per column.
    patient_rows = PatientMetadata[PatientMetadata['patient-id'] == patient_id]
    tags = list(patient_rows['stenosis tag'])
    ischemia = list(patient_rows['is-ischemic'])
    TerritoryTags = [f'post_{tag}' for tag in tags]
    cutoff = list(patient_rows['cutoff'])[0]

    # Reset per patient so a missing file cannot silently reuse the previous
    # patient's label file.
    LabelFile = None
    for file in TerritoryLabels:
        if patient_id in os.path.basename(os.path.splitext(file)[0]):
            LabelFile = file
    if LabelFile is None:
        raise FileNotFoundError(f"No territory label (.dat) file found for {patient_id}")
    MBF_Labels = ReadLabels(LabelFile, TerritoryTags)

    # Same reset for the two volumes. The cutoff from the metadata is applied
    # to the A volume only; the B volume is normalised with a cutoff of 0.
    MBF_A = None
    MBF_B = None
    for file in VTUFiles:
        stem = os.path.basename(os.path.splitext(file)[0])
        if f"{patient_id}A" in stem:
            MBF_A = Normalize(ReadVTUFile(file), normalization_method, cutoff)
        elif f"{patient_id}B" in stem:
            MBF_B = Normalize(ReadVTUFile(file), normalization_method, 0)
    if MBF_A is None or MBF_B is None:
        raise FileNotFoundError(f"Missing A and/or B .vtu file for {patient_id}")

    AbsMBFData_A, IndexMBFData_A = CollectMBFData(MBF_A, MBF_Labels)
    AbsMBFData_B, IndexMBFData_B = CollectMBFData(MBF_B, MBF_Labels)

    # Territories listed in the metadata carry their own ischemia flag.
    groups = list(zip(tags, ischemia))
    # The two catch-all groups are recorded as non-ischemic. ReadLabels omits
    # groups without ids, so only add the ones that are present.
    for t in ("Nongrafted-Stenosis", "Nongrafted"):
        if t in MBF_Labels:
            groups.append((t, False))

    for t, ischemic_flag in groups:
        data.append({
            "patient_id": patient_id,
            "tag": t,
            "is_ischemic": ischemic_flag,
            "absMBF_A": AbsMBFData_A[t],
            "absMBF_B": AbsMBFData_B[t],
            "IndexMBF_A": IndexMBFData_A[t],
            "IndexMBF_B": IndexMBFData_B[t],
        })

df = pd.DataFrame(data)
df


In [None]:
def _pool_samples(frame, column):
    """Concatenate the per-row sample arrays stored in *column* of *frame*."""
    arrays = list(frame[column].values)
    # np.concatenate rejects an empty sequence; an empty group gives an empty array.
    return np.concatenate(arrays) if arrays else np.array([])


def _print_summary(name, samples):
    """Print *name* followed by the median, IQR, mean and std of *samples*."""
    if len(samples) == 0:
        print(name, "no samples")
        return
    q1, q2, q3 = np.percentile(samples, [25, 50, 75])
    print(name, q2, q3 - q1, np.mean(samples), np.std(samples))


# Row selections for the four territory groups.
ischemic_rows = df[df['is_ischemic'] == True]
nonischemic_grafted_rows = df[(df['is_ischemic'] == False) & (df['tag'] != 'Nongrafted') & (df['tag'] != "Nongrafted-Stenosis")]
stenosed_nongrafted_rows = df[df['tag'] == 'Nongrafted-Stenosis']
nongrafted_rows = df[df['tag'] == 'Nongrafted']

print("TerritoryTag", "Median", "IQR", "Mean", "std")

index_mbf_ischemic_A = _pool_samples(ischemic_rows, 'IndexMBF_A')
_print_summary("ischemic_grafted_A", index_mbf_ischemic_A)
index_mbf_ischemic_B = _pool_samples(ischemic_rows, 'IndexMBF_B')
_print_summary("ischemic_grafted_B", index_mbf_ischemic_B)

index_mbf_nonischemic_A = _pool_samples(nonischemic_grafted_rows, 'IndexMBF_A')
_print_summary("nonischemic_grafted_A", index_mbf_nonischemic_A)
index_mbf_nonischemic_B = _pool_samples(nonischemic_grafted_rows, 'IndexMBF_B')
_print_summary("nonischemic_grafted_B", index_mbf_nonischemic_B)

index_mbf_st_nongrafted_A = _pool_samples(stenosed_nongrafted_rows, 'IndexMBF_A')
_print_summary("stenosed_nongrafted_A", index_mbf_st_nongrafted_A)
index_mbf_st_nongrafted_B = _pool_samples(stenosed_nongrafted_rows, 'IndexMBF_B')
_print_summary("stenosed_nongrafted_B", index_mbf_st_nongrafted_B)

index_mbf_nongrafted_A = _pool_samples(nongrafted_rows, 'IndexMBF_A')
_print_summary("nongrafted_A", index_mbf_nongrafted_A)
index_mbf_nongrafted_B = _pool_samples(nongrafted_rows, 'IndexMBF_B')
_print_summary("nongrafted_B", index_mbf_nongrafted_B)

# Optional (disabled): cap every pooled group at 1000 random samples before
# plotting. Kept as comments so the cell does not echo a string as its output.
#
# def downsample(array, size=1000):
#     if len(array) > size:
#         return np.random.choice(array, size, replace=False)
#     return array
#
# index_mbf_ischemic_A = downsample(index_mbf_ischemic_A)
# index_mbf_ischemic_B = downsample(index_mbf_ischemic_B)
# index_mbf_nonischemic_A = downsample(index_mbf_nonischemic_A)
# index_mbf_nonischemic_B = downsample(index_mbf_nonischemic_B)
# index_mbf_nongrafted_A = downsample(index_mbf_nongrafted_A)
# index_mbf_nongrafted_B = downsample(index_mbf_nongrafted_B)

In [None]:
# Create a DataFrame with each part
def _phase_frame(phase, grouped_samples):
    """Build the long-format frame of one acquisition type.

    *grouped_samples* is an ordered list of (territory name, sample array)
    pairs; every sample becomes one row labelled with its territory name and
    with *phase* as its 'Type'.
    """
    territory_column = []
    for territory_name, samples in grouped_samples:
        territory_column += [territory_name] * len(samples)
    return pd.DataFrame({
        'MBF': np.concatenate([samples for _, samples in grouped_samples]),
        'Territory': territory_column,
        'Type': [phase] * len(territory_column),
    })


# One frame per acquisition type, with the groups in plotting order
df_A = _phase_frame('A', [
    ('Ischemic Grafted', index_mbf_ischemic_A),
    ('Non-ischemic Grafted', index_mbf_nonischemic_A),
    ('Nongrafted Stenosis', index_mbf_st_nongrafted_A),
    ('Nongrafted', index_mbf_nongrafted_A),
])

df_B = _phase_frame('B', [
    ('Ischemic Grafted', index_mbf_ischemic_B),
    ('Non-ischemic Grafted', index_mbf_nonischemic_B),
    ('Nongrafted Stenosis', index_mbf_st_nongrafted_B),
    ('Nongrafted', index_mbf_nongrafted_B),
])

# Stack both acquisition types into a single long-format frame
df_long = pd.concat([df_A, df_B], ignore_index=True)

In [None]:
# Split violin plot of index MBF per territory group, A versus B.
plt.figure(figsize=(10, 6))

phase_colors = {'A': 'skyblue', 'B': 'salmon'}
violin_ax = sns.violinplot(
    data=df_long,
    x='Territory',
    y='MBF',
    hue='Type',
    split=True,
    inner='quartiles',
    bw_adjust=10,
    common_norm=True,
    linewidth=2,
    palette=phase_colors,
)

violin_ax.set_ylabel("Index MBF")
# The x-axis label is left at the seaborn default.
violin_ax.legend(title="Pre/Post CABG")
violin_ax.set_ylim([-0.5, 3])
plt.show()

In [None]:
# Box plot of the pooled index MBF per territory group, A versus B (outliers hidden).
box_fig, ax = plt.subplots(figsize=(8, 5))
box_colors = {'A': 'skyblue', 'B': 'salmon'}
sns.boxplot(
    data=df_long,
    x='Territory',
    y='MBF',
    hue='Type',
    showfliers=False,
    palette=box_colors,
    ax=ax,
)

ax.set_ylabel("Index MBF", fontsize=20)
ax.tick_params(axis='x', rotation=0, labelsize=10)
ax.legend(loc='upper left')
box_fig.tight_layout()
plt.show()

In [None]:
# One box plot per patient: index MBF of every territory group, A versus B.
# NOTE(review): the aggregation loop earlier in this notebook excludes patient
# 'SU23'; this loop does not -- confirm whether it should be skipped here too.
normalization_method = "mode"
for patient_id in patient_ids:
    print(patient_id)

    # Filter the metadata once per patient instead of once per column.
    patient_rows = PatientMetadata[PatientMetadata['patient-id'] == patient_id]
    tags = list(patient_rows['stenosis tag'])
    TerritoryTags = [f'post_{tag}' for tag in tags]
    cutoff = list(patient_rows['cutoff'])[0]

    # Reset per patient so a missing file cannot silently reuse the previous
    # patient's label file and produce a plot of the wrong data.
    LabelFile = None
    for file in TerritoryLabels:
        if patient_id in os.path.basename(os.path.splitext(file)[0]):
            LabelFile = file
    if LabelFile is None:
        print(f"Skipping {patient_id}: no territory label (.dat) file found")
        continue
    MBF_Labels = ReadLabels(LabelFile, TerritoryTags)

    # Same reset for the two volumes. The cutoff from the metadata is applied
    # to the A volume only; the B volume is normalised with a cutoff of 0.
    MBF_A = None
    MBF_B = None
    for file in VTUFiles:
        stem = os.path.basename(os.path.splitext(file)[0])
        if f"{patient_id}A" in stem:
            MBF_A = Normalize(ReadVTUFile(file), normalization_method, cutoff)
        elif f"{patient_id}B" in stem:
            MBF_B = Normalize(ReadVTUFile(file), normalization_method, 0)
    if MBF_A is None or MBF_B is None:
        print(f"Skipping {patient_id}: missing A and/or B .vtu file")
        continue

    AbsMBFData_A, IndexMBFData_A = CollectMBFData(MBF_A, MBF_Labels)
    AbsMBFData_B, IndexMBFData_B = CollectMBFData(MBF_B, MBF_Labels)

    # Metadata territories first, then the catch-all groups. ReadLabels omits
    # groups without ids, so only add the ones that are present.
    groups = list(tags)
    for t in ("Nongrafted-Stenosis", "Nongrafted"):
        if t in MBF_Labels:
            groups.append(t)

    # Collect the per-group frames and concatenate once; growing a frame with
    # repeated pd.concat recopies it each time and starts from an empty frame
    # whose columns have no numeric dtype.
    frames = []
    for t in groups:
        samples_A = IndexMBFData_A[t]
        samples_B = IndexMBFData_B[t]
        frames.append(pd.DataFrame({
            "MBF": np.concatenate([samples_A, samples_B]),
            "Territories": [t] * (len(samples_A) + len(samples_B)),
            "Type": ['A'] * len(samples_A) + ['B'] * len(samples_B),
        }))
    if not frames:
        print(f"Skipping {patient_id}: no territory groups to plot")
        continue

    data = pd.concat(frames, ignore_index=True)
    BoxPlot(data)
