# Load packages

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

# Set up paths

In [3]:
# Resolve project folders relative to the directory the notebook is run from:
# inputs live in <parent>/input, results go to <parent>/output/sensitivity_analysis/nonans.
code_dir = Path.cwd()
statistics_dir = code_dir.parent
source_dir = statistics_dir.joinpath("input")
output_dir = statistics_dir.joinpath("output", "sensitivity_analysis", "nonans")

# Create the output folder (and any missing parents); a no-op when it already exists.
output_dir.mkdir(parents=True, exist_ok=True)

# Load data

In [4]:
# Read the per-subject statistics table; rows are keyed by the subject identifier.
statistics_df = pd.read_csv(
    source_dir.joinpath('statistics_df_randomized.csv'),
    index_col="sub_id",
)

# Set up R environment

In [None]:
import os

# Set the R_HOME environment variable.
# This has to happen BEFORE rpy2 is imported: rpy2 looks up R_HOME and starts
# the embedded R session while `rpy2.robjects` is being imported, so an
# assignment made afterwards is not picked up.
os.environ['R_HOME'] = '/usr/lib/R/'

import rpy2.robjects as robjects

# Prepend the user library to R's search path so packages installed there are found.
# NOTE(review): both paths are machine-specific; adjust them when running elsewhere.
new_path = "/home/csi/R/x86_64-pc-linux-gnu-library/4.3"
robjects.r(f'.libPaths(c("{new_path}", .libPaths()))')


# Linear mixed effects models comparing BBB leakage between lesion, penumbra and normal tissue

In [None]:
from pymer4.models import Lmer

# Build the long-format table: one row per subject and tissue compartment.
# Subjects missing any of the three EF measurements are removed beforehand.
repeatead_measures = ['nice_normal_z_ef','nice_penumbra_z_ef','nice_lesion_z_ef']
complete_cases = statistics_df.reset_index().dropna(subset=repeatead_measures)
df_lme = complete_cases.melt(
    id_vars=['sub_id','AGE', 'SEX', 'NIHSSSCORE_V00', 'stroke_volume_v00', 'scanner'],
    value_vars=repeatead_measures,
    var_name="loc_measurement",
    value_name="EF",
    ignore_index=False,
).reset_index(drop=True)

# Standardise (z-score) age, stroke volume, NIHSS and EF over the long table
columns_to_normalize = ['AGE', 'stroke_volume_v00', 'NIHSSSCORE_V00', 'EF']
for col in columns_to_normalize:
    df_lme[col] = (df_lme[col] - df_lme[col].mean()) / df_lme[col].std()

# Mixed model: tissue compartment plus clinical covariates as fixed effects,
# random intercepts for subject and for scanner (no random slopes)
model = Lmer(
    "EF ~ loc_measurement + AGE + SEX + NIHSSSCORE_V00 + stroke_volume_v00 + (1|sub_id) + (1|scanner)",
    data=df_lme,
)

# Fit the model with an explicit level order for the tissue factor, then save and show the estimates
lme = model.fit(factors={"loc_measurement": ['nice_normal_z_ef','nice_lesion_z_ef', 'nice_penumbra_z_ef']})
lme.to_csv(output_dir / "lesion_penumbra_normal_lme.csv")
print(lme)

# ANOVA table of the fitted model: saved to disk and displayed as the cell output
anova = model.anova()
anova.to_csv(output_dir / "lesion_penumbra_normal_anova.csv")
anova

In [None]:
# Post-hoc tests on the tissue factor of the fitted model.
# NOTE(review): grouping_vars repeats the marginal variable — confirm this is intended.
posthoc_result = model.post_hoc(marginal_vars="loc_measurement", grouping_vars="loc_measurement")
marginal_estimates, comparisons = posthoc_result

# Save and show the contrasts table (the marginal estimates are kept in memory only)
comparisons.to_csv(output_dir.joinpath("lesion_penumbra_normal_posthoc.csv"))
print(comparisons)


In [None]:
# Summary statistics of EF in the penumbra sub-regions (no later infarct vs. later infarct)
repeatead_measures = [
    'nice_penumbra_noinfarct_z_ef',
    'nice_penumbra_infarct_z_ef',
]
statistics_df.loc[:, repeatead_measures].describe()

## Visualization

In [17]:
# Display names for the x-axis, listed in the same order as the plotted tissue compartments
location_styled = ['Normal Tissue', 'Penumbra (Tmax>6s)', 'Ischemic Core']

# Display name of the outcome variable
ef_styled = ['EF (z-scored)']

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Rebuild the long-format table for plotting (complete cases only, EF left as stored in
# the source columns rather than re-standardised as in the model cell).
repeatead_measures = ['nice_normal_z_ef','nice_penumbra_z_ef','nice_lesion_z_ef']
df_lme = pd.melt(statistics_df.reset_index().dropna(subset=repeatead_measures), 
                 id_vars=['sub_id','AGE', 'SEX', 'NIHSSSCORE_V00', 'stroke_volume_v00', 'scanner'], 
                 value_vars=repeatead_measures, 
                 var_name="loc_measurement", 
                 value_name="EF", ignore_index=False).reset_index(drop=True)

custom_palette = sns.color_palette("Paired")

# Pin the category order once so that box positions, bracket positions and the tick
# labels taken from `location_styled` are guaranteed to refer to the same groups.
group_labels = list(repeatead_measures)

plt.figure(figsize=(8, 6))
sns.boxplot(y='EF', x='loc_measurement', data=df_lme, order=group_labels, width=0.5, palette=custom_palette, boxprops=dict(alpha=0.7), showfliers=False)
sns.stripplot(y='EF', x='loc_measurement', data=df_lme, order=group_labels, color='black', size=3, jitter=True, alpha=0.6)

# Annotations for significance
y_max = df_lme['EF'].max()
y_step = y_max * 0.12  # Vertical spacing between brackets, relative to the maximum EF value

# One bracket per post-hoc contrast. The stacking level comes from enumerate (starting
# at 1) instead of the DataFrame index, so the first bracket always sits one step above
# the data and the code does not depend on the index being numeric or 0-/1-based.
for level, (_, row) in enumerate(comparisons.iterrows(), start=1):
    groups = row['Contrast'].split(' - ')
    p_value = row['P-val']
    
    # Assumes the group names in 'Contrast' match the values of 'loc_measurement'
    x1 = group_labels.index(groups[0])
    x2 = group_labels.index(groups[1])
    
    y = y_max + level * y_step
    
    # Conventional significance stars
    p_text = '***' if p_value < 0.001 else '**' if p_value < 0.01 else '*' if p_value < 0.05 else 'ns'
    
    plt.plot([x1, x1, x2, x2], [y - y_step/4, y, y, y - y_step/4], lw=1.5, c='black')
    plt.text((x1 + x2) * 0.5, y, p_text, ha='center', va='bottom')


plt.xticks(ticks=np.arange(len(location_styled)), labels=location_styled, size=10)
plt.xlabel('')
plt.ylabel('EF (z-scored)')

plt.savefig(output_dir/"boxplot_lesion_penumbra_normal_ef.png", dpi=300)


# Linear mixed effects models comparing BBB leakage within the perfusion deficit

In [None]:
from pymer4.models import Lmer

# Build the long-format table: one row per subject and Tmax band.
# Only subjects with a penumbra EF value and all three Tmax-band EF values are kept.
statistics_df_clean = statistics_df.dropna(subset=['nice_penumbra_z_ef'])
repeatead_measures = ['nice_tmax6_z_ef','nice_tmax8_z_ef', 'nice_tmax10_z_ef']
complete_cases = statistics_df_clean.reset_index().dropna(subset=repeatead_measures)
df_lme = complete_cases.melt(
    id_vars=['sub_id','AGE', 'SEX', 'NIHSSSCORE_V00', 'stroke_volume_v00', 'scanner'],
    value_vars=repeatead_measures,
    var_name="loc_measurement",
    value_name="EF",
    ignore_index=False,
).reset_index(drop=True)

# Standardise (z-score) age, stroke volume, NIHSS and EF over the long table
columns_to_normalize = ['AGE', 'stroke_volume_v00', 'NIHSSSCORE_V00', 'EF']
for col in columns_to_normalize:
    df_lme[col] = (df_lme[col] - df_lme[col].mean()) / df_lme[col].std()

# Mixed model: Tmax band plus clinical covariates as fixed effects,
# random intercepts for subject and for scanner
model = Lmer(
    "EF ~ loc_measurement + AGE + SEX + NIHSSSCORE_V00 + stroke_volume_v00 + (1|sub_id) + (1|scanner)",
    data=df_lme,
)

# Fit the model with an explicit level order for the Tmax factor, then save and show the estimates
lme = model.fit(factors={"loc_measurement": ['nice_tmax6_z_ef','nice_tmax8_z_ef', 'nice_tmax10_z_ef']})
lme.to_csv(output_dir / "perfdef_lme.csv")
print(lme)

# ANOVA table of the fitted model: saved to disk and displayed as the cell output
anova = model.anova()
anova.to_csv(output_dir / "perfdef_anova.csv")
anova

In [None]:
# Post-hoc tests on the Tmax-band factor of the fitted model.
# NOTE(review): grouping_vars repeats the marginal variable — confirm this is intended.
posthoc_result = model.post_hoc(marginal_vars="loc_measurement", grouping_vars="loc_measurement")
marginal_estimates, comparisons = posthoc_result

# Save and show the contrasts table (the marginal estimates are kept in memory only)
comparisons.to_csv(output_dir.joinpath("perfdef_posthoc.csv"))
print(comparisons)


In [21]:
# Display names for the x-axis, listed in the same order as the plotted Tmax bands
location_styled = ['Tmax 6-8s', 'Tmax 8-10s', 'Tmax >/=10s']

# Display name of the outcome variable
ef_styled = ['EF (z-scored)']

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Rebuild the long-format table for plotting, using the same case selection as the
# perfusion-deficit model cell (EF left as stored in the source columns).
statistics_df_clean = statistics_df.dropna(subset=['nice_penumbra_z_ef'])
repeatead_measures = ['nice_tmax6_z_ef','nice_tmax8_z_ef', 'nice_tmax10_z_ef']
df_lme = pd.melt(statistics_df_clean.reset_index().dropna(subset=repeatead_measures), 
                 id_vars=['sub_id','AGE', 'SEX', 'NIHSSSCORE_V00', 'stroke_volume_v00', 'scanner'], 
                 value_vars=repeatead_measures, 
                 var_name="loc_measurement", 
                 value_name="EF", ignore_index=False).reset_index(drop=True)

custom_palette = sns.color_palette("Paired")

# Pin the category order once so that box positions, bracket positions and the tick
# labels taken from `location_styled` are guaranteed to refer to the same groups.
group_labels = list(repeatead_measures)

plt.figure(figsize=(8, 6))
sns.boxplot(y='EF', x='loc_measurement', data=df_lme, order=group_labels, width=0.5, palette=custom_palette, boxprops=dict(alpha=0.7), showfliers=False)
sns.stripplot(y='EF', x='loc_measurement', data=df_lme, order=group_labels, color='black', size=3, jitter=True, alpha=0.6)

# Annotations for significance
y_max = df_lme['EF'].max()
y_step = y_max * 0.12  # Vertical spacing between brackets, relative to the maximum EF value

# One bracket per post-hoc contrast. The stacking level comes from enumerate (starting
# at 1) instead of the DataFrame index, so the first bracket always sits one step above
# the data and the code does not depend on the index being numeric or 0-/1-based.
for level, (_, row) in enumerate(comparisons.iterrows(), start=1):
    groups = row['Contrast'].split(' - ')
    p_value = row['P-val']
    
    # Assumes the group names in 'Contrast' match the values of 'loc_measurement'
    x1 = group_labels.index(groups[0])
    x2 = group_labels.index(groups[1])
    
    y = y_max + level * y_step
    
    # Conventional significance stars
    p_text = '***' if p_value < 0.001 else '**' if p_value < 0.01 else '*' if p_value < 0.05 else 'ns'
    
    plt.plot([x1, x1, x2, x2], [y - y_step/4, y, y, y - y_step/4], lw=1.5, c='black')
    plt.text((x1 + x2) * 0.5, y, p_text, ha='center', va='bottom')


plt.xticks(ticks=np.arange(len(location_styled)), labels=location_styled, size=10)
plt.xlabel('')
plt.ylabel('EF (z-scored)')

plt.savefig(output_dir/"boxplot_perfdef_ef.png", dpi=300)


# Linear mixed effects models comparing BBB leakage within the penumbra according to future infarction, controlling for Tmax and treatment

In [6]:
# Complete cases only: EF and mean Tmax must be present for both penumbra sub-regions
required_columns = [
    'nice_penumbra_noinfarct_z_ef',
    'nice_penumbra_infarct_z_ef',
    'nice_penumbra_noinfarct_mean_tmax_rapid',
    'nice_penumbra_infarct_mean_tmax_rapid',
]
statistics_df_clean = statistics_df.dropna(subset=required_columns)

# EF in long format: one row per subject and penumbra sub-region
repeatead_measures = ['nice_penumbra_noinfarct_z_ef', 'nice_penumbra_infarct_z_ef']
df_ef = statistics_df_clean.reset_index().melt(
    id_vars=['sub_id','AGE', 'SEX', 'NIHSSSCORE_V00', 'stroke_volume_v00', 'treatment', 'scanner'],
    value_vars=repeatead_measures,
    var_name="loc_measurement",
    value_name="EF",
    ignore_index=False,
).reset_index(drop=True)
# Keep only the third underscore-separated token of the column name ('noinfarct' / 'infarct')
df_ef['loc_measurement'] = df_ef['loc_measurement'].map(lambda name: name.split('_')[2])

# Mean Tmax in long format, keyed the same way as the EF table
tmax = ['nice_penumbra_noinfarct_mean_tmax_rapid', 'nice_penumbra_infarct_mean_tmax_rapid']
df_tmax = statistics_df_clean.reset_index().melt(
    id_vars='sub_id',
    value_vars=tmax,
    var_name='loc_measurement',
    value_name='Tmax',
    ignore_index=False,
).reset_index(drop=True)
df_tmax['loc_measurement'] = df_tmax['loc_measurement'].map(lambda name: name.split('_')[2])

# Attach the region-specific Tmax to every EF row
df_lme = df_ef.merge(df_tmax, on=['sub_id', 'loc_measurement'])

# Standardise (z-score) age, stroke volume, NIHSS and EF; Tmax is left on its original scale
columns_to_normalize = ['AGE', 'stroke_volume_v00', 'NIHSSSCORE_V00', 'EF']
for col in columns_to_normalize:
    df_lme[col] = (df_lme[col] - df_lme[col].mean()) / df_lme[col].std()

In [None]:
from pymer4.models import Lmer

# Mixed model: penumbra sub-region, Tmax, clinical covariates and treatment as fixed
# effects, random intercepts for subject and for scanner
model = Lmer(
    "EF ~ loc_measurement + Tmax + AGE + SEX + NIHSSSCORE_V00 + stroke_volume_v00 + treatment + (1|sub_id) + (1|scanner)",
    data=df_lme,
)

# Fit the model with an explicit level order for the sub-region factor, then save and show the estimates
lme = model.fit(factors={"loc_measurement": ['noinfarct', 'infarct']})
lme.to_csv(output_dir.joinpath("penumbra_adjusted_lme.csv"))
print(lme)

# ANOVA table of the fitted model: saved to disk and displayed as the cell output
anova = model.anova()
anova.to_csv(output_dir.joinpath("penumbra_adjusted_anova.csv"))
anova

In [None]:
# Post-hoc tests on the penumbra sub-region factor of the fitted model.
# NOTE(review): grouping_vars repeats the marginal variable — confirm this is intended.
posthoc_result = model.post_hoc(marginal_vars="loc_measurement", grouping_vars="loc_measurement")
marginal_estimates, comparisons = posthoc_result

# Save and show the contrasts table (the marginal estimates are kept in memory only)
comparisons.to_csv(output_dir.joinpath("penumbra_adjusted_posthoc.csv"))
print(comparisons)


## Visualization

In [None]:
# Display names for the x-axis, listed in the same order as the plotted penumbra sub-regions
location_styled = ['Salvaged Penumbra', 'Infarcted Penumbra']

# Display name of the outcome variable
ef_styled = ['EF (z-scored)']

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Use the same complete-case selection as the adjusted penumbra model, so the figure
# shows exactly the subjects whose data produced the annotated post-hoc contrasts.
complete_case_columns = [
    'nice_penumbra_noinfarct_z_ef',
    'nice_penumbra_infarct_z_ef',
    'nice_penumbra_noinfarct_mean_tmax_rapid',
    'nice_penumbra_infarct_mean_tmax_rapid',
]
statistics_df_clean = statistics_df.dropna(subset=complete_case_columns)
repeatead_measures = ['nice_penumbra_noinfarct_z_ef', 'nice_penumbra_infarct_z_ef']
df_lme = pd.melt(statistics_df_clean.reset_index(), 
                 id_vars=['sub_id','AGE', 'SEX', 'NIHSSSCORE_V00', 'stroke_volume_v00', 'scanner'], 
                 value_vars=repeatead_measures, 
                 var_name="loc_measurement", 
                 value_name="EF", ignore_index=False).reset_index(drop=True)

# Keep only the third underscore-separated token of the column name ('noinfarct' / 'infarct')
df_lme['loc_measurement'] = df_lme['loc_measurement'].apply(lambda x: x.split('_')[2])

custom_palette = sns.color_palette("Paired")

# Pin the category order once so that box positions, bracket positions and the tick
# labels taken from `location_styled` are guaranteed to refer to the same groups.
group_labels = [name.split('_')[2] for name in repeatead_measures]

plt.figure(figsize=(8, 6))
sns.boxplot(y='EF', x='loc_measurement', data=df_lme, order=group_labels, width=0.5, palette=custom_palette, boxprops=dict(alpha=0.7), showfliers=False)
sns.stripplot(y='EF', x='loc_measurement', data=df_lme, order=group_labels, color='black', size=3, jitter=True, alpha=0.6)

# Annotations for significance
y_max = df_lme['EF'].max()
y_step = y_max * 0.12  # Vertical spacing between brackets, relative to the maximum EF value

# One bracket per post-hoc contrast. The stacking level comes from enumerate (starting
# at 1) instead of the DataFrame index, so the first bracket always sits one step above
# the data and the code does not depend on the index being numeric or 0-/1-based.
for level, (_, row) in enumerate(comparisons.iterrows(), start=1):
    groups = row['Contrast'].split(' - ')
    p_value = row['P-val']
    
    # Assumes the group names in 'Contrast' match the values of 'loc_measurement'
    x1 = group_labels.index(groups[0])
    x2 = group_labels.index(groups[1])
    
    y = y_max + level * y_step
    
    # Conventional significance stars
    p_text = '***' if p_value < 0.001 else '**' if p_value < 0.01 else '*' if p_value < 0.05 else 'ns'
    
    plt.plot([x1, x1, x2, x2], [y - y_step/4, y, y, y - y_step/4], lw=1.5, c='black')
    plt.text((x1 + x2) * 0.5, y, p_text, ha='center', va='bottom')


plt.xticks(ticks=np.arange(len(location_styled)), labels=location_styled, size=10)
plt.xlabel('')
plt.ylabel('EF (z-scored)')

plt.savefig(output_dir/"boxplot_penumbra_adjusted_ef.png", dpi=300)
