# Imports

In [77]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation

import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import pickle

# Set Directory Paths

In [78]:
# Location of the data folder
%store -r dataDir

# Directory path to load formatted gfap_ferritin_dataDir Data
%store -r gfap_ferritin_dataDir

# Directory path to save Calcualted Analysis Data
%store -r gfap_ferritin_CalData

# Loading GFAP, Ferritin Dataset - %AO

In [79]:
gfap_ferr_T = pd.read_csv(os.path.join(dataDir, 'GFAP_Ferritin', 'FTLD AO output 162024INDDID.csv'))

In [80]:
gfap_ferr_T

Unnamed: 0,AutopsyID,BlockID,Hemisphere,Region,Ferritin,GFAP,AT8,TDP43,INDDID
0,1986-003,1986-003-12F,N,MFC,,0.054025,,,103121.0
1,1986-003,1986-003-12F,R,MFC,0.006197,,,,103121.0
2,1986-003,1986-003-13F,R,M1,0.024898,0.054097,,,103121.0
3,1986-003,1986-003-61F,R,HIP,,,0.000154,,103121.0
4,1995-005,1995-005-04E,R,OFC,0.026164,0.051511,,0.004535,111517.0
...,...,...,...,...,...,...,...,...,...
3289,2018-216,2018-216-27F,L,OFC,,,0.006361,,119113.0
3290,2018-216,2018-216-28F,L,V1,,,0.000256,,119113.0
3291,2018-216,2018-216-29F,L,ANG,,,0.007745,,119113.0
3292,2018-216,2018-216-30F,L,aCING,,,0.004663,,119113.0


# Load IronGlia_FTLD_INDDIDs Data

In [81]:
ironGlia_T = pd.read_excel(os.path.join(dataDir, 'GFAP_Ferritin', 'Copy of IronGlia_FTLD_INDDIDs Neuopath dx code.xlsx'))

# Merging DataFrames by INDDID

In [82]:
gfap_ferr_T_merged = pd.merge(gfap_ferr_T, ironGlia_T, on='INDDID', how='outer')[['INDDID', 'AutopsyID', 'BlockID', 'Hemisphere', 'Region', 'Ferritin', 'GFAP', 'AT8', 'TDP43', '0Cont1TDP2Tau', '1ExcludeALSorLATE']]

In [83]:
gfap_ferr_T_merged

Unnamed: 0,INDDID,AutopsyID,BlockID,Hemisphere,Region,Ferritin,GFAP,AT8,TDP43,0Cont1TDP2Tau,1ExcludeALSorLATE
0,103121.0,1986-003,1986-003-12F,N,MFC,,0.054025,,,2,0
1,103121.0,1986-003,1986-003-12F,R,MFC,0.006197,,,,2,0
2,103121.0,1986-003,1986-003-13F,R,M1,0.024898,0.054097,,,2,0
3,103121.0,1986-003,1986-003-61F,R,HIP,,,0.000154,,2,0
4,111517.0,1995-005,1995-005-04E,R,OFC,0.026164,0.051511,,0.004535,1,0
...,...,...,...,...,...,...,...,...,...,...,...
3289,119113.0,2018-216,2018-216-27F,L,OFC,,,0.006361,,2,0
3290,119113.0,2018-216,2018-216-28F,L,V1,,,0.000256,,2,0
3291,119113.0,2018-216,2018-216-29F,L,ANG,,,0.007745,,2,0
3292,119113.0,2018-216,2018-216-30F,L,aCING,,,0.004663,,2,0


## Exclude where 1ExcludeALSorLATE == 1

In [84]:
gfap_ferr_T_merged = gfap_ferr_T_merged[gfap_ferr_T_merged['1ExcludeALSorLATE'] != 1]

## Format the GFAP, Ferritin Data - %AO to desired format

### Get Tau(AT8) vs TDP43 Groups

In [90]:
gfap_ferr_type = gfap_ferr_T_merged.groupby('0Cont1TDP2Tau')

# HC
hc_T = gfap_ferr_type.get_group(0)

# Tau
tau_T = gfap_ferr_type.get_group(2)

# TDP
tdp_T = gfap_ferr_type.get_group(1)


# AutopsyID
hc_INDDIDs = hc_T['INDDID'].unique().tolist()
tau_INDDIDs = tau_T['INDDID'].unique().tolist()
tdp_INDDIDs = tdp_T['INDDID'].unique().tolist()
print(f'Number of Unique HC INDDIDs is: {len(hc_INDDIDs)}')
print(f'Number of Unique TAU INDDIDs is: {len(tau_INDDIDs)}')
print(f'Number of Unique TDP INDDIDs is: {len(tdp_INDDIDs)}')

# Get rid of rows where AT8 is NaN for TAU
tau_T = tau_T[tau_T['AT8'].notna()]

# Get rid of rows where TDP43 is NaN for TDP43
tdp_T = tdp_T[tdp_T['TDP43'].notna()]

Number of Unique HC INDDIDs is: 34
Number of Unique TAU INDDIDs is: 138
Number of Unique TDP INDDIDs is: 99


In [91]:
tau_T

Unnamed: 0,INDDID,AutopsyID,BlockID,Hemisphere,Region,Ferritin,GFAP,AT8,TDP43,0Cont1TDP2Tau,1ExcludeALSorLATE
3,103121.0,1986-003,1986-003-61F,R,HIP,,,0.000154,,2,0
9,110745.0,1995-057,1995-057-03E,L,MFC,0.033260,0.040396,0.133789,,2,0
10,110745.0,1995-057,1995-057-04E,L,OFC,0.025673,0.017537,0.126244,,2,0
13,110745.0,1995-057,1995-057-05E,L,pSTC,,,0.112106,,2,0
14,110745.0,1995-057,1995-057-18E,L,HIP,,,0.057006,,2,0
...,...,...,...,...,...,...,...,...,...,...,...
3289,119113.0,2018-216,2018-216-27F,L,OFC,,,0.006361,,2,0
3290,119113.0,2018-216,2018-216-28F,L,V1,,,0.000256,,2,0
3291,119113.0,2018-216,2018-216-29F,L,ANG,,,0.007745,,2,0
3292,119113.0,2018-216,2018-216-30F,L,aCING,,,0.004663,,2,0


In [92]:
tdp_T

Unnamed: 0,INDDID,AutopsyID,BlockID,Hemisphere,Region,Ferritin,GFAP,AT8,TDP43,0Cont1TDP2Tau,1ExcludeALSorLATE
4,111517.0,1995-005,1995-005-04E,R,OFC,0.026164,0.051511,,0.004535,1,0
5,111517.0,1995-005,1995-005-08E,R,ANG,0.022526,,,0.002226,1,0
6,111517.0,1995-005,1995-005-02E,R,M1,,,,0.000940,1,0
7,111517.0,1995-005,1995-005-06E,R,pSTC,,,,0.000495,1,0
8,111517.0,1995-005,1995-005-17E,R,HIP,,,,0.003235,1,0
...,...,...,...,...,...,...,...,...,...,...,...
3234,120720.0,2016-126,2016-126-28F,L,MFC,,,,0.000013,1,0
3235,120720.0,2016-126,2016-126-30F,L,OFC,,,,0.000222,1,0
3236,120720.0,2016-126,2016-126-31F,L,V1,,,,0.000128,1,0
3237,120720.0,2016-126,2016-126-33F,L,aCING,,,,0.000065,1,0


#### Common INDDIDs between TAU vs TDP43

In [93]:
# Convert lists to sets and find the intersection
common_elements = set(tau_INDDIDs).intersection(tdp_INDDIDs)

# Check if there are any common elements
if common_elements:
    print("Common elements:", common_elements)
else:
    print("No common elements")

No common elements


## TAU - MFC, ANG

### MFC: TAU, Ferritin %AO

In [94]:
# Get MFC Region - TAU
tau_region_type = tau_T.groupby('Region')
tau_MFC = tau_region_type.get_group('MFC')

# Remove rows where Ferritin Value is NaN
tau_MFC = tau_MFC.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tau_MFC != 0) & tau_MFC.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tau_MFC = tau_MFC.drop(columns=['Region', 'TDP43'])
tau_MFC = tau_MFC.rename(columns=lambda x: x + '_MFC')

INDDID               51
AutopsyID            51
BlockID              51
Hemisphere           51
Region               51
Ferritin             51
GFAP                 31
AT8                  51
TDP43                 0
0Cont1TDP2Tau        51
1ExcludeALSorLATE     0
dtype: int64


### ANG: TAU, Ferritin %AO

In [95]:
# Get MFC Region - TAU
tau_region_type = tau_T.groupby('Region')
tau_ANG = tau_region_type.get_group('ANG')

# Remove rows where Ferritin Value is NaN
tau_ANG = tau_ANG.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tau_ANG != 0) & tau_ANG.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tau_ANG = tau_ANG.drop(columns=['Region', 'TDP43'])
tau_ANG = tau_ANG.rename(columns=lambda x: x + '_ANG')

INDDID               27
AutopsyID            27
BlockID              27
Hemisphere           27
Region               27
Ferritin             27
GFAP                  0
AT8                  27
TDP43                 0
0Cont1TDP2Tau        27
1ExcludeALSorLATE     0
dtype: int64


In [96]:
# Merge MFC, ANG based on AutopsyID & Hemisphere
TAU_MFC_ANG = pd.merge(tau_MFC, tau_ANG, left_on=['AutopsyID_MFC', 'Hemisphere_MFC'],  
                                         right_on=['AutopsyID_ANG', 'Hemisphere_ANG'], how='inner')

TAU_MFC_ANG = TAU_MFC_ANG.drop(columns=['AutopsyID_ANG', 'Hemisphere_ANG'])
TAU_MFC_ANG = TAU_MFC_ANG.rename(columns={'AutopsyID_MFC': 'AutopsyID',
                                          'Hemisphere_MFC': 'Hemisphere'})

In [97]:
TAU_MFC_ANG

Unnamed: 0,INDDID_MFC,AutopsyID,BlockID_MFC,Hemisphere,Ferritin_MFC,GFAP_MFC,AT8_MFC,0Cont1TDP2Tau_MFC,1ExcludeALSorLATE_MFC,INDDID_ANG,BlockID_ANG,Ferritin_ANG,GFAP_ANG,AT8_ANG,0Cont1TDP2Tau_ANG,1ExcludeALSorLATE_ANG
0,102149.0,2005-073,2005-073-29F,R,0.01593,0.034787,0.098714,2,0,102149.0,2005-073-34F,0.00888,,0.070316,2,0
1,113113.0,2006-027,2006-027-04F,L,0.010866,0.022061,0.010569,2,0,113113.0,2006-027-09F,0.017863,,0.006209,2,0
2,107516.0,2008-251,2008-251-29F,L,0.02853,0.045559,0.254047,2,0,107516.0,2008-251-32F,0.027772,,0.310358,2,0
3,115327.0,2009-102,2009-102-25F,R,0.006274,0.02802,0.000332,2,0,115327.0,2009-102-28F,0.018715,,0.000395,2,0
4,108026.0,2013-223,2013-223-06F,L,0.020343,0.063952,0.12553,2,0,108026.0,2013-223-03F,0.024613,,0.046466,2,0
5,111231.0,2014-011,2014-011-28F,L,0.026754,,0.004994,2,0,111231.0,2014-011-32F,0.027113,,0.002966,2,0
6,116607.0,2014-052,2014-052-26F,R,0.017498,0.036848,0.007585,2,0,116607.0,2014-052-30F,0.017715,,0.022633,2,0
7,106814.0,2014-145,2014-145-04F,L,0.01044,0.049137,0.174298,2,0,106814.0,2014-145-07F,0.012162,,0.001935,2,0
8,116591.0,2014-212,2014-212-25F,L,0.016773,0.03128,0.00926,2,0,116591.0,2014-212-28F,0.020828,,0.00733,2,0
9,116504.0,2014-251,2014-251-24F,L,0.026339,0.025393,0.222864,2,0,116504.0,2014-251-27F,0.020757,,0.036539,2,0


### OFC: TAU, Ferritin %AO

In [98]:
# Get MFC Region - TAU
tau_region_type = tau_T.groupby('Region')
tau_OFC = tau_region_type.get_group('OFC')

# Remove rows where Ferritin Value is NaN
tau_OFC = tau_OFC.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tau_OFC != 0) & tau_OFC.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tau_OFC = tau_OFC.drop(columns=['Region', 'TDP43'])
tau_OFC = tau_OFC.rename(columns=lambda x: x + '_OFC')

INDDID               42
AutopsyID            42
BlockID              42
Hemisphere           42
Region               42
Ferritin             42
GFAP                 23
AT8                  42
TDP43                 0
0Cont1TDP2Tau        42
1ExcludeALSorLATE     0
dtype: int64


### ANG: TAU, Ferritin %AO

In [99]:
# Get MFC Region - TAU
tau_region_type = tau_T.groupby('Region')
tau_ANG = tau_region_type.get_group('ANG')

# Remove rows where Ferritin Value is NaN
tau_ANG = tau_ANG.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tau_ANG != 0) & tau_ANG.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tau_ANG = tau_ANG.drop(columns=['Region', 'TDP43'])
tau_ANG = tau_ANG.rename(columns=lambda x: x + '_ANG')

INDDID               27
AutopsyID            27
BlockID              27
Hemisphere           27
Region               27
Ferritin             27
GFAP                  0
AT8                  27
TDP43                 0
0Cont1TDP2Tau        27
1ExcludeALSorLATE     0
dtype: int64


In [100]:
# Merge MFC, ANG based on AutopsyID & Hemisphere
TAU_OFC_ANG = pd.merge(tau_OFC, tau_ANG, left_on=['AutopsyID_OFC', 'Hemisphere_OFC'],  
                                         right_on=['AutopsyID_ANG', 'Hemisphere_ANG'], how='inner')

TAU_OFC_ANG = TAU_OFC_ANG.drop(columns=['AutopsyID_ANG', 'Hemisphere_ANG'])
TAU_OFC_ANG = TAU_OFC_ANG.rename(columns={'AutopsyID_OFC': 'AutopsyID',
                                          'Hemisphere_OFC': 'Hemisphere'})

In [101]:
TAU_OFC_ANG

Unnamed: 0,INDDID_OFC,AutopsyID,BlockID_OFC,Hemisphere,Ferritin_OFC,GFAP_OFC,AT8_OFC,0Cont1TDP2Tau_OFC,1ExcludeALSorLATE_OFC,INDDID_ANG,BlockID_ANG,Ferritin_ANG,GFAP_ANG,AT8_ANG,0Cont1TDP2Tau_ANG,1ExcludeALSorLATE_ANG
0,114348.0,2007-003,2007-003-25F,R,0.011096,0.034525,0.065233,2,0,114348.0,2007-003-27F,0.014662,,0.238042,2,0
1,107516.0,2008-251,2008-251-55F,L,0.025272,0.055223,0.136122,2,0,107516.0,2008-251-32F,0.027772,,0.310358,2,0
2,107187.0,2010-015,2010-015-32F,R,0.025353,0.075226,0.027926,2,0,107187.0,2010-015-34F,0.021949,,0.011312,2,0
3,111231.0,2014-011,2014-011-30F,L,0.018053,,0.001812,2,0,111231.0,2014-011-32F,0.027113,,0.002966,2,0
4,116591.0,2014-212,2014-212-26F,L,0.015978,0.031675,0.003125,2,0,116591.0,2014-212-28F,0.020828,,0.00733,2,0
5,116504.0,2014-251,2014-251-25F,L,0.019426,,0.037876,2,0,116504.0,2014-251-27F,0.020757,,0.036539,2,0
6,117566.0,2016-220,2016-220-27F,R,0.015978,,0.001,2,0,117566.0,2016-220-29F,0.018277,,0.000706,2,0
7,118410.0,2017-109,2017-109-26F,L,0.014577,0.038298,0.535804,2,0,118410.0,2017-109-28F,0.016687,,0.030775,2,0
8,107677.0,2017-148,2017-148-27F,R,0.022327,,0.025033,2,0,107677.0,2017-148-29F,0.019824,,0.00677,2,0
9,114762.02,2017-158,2017-158-07F,R,0.008581,0.02774,0.067109,2,0,114762.02,2017-158-04F,0.030067,,0.159239,2,0


## TDP - MFC, ANG

### MFC: TDP, Ferritin %AO

In [102]:
# Get MFC Region - TAU
tdp_region_type = tdp_T.groupby('Region')
tdp_MFC = tdp_region_type.get_group('MFC')

# Remove rows where Ferritin Value is NaN
tdp_MFC = tdp_MFC.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tdp_MFC != 0) & tdp_MFC.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tdp_MFC = tdp_MFC.drop(columns=['Region', 'AT8'])
tdp_MFC = tdp_MFC.rename(columns=lambda x: x + '_MFC')

INDDID               55
AutopsyID            55
BlockID              55
Hemisphere           55
Region               55
Ferritin             55
GFAP                 53
AT8                   0
TDP43                55
0Cont1TDP2Tau        55
1ExcludeALSorLATE     0
dtype: int64


### ANG: TDP, Ferritin %AO

In [103]:
# Get MFC Region - TAU
tdp_region_type = tdp_T.groupby('Region')
tdp_ANG = tdp_region_type.get_group('ANG')

# Remove rows where Ferritin Value is NaN
tdp_ANG = tdp_ANG.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tdp_ANG != 0) & tdp_ANG.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tdp_ANG = tdp_ANG.drop(columns=['Region', 'AT8'])
tdp_ANG = tdp_ANG.rename(columns=lambda x: x + '_ANG')

INDDID               48
AutopsyID            48
BlockID              48
Hemisphere           48
Region               48
Ferritin             48
GFAP                 10
AT8                   0
TDP43                48
0Cont1TDP2Tau        48
1ExcludeALSorLATE     0
dtype: int64


In [104]:
# Merge MFC, ANG based on AutopsyID & Hemisphere
TDP_MFC_ANG = pd.merge(tdp_MFC, tdp_ANG, left_on=['AutopsyID_MFC', 'Hemisphere_MFC'],  
                                         right_on=['AutopsyID_ANG', 'Hemisphere_ANG'], how='inner')

TDP_MFC_ANG = TDP_MFC_ANG.drop(columns=['AutopsyID_ANG', 'Hemisphere_ANG'])
TDP_MFC_ANG = TDP_MFC_ANG.rename(columns={'AutopsyID_MFC': 'AutopsyID',
                                          'Hemisphere_MFC': 'Hemisphere'})

In [105]:
TDP_MFC_ANG

Unnamed: 0,INDDID_MFC,AutopsyID,BlockID_MFC,Hemisphere,Ferritin_MFC,GFAP_MFC,TDP43_MFC,0Cont1TDP2Tau_MFC,1ExcludeALSorLATE_MFC,INDDID_ANG,BlockID_ANG,Ferritin_ANG,GFAP_ANG,TDP43_ANG,0Cont1TDP2Tau_ANG,1ExcludeALSorLATE_ANG
0,112298.0,1999-224,1999-224-01E,L,0.042623,0.042827,0.002274,1,0,112298.0,1999-224-35F,0.009827,,0.001056,1,0
1,110658.0,2002-238,2002-238-15F,L,0.024325,0.034106,0.000813,1,0,110658.0,2002-238-18F,0.017602,,0.004022,1,0
2,114076.0,2004-158,2004-158-26F,L,0.019065,0.030841,0.001851,1,0,114076.0,2004-158-31F,0.015413,,0.001289,1,0
3,110361.0,2004-262,2004-262-26F,L,0.019302,0.022704,0.000424,1,0,110361.0,2004-262-31F,0.022496,,0.000579,1,0
4,104659.0,2005-208,2005-208-24F,L,0.009474,0.046669,0.000646,1,0,104659.0,2005-208-27F,0.013243,,0.000379,1,0
5,111092.0,2008-001,2008-001-23F,R,0.01552,0.026493,7.6e-05,1,0,111092.0,2008-001-26F,0.020279,,2e-06,1,0
6,101045.0,2008-156,2008-156-32F,R,0.012718,0.018712,3.1e-05,1,0,101045.0,2008-156-07E,0.007257,,0.00013,1,0
7,105686.0,2008-205,2008-205-30F,L,0.023678,0.053424,8e-06,1,0,105686.0,2008-205-34F,0.019702,,1e-05,1,0
8,112273.0,2009-168,2009-168-27F,R,0.016023,0.028495,0.000403,1,0,112273.0,2009-168-30F,0.019133,,5.7e-05,1,0
9,116561.0,2012-008,2012-008-05E,R,0.041211,0.040269,0.001953,1,0,116561.0,2012-008-32F,0.021777,,0.000114,1,0


### OFC: TDP, Ferritin %AO

In [106]:
# Get MFC Region - TAU
tdp_region_type = tdp_T.groupby('Region')
tdp_OFC = tdp_region_type.get_group('OFC')

# Remove rows where Ferritin Value is NaN
tdp_OFC = tdp_OFC.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tdp_OFC != 0) & tdp_OFC.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tdp_OFC = tdp_OFC.drop(columns=['Region', 'AT8'])
tdp_OFC = tdp_OFC.rename(columns=lambda x: x + '_OFC')

INDDID               75
AutopsyID            75
BlockID              75
Hemisphere           75
Region               75
Ferritin             75
GFAP                 69
AT8                   0
TDP43                75
0Cont1TDP2Tau        75
1ExcludeALSorLATE     0
dtype: int64


### ANG: TDP, Ferritin %AO

In [107]:
# Get MFC Region - TAU
tdp_region_type = tdp_T.groupby('Region')
tdp_ANG = tdp_region_type.get_group('ANG')

# Remove rows where Ferritin Value is NaN
tdp_ANG = tdp_ANG.dropna(subset=['Ferritin'])

# Count non NaN and non zero values for each column
non_zero_non_nan_counts = (tdp_ANG != 0) & tdp_ANG.notna()
counts = non_zero_non_nan_counts.sum()

print(counts)

# Rename Columns / Drop TDP43
tdp_ANG = tdp_ANG.drop(columns=['Region', 'AT8'])
tdp_ANG = tdp_ANG.rename(columns=lambda x: x + '_ANG')

INDDID               48
AutopsyID            48
BlockID              48
Hemisphere           48
Region               48
Ferritin             48
GFAP                 10
AT8                   0
TDP43                48
0Cont1TDP2Tau        48
1ExcludeALSorLATE     0
dtype: int64


In [108]:
# Merge MFC, ANG based on AutopsyID & Hemisphere
TDP_OFC_ANG = pd.merge(tdp_OFC, tdp_ANG, left_on=['AutopsyID_OFC', 'Hemisphere_OFC'],  
                                         right_on=['AutopsyID_ANG', 'Hemisphere_ANG'], how='inner')

TDP_OFC_ANG = TDP_OFC_ANG.drop(columns=['AutopsyID_ANG', 'Hemisphere_ANG'])
TDP_OFC_ANG = TDP_OFC_ANG.rename(columns={'AutopsyID_OFC': 'AutopsyID',
                                          'Hemisphere_OFC': 'Hemisphere'})

In [109]:
TDP_OFC_ANG

Unnamed: 0,INDDID_OFC,AutopsyID,BlockID_OFC,Hemisphere,Ferritin_OFC,GFAP_OFC,TDP43_OFC,0Cont1TDP2Tau_OFC,1ExcludeALSorLATE_OFC,INDDID_ANG,BlockID_ANG,Ferritin_ANG,GFAP_ANG,TDP43_ANG,0Cont1TDP2Tau_ANG,1ExcludeALSorLATE_ANG
0,111517.0,1995-005,1995-005-04E,R,0.026164,0.051511,0.004535,1,0,111517.0,1995-005-08E,0.022526,,0.002226,1,0
1,111863.0,1995-217,1995-217-05F,R,0.017214,0.03769,0.000888,1,0,111863.0,1995-217-08F,0.031052,,0.001599,1,0
2,103640.0,1999-170,1999-170-03E,L,0.027969,0.036721,0.00245,1,0,103640.0,1999-170-06E,0.011858,,0.001215,1,0
3,112298.0,1999-224,1999-224-31F,L,0.013363,0.042514,0.000669,1,0,112298.0,1999-224-35F,0.009827,,0.001056,1,0
4,113227.0,2000-149,2000-149-40F,R,0.01686,0.034996,0.005378,1,0,113227.0,2000-149-42F,0.019514,,0.008545,1,0
5,110338.0,2004-008,2004-008-05F,L,0.024742,0.047877,0.001934,1,0,110338.0,2004-008-13F,0.021446,,0.000265,1,0
6,111092.0,2008-001,2008-001-24F,R,0.00776,0.030444,6.3e-05,1,0,111092.0,2008-001-26F,0.020279,,2e-06,1,0
7,103601.0,2008-147,2008-147-06E,L,0.020561,0.040145,0.003692,1,0,103601.0,2008-147-36F,0.024599,,0.002295,1,0
8,101045.0,2008-156,2008-156-33F,R,0.013347,0.031664,0.000484,1,0,101045.0,2008-156-07E,0.007257,,0.00013,1,0
9,116561.0,2012-008,2012-008-30F,R,0.01065,0.0427,0.000279,1,0,116561.0,2012-008-32F,0.021777,,0.000114,1,0


# Mediation Analysis

## AT8_MFC $\rightarrow$ Ferritin_MFC $\rightarrow$ AT8_ANG

In [110]:
# Model for the mediator
mediator_model = sm.OLS(TAU_MFC_ANG['Ferritin_MFC'], sm.add_constant(TAU_MFC_ANG['AT8_MFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TAU_MFC_ANG['AT8_ANG'], sm.add_constant(TAU_MFC_ANG[['AT8_MFC', 'Ferritin_MFC']]))

# Create the mediation analysis model / AT8_MFC: exposure = Independent Variable, Ferritin_MFC: Mediator
med_analysis = Mediation(outcome_model, mediator_model, 'AT8_MFC', 'Ferritin_MFC')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),0.005915,-0.111647,0.125191,0.878
ACME (treated),0.005915,-0.111647,0.125191,0.878
ADE (control),0.74543,0.465241,1.014288,0.0
ADE (treated),0.74543,0.465241,1.014288,0.0
Total effect,0.751345,0.479128,1.020776,0.0
Prop. mediated (control),0.004254,-0.153925,0.160041,0.878
Prop. mediated (treated),0.004254,-0.153925,0.160041,0.878
ACME (average),0.005915,-0.111647,0.125191,0.878
ADE (average),0.74543,0.465241,1.014288,0.0
Prop. mediated (average),0.004254,-0.153925,0.160041,0.878


In [111]:
from statsmodels.formula.api import ols
# Step 1: Regress the DV on the IV
model_1 = ols('AT8_ANG ~ AT8_MFC', data=TAU_MFC_ANG).fit()
a = model_1.params['AT8_MFC']  # Use the name of the IV here, not the DV

# Step 2: Regress the MV on the IV
model_2 = ols('Ferritin_MFC ~ AT8_MFC', data=TAU_MFC_ANG).fit()
b = model_2.params['AT8_MFC']  # Again, use the IV's name

# Step 3: Regress the DV on both the IV and MV
model_3 = ols('AT8_ANG ~ AT8_MFC + Ferritin_MFC', data=TAU_MFC_ANG).fit()
c_prime = model_3.params['AT8_MFC']  # And again, use the IV's name

# The parameter for the mediator in the third model would be:
b_prime = model_3.params['Ferritin_MFC']

In [112]:
# print(a) # b1: Y = b0 + b1*X
# print(b) # b2: M = b0 + b2*M
# print(c_prime) #b4: Y = b0 + b4*X + b3*M
# print(b_prime) #b3

# ACME
print(f'ACME: {a - c_prime} or {b * b_prime}')
# ADE
print(f'ADE: {c_prime}')
# Total Effect
print(f'Total Effect: {a}')


ACME: 0.008371725237278582 or 0.008371725237278076
ADE: 0.7510891455798708
Total Effect: 0.7594608708171494


## AT8_OFC $\rightarrow$ Ferritin_OFC $\rightarrow$ AT8_ANG

In [113]:
# Model for the mediator
mediator_model = sm.OLS(TAU_OFC_ANG['Ferritin_OFC'], sm.add_constant(TAU_OFC_ANG['AT8_OFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TAU_OFC_ANG['AT8_ANG'], sm.add_constant(TAU_OFC_ANG[['AT8_OFC', 'Ferritin_OFC']]))

# Create the mediation analysis model
med_analysis = Mediation(outcome_model, mediator_model, 'AT8_OFC', 'Ferritin_OFC')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),0.007551,-0.217363,0.249423,0.982
ACME (treated),0.007551,-0.217363,0.249423,0.982
ADE (control),0.058651,-0.403234,0.553807,0.836
ADE (treated),0.058651,-0.403234,0.553807,0.836
Total effect,0.066202,-0.435184,0.577081,0.824
Prop. mediated (control),0.032443,-3.344903,3.018878,0.846
Prop. mediated (treated),0.032443,-3.344903,3.018878,0.846
ACME (average),0.007551,-0.217363,0.249423,0.982
ADE (average),0.058651,-0.403234,0.553807,0.836
Prop. mediated (average),0.032443,-3.344903,3.018878,0.846


## TDP43_MFC $\rightarrow$ Ferritin_MFC $\rightarrow$ TDP43_ANG

In [114]:
# Define the mediation model
# Model for the mediator
mediator_model = sm.OLS(TDP_MFC_ANG['Ferritin_MFC'], sm.add_constant(TDP_MFC_ANG['TDP43_MFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TDP_MFC_ANG['TDP43_ANG'], sm.add_constant(TDP_MFC_ANG[['TDP43_MFC', 'Ferritin_MFC']]))

# Create the mediation analysis model
med_analysis = Mediation(outcome_model, mediator_model, 'TDP43_MFC', 'Ferritin_MFC')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),-0.029265,-0.192241,0.085547,0.638
ACME (treated),-0.029265,-0.192241,0.085547,0.638
ADE (control),0.842712,0.417577,1.266911,0.0
ADE (treated),0.842712,0.417577,1.266911,0.0
Total effect,0.813447,0.3817,1.246049,0.0
Prop. mediated (control),-0.018214,-0.336531,0.116161,0.638
Prop. mediated (treated),-0.018214,-0.336531,0.116161,0.638
ACME (average),-0.029265,-0.192241,0.085547,0.638
ADE (average),0.842712,0.417577,1.266911,0.0
Prop. mediated (average),-0.018214,-0.336531,0.116161,0.638


## TDP43_OFC $\rightarrow$ Ferritin_OFC $\rightarrow$ TDP43_ANG

In [115]:
# Define the mediation model
# Model for the mediator
mediator_model = sm.OLS(TDP_OFC_ANG['Ferritin_OFC'], sm.add_constant(TDP_OFC_ANG['TDP43_OFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TDP_OFC_ANG['TDP43_ANG'], sm.add_constant(TDP_OFC_ANG[['TDP43_OFC', 'Ferritin_OFC']]))

# Create the mediation analysis model
med_analysis = Mediation(outcome_model, mediator_model, 'TDP43_OFC', 'Ferritin_OFC')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),0.004228,-0.01864,0.033691,0.74
ACME (treated),0.004228,-0.01864,0.033691,0.74
ADE (control),0.248019,0.168273,0.325019,0.0
ADE (treated),0.248019,0.168273,0.325019,0.0
Total effect,0.252247,0.173211,0.32805,0.0
Prop. mediated (control),0.008087,-0.082343,0.138497,0.74
Prop. mediated (treated),0.008087,-0.082343,0.138497,0.74
ACME (average),0.004228,-0.01864,0.033691,0.74
ADE (average),0.248019,0.168273,0.325019,0.0
Prop. mediated (average),0.008087,-0.082343,0.138497,0.74


In [116]:
# Try the average Ferritin avg between MFC & ANG
# OFC & ANG

## AT8_MFC $\rightarrow$ Ferritin_MFC+ANG $\rightarrow$ AT8_ANG

In [121]:
TAU_MFC_ANG['Ferritin_MFC_ANG'] = TAU_MFC_ANG[['Ferritin_MFC', 'Ferritin_ANG']].mean(axis=1)

In [122]:
# Model for the mediator
mediator_model = sm.OLS(TAU_MFC_ANG['Ferritin_MFC_ANG'], sm.add_constant(TAU_MFC_ANG['AT8_MFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TAU_MFC_ANG['AT8_ANG'], sm.add_constant(TAU_MFC_ANG[['AT8_MFC', 'Ferritin_MFC_ANG']]))

# Create the mediation analysis model / AT8_MFC: exposure = Independent Variable, Ferritin_MFC: Mediator
med_analysis = Mediation(outcome_model, mediator_model, 'AT8_MFC', 'Ferritin_MFC_ANG')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),-0.018665,-0.143127,0.056152,0.704
ACME (treated),-0.018665,-0.143127,0.056152,0.704
ADE (control),0.783095,0.550623,1.023837,0.0
ADE (treated),0.783095,0.550623,1.023837,0.0
Total effect,0.764429,0.520552,1.014028,0.0
Prop. mediated (control),-0.008682,-0.206278,0.070121,0.704
Prop. mediated (treated),-0.008682,-0.206278,0.070121,0.704
ACME (average),-0.018665,-0.143127,0.056152,0.704
ADE (average),0.783095,0.550623,1.023837,0.0
Prop. mediated (average),-0.008682,-0.206278,0.070121,0.704


## AT8_OFC $\rightarrow$ Ferritin_OFC+ANG $\rightarrow$ AT8_ANG

In [118]:
TAU_OFC_ANG['Ferritin_OFC_ANG'] = TAU_OFC_ANG[['Ferritin_OFC', 'Ferritin_ANG']].mean(axis=1)

In [120]:
# Model for the mediator
mediator_model = sm.OLS(TAU_OFC_ANG['Ferritin_OFC_ANG'], sm.add_constant(TAU_OFC_ANG['AT8_OFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TAU_OFC_ANG['AT8_ANG'], sm.add_constant(TAU_OFC_ANG[['AT8_OFC', 'Ferritin_OFC_ANG']]))

# Create the mediation analysis model
med_analysis = Mediation(outcome_model, mediator_model, 'AT8_OFC', 'Ferritin_OFC_ANG')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),-0.019759,-0.27987,0.189383,0.884
ACME (treated),-0.019759,-0.27987,0.189383,0.884
ADE (control),0.100959,-0.41613,0.598265,0.7
ADE (treated),0.100959,-0.41613,0.598265,0.7
Total effect,0.0812,-0.431273,0.595433,0.75
Prop. mediated (control),0.020639,-3.074827,5.666532,0.902
Prop. mediated (treated),0.020639,-3.074827,5.666532,0.902
ACME (average),-0.019759,-0.27987,0.189383,0.884
ADE (average),0.100959,-0.41613,0.598265,0.7
Prop. mediated (average),0.020639,-3.074827,5.666532,0.902


## TDP43_MFC $\rightarrow$ Ferritin_MFC+ANG $\rightarrow$ TDP43_ANG

In [123]:
TDP_MFC_ANG['Ferritin_MFC_ANG'] = TDP_MFC_ANG[['Ferritin_MFC', 'Ferritin_ANG']].mean(axis=1)

In [124]:
# Define the mediation model
# Model for the mediator
mediator_model = sm.OLS(TDP_MFC_ANG['Ferritin_MFC_ANG'], sm.add_constant(TDP_MFC_ANG['TDP43_MFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TDP_MFC_ANG['TDP43_ANG'], sm.add_constant(TDP_MFC_ANG[['TDP43_MFC', 'Ferritin_MFC_ANG']]))

# Create the mediation analysis model
med_analysis = Mediation(outcome_model, mediator_model, 'TDP43_MFC', 'Ferritin_MFC_ANG')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),-0.005001,-0.144955,0.122212,0.976
ACME (treated),-0.005001,-0.144955,0.122212,0.976
ADE (control),0.831153,0.420773,1.242946,0.0
ADE (treated),0.831153,0.420773,1.242946,0.0
Total effect,0.826152,0.393609,1.252588,0.0
Prop. mediated (control),-0.000609,-0.222504,0.14432,0.976
Prop. mediated (treated),-0.000609,-0.222504,0.14432,0.976
ACME (average),-0.005001,-0.144955,0.122212,0.976
ADE (average),0.831153,0.420773,1.242946,0.0
Prop. mediated (average),-0.000609,-0.222504,0.14432,0.976


## TDP43_OFC $\rightarrow$ Ferritin_OFC+ANG $\rightarrow$ TDP43_ANG

In [125]:
TDP_OFC_ANG['Ferritin_OFC_ANG'] = TDP_OFC_ANG[['Ferritin_OFC', 'Ferritin_ANG']].mean(axis=1)

In [126]:
# Define the mediation model
# Model for the mediator
mediator_model = sm.OLS(TDP_OFC_ANG['Ferritin_OFC'], sm.add_constant(TDP_OFC_ANG['TDP43_OFC']))

# Model for the dependent variable
outcome_model = sm.OLS(TDP_OFC_ANG['TDP43_ANG'], sm.add_constant(TDP_OFC_ANG[['TDP43_OFC', 'Ferritin_OFC']]))

# Create the mediation analysis model
med_analysis = Mediation(outcome_model, mediator_model, 'TDP43_OFC', 'Ferritin_OFC')

# Fit the model with bootstrapping
boot_results = med_analysis.fit(n_rep=1000)  # Using 1000 bootstrap samples

boot_results.summary()

Unnamed: 0,Estimate,Lower CI bound,Upper CI bound,P-value
ACME (control),0.004034,-0.020844,0.034854,0.742
ACME (treated),0.004034,-0.020844,0.034854,0.742
ADE (control),0.247099,0.169461,0.332936,0.0
ADE (treated),0.247099,0.169461,0.332936,0.0
Total effect,0.251133,0.17028,0.330312,0.0
Prop. mediated (control),0.007972,-0.08908,0.126177,0.742
Prop. mediated (treated),0.007972,-0.08908,0.126177,0.742
ACME (average),0.004034,-0.020844,0.034854,0.742
ADE (average),0.247099,0.169461,0.332936,0.0
Prop. mediated (average),0.007972,-0.08908,0.126177,0.742
