In [1]:
import pandas as pd

# Load the cleaned ADNI table; the fastparquet engine is requested explicitly.
df = pd.read_parquet(
    'ADNI_cleaned.parquet',
    engine='fastparquet',
)

In [2]:
# Sanity check on the loaded table's dimensions as (rows, columns).
df.shape

(1878, 17)

In [2]:
# Inspect the PTEDUCAT column, which is later used as one of the exposures.
df['PTEDUCAT']

0       16.0
1       18.0
2       10.0
3       16.0
4       10.0
        ... 
1873    18.0
1874    18.0
1875    16.0
1876    14.0
1877    12.0
Name: PTEDUCAT, Length: 1878, dtype: float64

In [None]:
# Count missing values per column before the complete-case filter is applied.
df.isna().sum()

In [None]:
import sys

# Make the helper module importable; presumably doubleml_utils.py sits in ../ukb.
# The membership check keeps sys.path free of duplicates when the cell is re-run.
if '../ukb' not in sys.path:
    sys.path.append('../ukb')  # adjust the path as needed
from doubleml_utils import run_dml

# Columns that are each tested as the exposure in turn, and that are all removed
# from the covariate set of every fit.
lancet_covs = ['GDTOTAL', 'MH14BALCH', 'MH16BSMOK', 'CLINICAL_LDL_C', 'PTEDUCAT', 'HMHYPERT', 'bmi']

# Complete-case analysis: any row with a missing value in ANY column is dropped.
df = df.dropna()

def assess_lancets_dml(features, data=None):
    """Run one double-ML fit per feature and collect the resulting summaries.

    Each name in ``features`` is used in turn as the exposure, with the
    ``DXAD`` column as the outcome. The covariate set is identical for every
    fit: all columns of the table except those in ``lancet_covs`` plus
    ``RID`` and ``DXAD``.

    Parameters
    ----------
    features : iterable of str
        Column names to use as the exposure, one fit per name.
    data : pandas.DataFrame, optional
        Table to analyse. When omitted, the notebook-level ``df`` is used,
        which matches the original behaviour. Passing the table explicitly
        protects the call from other cells rebinding ``df``.

    Returns
    -------
    dict
        Maps each feature name to the ``summary`` attribute of the object
        returned by ``run_dml`` for that feature.
    """
    if data is None:
        data = df

    # The excluded columns do not depend on the feature, so build the list once.
    excluded_columns = lancet_covs + ['RID', 'DXAD']

    feature_summaries = {}
    for feature in features:
        print(f"Running DML for feature: {feature}")
        # A fresh covariate frame per fit, so nothing run_dml does to its
        # inputs can carry over to the next feature.
        covariate = data.drop(columns=excluded_columns)
        outcome = data['DXAD']
        exposure = data[feature]

        dml_model = run_dml(covariate, outcome, exposure)
        feature_summaries[feature] = dml_model.summary

    return feature_summaries

# Fit one model per entry of lancet_covs; results maps feature name -> summary.
results = assess_lancets_dml(lancet_covs)

In [4]:
# Collapse the per-feature summaries into one table with a row per feature.
rows = []

# The loop variable is deliberately NOT called `df`: that name holds the
# notebook-level data table, and rebinding it here would leave earlier cells
# operating on a summary table when they are re-run.
for test_id, dml_summary in results.items():
    # Pull the row for `d` as a dictionary and tag it with the test_id
    row = dml_summary.loc['d'].to_dict()
    row['test_id'] = test_id
    rows.append(row)

# Convert to a DataFrame
summary_df = pd.DataFrame(rows)

# Move 'test_id' to the front
other_columns = [col for col in summary_df.columns if col != 'test_id']
summary_df = summary_df[['test_id'] + other_columns]

In [5]:
# Display the combined table: one row per exposure, taken from the `d` row of each summary.
summary_df

Unnamed: 0,test_id,coef,std err,t,P>|t|,2.5 %,97.5 %
0,GDTOTAL,0.016181,0.026738,0.605172,0.5450644,-0.036225,0.068588
1,MH14BALCH,-0.003318,0.001154,-2.874142,0.004051271,-0.00558,-0.001055
2,MH16BSMOK,-0.004,0.000418,-9.579694,9.733331000000001e-22,-0.004818,-0.003182
3,CLINICAL_LDL_C,0.024177,0.011263,2.146538,0.03183007,0.002101,0.046253
4,PTEDUCAT,-0.016005,0.002802,-5.711672,1.118717e-08,-0.021498,-0.010513
5,HMHYPERT,0.002141,0.013925,0.153764,0.8777957,-0.025151,0.029433
6,bmi,-0.003507,0.001291,-2.716256,0.006602487,-0.006038,-0.000977


In [None]:
# Persist the combined table. With to_csv defaults the file is comma-separated
# (despite the .txt extension) and the row index is written as an unnamed
# first column. NOTE(review): the ./double_ml directory must already exist.
summary_df.to_csv('./double_ml/adni_lancet_meta.txt')