# Predict Composite Cognitive Score 
Predict the composite cognitive score of a set of ADNI patients using Random Forrest and SVM methods. 

We are using the four ADSP-PHC composite scores for *Memory, Executive Function, Language and Visuospatial Ability*. The methods for deriving these are described in 'ADSP Phenotype Harmonization Consortium – Derivation of Cognitive Composite Scores' by Mukherjee et al (https://ida.loni.usc.edu/download/files/study/083f5b49-98d1-494a-aaf1-3310a9a8e62c/file/adni/ADNI_Cognition_Methods_Psychometric_Analyses_Oct2022.pdf).

In [92]:
import pandas as pd
import sys
import os
import numpy as np

## Processing Data

Match up the composite cognitive scores and functional connectivity data, then split into test + training sets

In [93]:
ADSP_DATA_PATH = "ADSP_PHC_COGN_Dec2023_FILTERED.csv"
FC_DATA_PATH = "../FMRI_ADNI_DATA/fc/"

In [94]:
# Process the ADSP Data

adsp_df = pd.read_csv(ADSP_DATA_PATH)
adsp_df = adsp_df.drop(columns=adsp_df.columns[0])
adsp_df.head()

Unnamed: 0,RID,SUBJID,PHASE,VISCODE,VISCODE2,EXAMDATE,PHC_Visit,PHC_Sex,PHC_Education,PHC_Ethnicity,...,PHC_MEM_PreciseFilter,PHC_EXF,PHC_EXF_SE,PHC_EXF_PreciseFilter,PHC_LAN,PHC_LAN_SE,PHC_LAN_PreciseFilter,PHC_VSP,PHC_VSP_SE,PHC_VSP_PreciseFilter
0,21,ADNI_011_S_0021,ADNI1,bl,bl,2005-10-24,1,2.0,18.0,2.0,...,1,0.295,0.335,1.0,0.816,0.304,1.0,0.264,0.547,1.0
1,21,ADNI_011_S_0021,ADNI1,m06,m06,2006-04-24,2,2.0,18.0,2.0,...,1,0.374,0.346,1.0,1.372,0.384,1.0,-0.333,0.464,1.0
2,21,ADNI_011_S_0021,ADNI1,m12,m12,2006-11-01,3,2.0,18.0,2.0,...,1,0.451,0.388,1.0,1.813,0.368,1.0,0.264,0.547,1.0
3,21,ADNI_011_S_0021,ADNI1,m24,m24,2007-10-31,4,2.0,18.0,2.0,...,1,0.534,0.351,1.0,1.17,0.316,1.0,0.264,0.547,1.0
4,21,ADNI_011_S_0021,ADNI1,m36,m36,2008-10-22,5,2.0,18.0,2.0,...,1,0.669,0.424,1.0,1.274,0.342,1.0,0.963,0.658,0.0


In [95]:
adsp_df = adsp_df.drop(columns=[
    'SUBJID', 'PHASE', 'VISCODE', 'EXAMDATE', 'PHC_Visit', 'PHC_Sex', 'PHC_Education', 'PHC_Ethnicity', 'PHC_Race', 'PHC_Age_Cognition', 
    'PHC_MEM_SE', 'PHC_MEM_PreciseFilter', 'PHC_EXF_SE', 'PHC_EXF_PreciseFilter', 'PHC_LAN_SE', 'PHC_LAN_PreciseFilter', 'PHC_VSP_SE',
    'PHC_VSP_PreciseFilter'
])
adsp_df.head()

Unnamed: 0,RID,VISCODE2,PHC_Diagnosis,PHC_MEM,PHC_EXF,PHC_LAN,PHC_VSP
0,21,bl,1.0,1.481,0.295,0.816,0.264
1,21,m06,1.0,1.464,0.374,1.372,-0.333
2,21,m12,1.0,1.647,0.451,1.813,0.264
3,21,m24,1.0,1.309,0.534,1.17,0.264
4,21,m36,1.0,1.945,0.669,1.274,0.963


In [96]:
# Pad out the visit codes
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].str.upper()

adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('BL', 'M000')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('SC', 'M000')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M06', 'M006')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M12', 'M012')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M24', 'M024')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M36', 'M036')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M60', 'M060')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M72', 'M072')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M84', 'M084')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M96', 'M096')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M48', 'M048')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M18', 'M018')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M78', 'M078')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M90', 'M090')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M66', 'M066')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M54', 'M054')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M42', 'M042')
adsp_df['VISCODE2'] = adsp_df['VISCODE2'].replace('M30', 'M030')

# Pad the RID values
adsp_df['RID'] = adsp_df['RID'].apply(lambda x: str(x).zfill(4))

adsp_df.head()

Unnamed: 0,RID,VISCODE2,PHC_Diagnosis,PHC_MEM,PHC_EXF,PHC_LAN,PHC_VSP
0,21,M000,1.0,1.481,0.295,0.816,0.264
1,21,M006,1.0,1.464,0.374,1.372,-0.333
2,21,M012,1.0,1.647,0.451,1.813,0.264
3,21,M024,1.0,1.309,0.534,1.17,0.264
4,21,M036,1.0,1.945,0.669,1.274,0.963


Get the FC data and add

In [97]:
import re

def get_rid_viscode(filename):
    pattern = r'sub-ADNI\d+S(\d{4})_ses-(M\d{3})'
    match = re.search(pattern, filename)

    if match:
        rid = match.group(1)
        viscode = match.group(2)
        return rid, viscode        
    else:
        print("Pattern not found in the filename.")
        return None


In [104]:
adsp_df['FC_DATA'] = None

fc_dir = os.listdir(FC_DATA_PATH)

fc_files = [os.path.join(FC_DATA_PATH, file) for file in fc_dir if file.endswith('.csv')]
len(fc_files)

1199

In [99]:
# test_file = fc_files[1]
# arr = np.genfromtxt(test_file, delimiter=',')
# rid, viscode = get_rid_viscode(test_file)
# # rids = adsp_df['RID'].values

# adsp_df.loc[(adsp_df['VISCODE2'] == viscode) & (adsp_df['RID'] == rid), 'FC_DATA'] = test_file

In [100]:
for fc in fc_files:
    rid, viscode = get_rid_viscode(fc)
    adsp_df.loc[(adsp_df['RID'] == rid) & (adsp_df['VISCODE2'] == viscode), 'FC_DATA'] = fc

In [105]:
adsp_df_filter = adsp_df[adsp_df['FC_DATA'].notna()]
adsp_df_filter.shape

(0, 8)

## Random Forrest Method
Prediction not differentiable wrt to input

## SVM Method
Prediction differentiable wrt to input