# Get Information about Datasets Used

Helper notebook that collects additional information about the datasets used in the project.

In [2]:
import pandas as pd
from scipy.io import loadmat
from scipy.stats import pearsonr

## ADSP PHC Composite Data

In [15]:
DATA_PATH = 'data/Cog_Scores/ADSP_PHC_COGN_Dec2023_FILTERED_wfiles.csv'

# Read the table and attach a 'Used' flag column that starts out False for
# every row.
patients = pd.read_csv(DATA_PATH).assign(Used=False)

In [13]:
# Pearson correlation between the `PHC_Diagnosis` column and each PHC composite
# score, computed on the rows where both values are present.
groups = ['PHC_MEM', 'PHC_EXF', 'PHC_LAN', 'PHC_VSP']

for g in groups:
    print(g)
    # Named `paired` rather than `sample` so this loop does not overwrite the
    # notebook-wide `sample` that the summary cells read.
    paired = patients.dropna(subset=['PHC_Diagnosis', g])

    corr_coef, p_value = pearsonr(paired['PHC_Diagnosis'], paired[g])

    print('Pearsons R: ', corr_coef)
    print('P Value: ', p_value)

PHC_MEM
Pearsons R:  -0.7571035861468298
P Value:  6.540363843947471e-249
PHC_EXF
Pearsons R:  -0.6020068719891599
P Value:  1.2284638399438177e-132
PHC_LAN
Pearsons R:  -0.5712093377322083
P Value:  1.2349079030803372e-116
PHC_VSP
Pearsons R:  -0.35498611010860115
P Value:  5.502276563073525e-41


In [13]:
# Mark a row as usable when its FC file holds a 'ROI_activity' array with the
# expected number of columns, then keep only the usable rows as `sample`.
EXPECTED_FC_COLUMNS = 197  # NOTE(review): presumably the scan length in time points - confirm

# Each distinct file is loaded once; rows without a file path are skipped and
# therefore stay unused. Only the column count matters for the check, so the
# array is not sliced by rows first.
usable_files = [
    fc_path
    for fc_path in patients['FC_DATA'].dropna().unique()
    if loadmat(fc_path)['ROI_activity'].shape[1] == EXPECTED_FC_COLUMNS
]

# Assign the whole column in one vectorised step so the cell is idempotent and
# does not rely on 'Used' having been created beforehand.
patients['Used'] = patients['FC_DATA'].isin(usable_files)

sample = patients[patients['Used']]

## ADAS-Cog Data

In [14]:
DATA_PATH = 'data/ADNIMERGE/ADNIMERGE_29Apr2024_wFiles.csv'

# Load the table and keep only the rows that have an ADAS11 value.
patients = pd.read_csv(DATA_PATH).dropna(subset=['ADAS11'])
# Optional filter on the 'DX_bl' column, currently disabled:
# patients = patients[patients['DX_bl'] == 'AD']

# The summary cells below operate on `sample`.
sample = patients


## Get Information from Samples

In [5]:
# Number of rows in the current sample.
sample.shape[0]

1339

In [6]:
# Mean of the 'AGE' column over the current sample.
sample.loc[:, 'AGE'].mean()

71.09820627802691

In [7]:
# Smallest and largest 'AGE' in the current sample, and the spread between them.
ages = sample['AGE']
youngest, oldest = ages.min(), ages.max()

print('Min age: ', youngest)
print('Max age: ', oldest)
print('Range: ', oldest - youngest)

Min age:  55.0
Max age:  89.6
Range:  34.599999999999994


In [52]:
# Count of rows per 'PTGENDER' value (missing values are not counted).
sample.loc[:, 'PTGENDER'].value_counts()

PTGENDER
Female    637
Male      625
Name: count, dtype: int64

## ABeta-Tau Scans

In [11]:
# Location of the AV1451 table, then the table itself.
DATA_PATH = 'data/abeta_tau_data/av1451_mni152_5_21_2024.csv'
patients = pd.read_csv(filepath_or_buffer=DATA_PATH)

In [12]:
# Build the working sample by excluding every row whose 'Group' is 'SMC'.
# A boolean mask is used instead of dropping by index label, so the result does
# not depend on the index being unique.
# Alternative selections, currently disabled:
# sample = patients[patients['Group'] == 'CN']
# sample = patients
sample = patients[patients['Group'] != 'SMC']

In [13]:
# Summary of the current sample: size, age statistics, and the number of rows
# per 'Sex' and per 'Group' value.
ages = sample['Age']
youngest, oldest = ages.min(), ages.max()

print('Sample size: ', sample.shape[0])
print('Mean age: ', ages.mean())

print('Min age: ', youngest)
print('Max age: ', oldest)
print('Range: ', oldest - youngest)

for column in ('Sex', 'Group'):
    print(sample[column].value_counts())

Sample size:  296
Mean age:  74.05405405405405
Min age:  57
Max age:  95
Range:  38
Sex
F    168
M    128
Name: count, dtype: int64
Group
CN     168
MCI     92
AD      36
Name: count, dtype: int64
