In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

Total of 348 participants

# Registered Data Descriptions

In [None]:
desc = pd.read_csv('PREVENT-AD_internal_n386__Data_Dictionary.csv', index_col =0)
desc.data_table

In [None]:
desc.head()

# Data Sets

## AD8

Structured Alzheimer Dementia 8 (AD8) interview (administered to study partner).
Partner rated the participant on eight functional abilities intended to discriminate normal cognitive aging from very mild dementia. The AD8 was designed specifically for the detection of change over time. Beginning in 2015, each participant was also asked to rate subjective change in memory abilities using the Measurement of Everyday Cognition (ECog). This instrument uses a four-point scale to ascertain perceived changes over the past year. Although administered annually, these ratings did not necessarily coincide with annual FU visits.

In [None]:
AD8 = pd.read_csv('PREVENT-AD_internal_n386__AD8.csv')
AD8

In [None]:
len(AD8.CandID.unique())

In [None]:
len(AD8.PSCID.unique())

In [None]:
AD8.Study_visit_label.unique()

In [None]:
len(AD8.iloc[np.where(AD8.Study_visit_label == 'PREBL00')])

In [None]:
len(AD8.iloc[np.where(AD8.Study_visit_label == 'NAPFU48')])

In [None]:
# Count AD8 rows per study-visit label and draw one bar per label.
# The labels are a fixed list and every count is looked up by label, so each
# bar is guaranteed to sit above its own label.
ad8_visit_labels = [
    'NAPBL00', 'NAPFU03', 'NAPFU12', 'NAPFU24', 'NAPFU36', 'NAPFU48',
    'PREBL00', 'PREFU12', 'PREFU24', 'PREFU36', 'PREFU48',
]
ad8_visit_counts = {
    label: int((AD8.Study_visit_label == label).sum())
    for label in ad8_visit_labels
}

# Module-level names kept as before: the combined-cohort cell reads them.
PREBL00_sum = ad8_visit_counts['PREBL00']
PREFU12_sum = ad8_visit_counts['PREFU12']
PREFU24_sum = ad8_visit_counts['PREFU24']
PREFU36_sum = ad8_visit_counts['PREFU36']
PREFU48_sum = ad8_visit_counts['PREFU48']
NAPBL00_sum = ad8_visit_counts['NAPBL00']
NAPFU03_sum = ad8_visit_counts['NAPFU03']
NAPFU12_sum = ad8_visit_counts['NAPFU12']
NAPFU24_sum = ad8_visit_counts['NAPFU24']
NAPFU36_sum = ad8_visit_counts['NAPFU36']
NAPFU48_sum = ad8_visit_counts['NAPFU48']

fig = plt.figure(figsize=(10, 6))
ax = fig.add_axes([0,0,1,1])
# Previously the x labels came from AD8.Study_visit_label.unique(), whose order
# follows the rows of the CSV, while the heights were listed in a hand-written
# order; the two sequences were not guaranteed to line up.
study_visits = ad8_visit_labels
number_participants = [ad8_visit_counts[label] for label in study_visits]
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize = 16)
ax.set_xlabel('Study visit label', fontsize=18)
ax.set_ylabel('Number of participants', fontsize=18)
ax.set_title('AD8 records per study visit', fontsize=20)
plt.show()

IMPORTANT NOTES about the label convention: The first visit in the program is always labelled as PREEL00. INTREPAD trial participants are identified at the enrolment visit (NAPEN00) and the following visits (e.g., NAPBL00, NAPFU03, etc.). Even after the termination of the treatment and trial protocol (24 months; FU24), INTREPAD participants remain labelled as NAP for all the following annual FU visits (NAPFU36, NAPFU48, for example). However, if a participant was not able to follow the study protocol until NAPFU03, he/she was excluded from the trial and ‘switched back’ to the observational cohort (PRE) to continue to be followed annually (PREFU12 and following). Central auditory processing (AP) also has its own label, even if this test was performed in conjunction with BL and/or FU. Lumbar puncture (LP) stands alone, as the procedure was done on a separate day, close to the annual FU.

List of participants who switched back from NAPBL to PREFU:
* MTL0015
* MTL0086
* MTL0102
* MTL0106
* MTL0107
* MTL0111
* MTL0154
* MTL0205
* MTL0217
* MTL0228
* MTL0235
* MTL0290
* MTL0386
* MTL0412
* MTL0415
* MTL0424


In [None]:
from IPython.display import Image

# Display the local file 'prevent_AD_label.png' as this cell's output,
# with the requested on-screen width and height.
Image(filename = 'prevent_AD_label.png', width=1000, height=100)

In [None]:
# COMBINED COHORTS
# PRE and NAP counts are pooled per time point. The *_sum names are
# module-level counts; in notebook order they were last computed from AD8.

fig = plt.figure(figsize=(10, 6))
ax = fig.add_axes([0,0,1,1])
# Bar positions, in months
study_visits = (0,12,24,36,48)
number_participants = [
    PREBL00_sum + NAPBL00_sum,
    PREFU12_sum + NAPFU12_sum,
    PREFU24_sum + NAPFU24_sum,
    PREFU36_sum + NAPFU36_sum,
    PREFU48_sum + NAPFU48_sum,
]
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
# Tick positions and tick-label size are set separately: set_xticks only takes
# text properties such as fontsize when explicit `labels` are passed as well.
ax.set_xticks(study_visits)
ax.tick_params(axis='x', labelsize=16)
ax.set_xlabel('Time in months since first visit', fontsize=18)
ax.set_ylabel('Number of participants', fontsize=18)
ax.set_title('Attrition rate of follow-up', fontsize=20)
plt.show()


In [None]:
# Data-dictionary rows whose data_table is 'AD8'.
AD8_desc = desc.loc[desc['data_table'] == 'AD8']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(AD8_desc['data_table'].values, AD8_desc['description'].values):
    print(f'{table_name} : {description}')

## Alzheimer Progression Score (APS)

Alzheimer Progression Score (APS) used as the primary outcome measure in INTREPAD. The scores are available within the INTREPAD shared dataset. The APS was developed in conjunction with colleagues at Johns Hopkins University. A composite such as the APS was envisioned at the time INTREPAD was designed, but its development and validation relied on data from parallel assessments in the longitudinal observational (non-trial) cohort. The APS is based on an Item Response Theory latent-variable approach to the many potentially informative data points collected longitudinally in PREVENT-AD.

In [None]:
APS = pd.read_csv('PREVENT-AD_internal_n386__APS.csv')
APS

In [None]:
len(APS.PSCID.unique())

In [None]:
APS.Study_visit_label.unique()

In [None]:
# Data-dictionary rows whose data_table is 'APS'.
APS_desc = desc.loc[desc['data_table'] == 'APS']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(APS_desc['data_table'].values, APS_desc['description'].values):
    print(f'{table_name} : {description}')

In [None]:
# Rows per study-visit label in the APS table.
aps_visit_labels = [
    'PREBL00', 'PREFU12', 'PREFU24', 'PREFU36', 'PREFU48',
    'NAPBL00', 'NAPFU03', 'NAPFU12', 'NAPFU24', 'NAPFU36', 'NAPFU48',
]
aps_visit_counts = {
    label: int((APS.Study_visit_label == label).sum())
    for label in aps_visit_labels
}

# Same module-level names as before, now holding the APS counts.
PREBL00_sum = aps_visit_counts['PREBL00']
PREFU12_sum = aps_visit_counts['PREFU12']
PREFU24_sum = aps_visit_counts['PREFU24']
PREFU36_sum = aps_visit_counts['PREFU36']
PREFU48_sum = aps_visit_counts['PREFU48']
NAPBL00_sum = aps_visit_counts['NAPBL00']
NAPFU03_sum = aps_visit_counts['NAPFU03']
NAPFU12_sum = aps_visit_counts['NAPFU12']
NAPFU24_sum = aps_visit_counts['NAPFU24']
NAPFU36_sum = aps_visit_counts['NAPFU36']
NAPFU48_sum = aps_visit_counts['NAPFU48']


# COMBINED COHORTS: PRE and NAP counts pooled per time point.

fig = plt.figure(figsize=(10, 6))
ax = fig.add_axes([0,0,1,1])
# Bar positions, in months (only 0, 12 and 24 are plotted for APS)
study_visits = (0,12,24)
number_participants = [
    PREBL00_sum + NAPBL00_sum,
    PREFU12_sum + NAPFU12_sum,
    PREFU24_sum + NAPFU24_sum,
]
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
# Tick positions and tick-label size are set separately: set_xticks only takes
# text properties such as fontsize when explicit `labels` are passed as well.
ax.set_xticks(study_visits)
ax.tick_params(axis='x', labelsize=16)
ax.set_xlabel('Time in months since first visit', fontsize=18)
ax.set_ylabel('Number of participants', fontsize=18)
ax.set_title('Attrition rate of follow-up', fontsize=20)
plt.show()



## Auditory Processing (3 tests)

We tested central auditory processing (CAP) using the Synthetic Sentence Identification with Ipsilateral Competing Message (SSI-ICM) test and the Dichotic Stimulus Identification (DSI) test.

After having first been assessed for simple auditory acuity (with monosyllabic words), participants were asked to identify spoken “pseudo-sentences,” either with various sound levels of a distracting background narrative (SSI-ICM) or with dichotic binaural presentation (DSI). The latter test was available only in French.

In the SSI-ICM test, one pseudo-sentence is heard while a story is recited in the background. Both the sentence and story are played in the same (ipsilateral) ear. The participant is asked to identify the target sentence among 10 choices offered. Participants performed this task a minimum of 10 and a maximum of 30 times, with designated score-dependent stopping points [40]. The other ear was then tested using the same protocol. SSI-ICM testing can typically be completed in less than 30 minutes.

The DSI task tests dichotic listening capability. For this task, different pseudo-sentences are played simultaneously in the two ears. Participants are asked to identify the two target sentences from a list of 10. Participants performed this task a minimum of 5 and a maximum of 10 times, with designated score-dependent stopping points [40], in a session requiring less than 15 minutes.

In [None]:
Aud_pro = pd.read_csv('PREVENT-AD_internal_n386__Auditory_processing.csv')
Aud_pro

In [None]:
len(Aud_pro.PSCID.unique())

In [None]:
Aud_pro.Study_visit_label.unique()

In [None]:
# Data-dictionary rows whose data_table is 'Auditory_processing'.
Aud_pro_desc = desc.loc[desc['data_table'] == 'Auditory_processing']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(Aud_pro_desc['data_table'].values, Aud_pro_desc['description'].values):
    print(f'{table_name} : {description}')

## BP Pulse Weight

In [None]:
BP_Pulse_Weight = pd.read_csv('PREVENT-AD_internal_n386__BP_Pulse_Weight.csv')
BP_Pulse_Weight

In [None]:
BP_Pulse_Weight.Study_visit_label.unique()

In [None]:
# Data-dictionary rows whose data_table is 'BP_Pulse_Weight'.
BP_Pulse_Weight_desc = desc.loc[desc['data_table'] == 'BP_Pulse_Weight']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(BP_Pulse_Weight_desc['data_table'].values, BP_Pulse_Weight_desc['description'].values):
    print(f'{table_name} : {description}')

In [None]:
import matplotlib.pyplot as plt

# Rows per study-visit label in the BP / pulse / weight table.
bp_visit_label = BP_Pulse_Weight.Study_visit_label
PREEL00_sum = int((bp_visit_label == 'PREEL00').sum())
PREBL00_sum = int((bp_visit_label == 'PREBL00').sum())
PREFU12_sum = int((bp_visit_label == 'PREFU12').sum())
PREFU24_sum = int((bp_visit_label == 'PREFU24').sum())
PREFU36_sum = int((bp_visit_label == 'PREFU36').sum())
PREFU48_sum = int((bp_visit_label == 'PREFU48').sum())
NAPEN00_sum = int((bp_visit_label == 'NAPEN00').sum())
NAPBL00_sum = int((bp_visit_label == 'NAPBL00').sum())
NAPFU03_sum = int((bp_visit_label == 'NAPFU03').sum())
NAPFU12_sum = int((bp_visit_label == 'NAPFU12').sum())
NAPFU24_sum = int((bp_visit_label == 'NAPFU24').sum())
NAPFU36_sum = int((bp_visit_label == 'NAPFU36').sum())
NAPFU48_sum = int((bp_visit_label == 'NAPFU48').sum())

# (label, count) pairs in plotting order: NAP cohort first, then PRE.
bp_bars = [
    ('NAPEN00', NAPEN00_sum),
    ('NAPBL00', NAPBL00_sum),
    ('NAPFU03', NAPFU03_sum),
    ('NAPFU12', NAPFU12_sum),
    ('NAPFU24', NAPFU24_sum),
    ('NAPFU36', NAPFU36_sum),
    ('NAPFU48', NAPFU48_sum),
    ('PREEL00', PREEL00_sum),
    ('PREBL00', PREBL00_sum),
    ('PREFU12', PREFU12_sum),
    ('PREFU24', PREFU24_sum),
    ('PREFU36', PREFU36_sum),
    ('PREFU48', PREFU48_sum),
]
study_visits = [label for label, count in bp_bars]
number_participants = [count for label, count in bp_bars]

fig = plt.figure(figsize=(10, 6))
ax = fig.add_axes([0, 0, 1, 1])
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
plt.show()

## CSF Proteins

In [None]:
CSF_Proteins = pd.read_csv('PREVENT-AD_internal_n386__CSF_proteins.csv')
CSF_Proteins

In [None]:
CSF_Proteins.Study_visit_label.unique()

In [None]:
# Rows per study-visit label in the CSF proteins table.
csf_visit_labels = [
    'PREBL00', 'PREFU12', 'PREFU24', 'PREFU36', 'PREFU48',
    'NAPBL00', 'NAPFU03', 'NAPFU12', 'NAPFU24', 'NAPFU36', 'NAPFU48',
]
csf_visit_counts = {
    label: int((CSF_Proteins.Study_visit_label == label).sum())
    for label in csf_visit_labels
}

# Same module-level names as before, now holding the CSF counts.
PREBL00_sum = csf_visit_counts['PREBL00']
PREFU12_sum = csf_visit_counts['PREFU12']
PREFU24_sum = csf_visit_counts['PREFU24']
PREFU36_sum = csf_visit_counts['PREFU36']
PREFU48_sum = csf_visit_counts['PREFU48']
NAPBL00_sum = csf_visit_counts['NAPBL00']
NAPFU03_sum = csf_visit_counts['NAPFU03']
NAPFU12_sum = csf_visit_counts['NAPFU12']
NAPFU24_sum = csf_visit_counts['NAPFU24']
NAPFU36_sum = csf_visit_counts['NAPFU36']
NAPFU48_sum = csf_visit_counts['NAPFU48']


# Combined cohorts: PRE and NAP counts pooled per time point.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_axes([0,0,1,1])
# Bar positions, in months
study_visits = (0,12,24,36,48)
number_participants = [
    PREBL00_sum + NAPBL00_sum,
    PREFU12_sum + NAPFU12_sum,
    PREFU24_sum + NAPFU24_sum,
    PREFU36_sum + NAPFU36_sum,
    PREFU48_sum + NAPFU48_sum,
]
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
# Tick positions and tick-label size are set separately: set_xticks only takes
# text properties such as fontsize when explicit `labels` are passed as well.
ax.set_xticks(study_visits)
ax.tick_params(axis='x', labelsize=16)
ax.set_xlabel('Time in months since first visit', fontsize=18)
ax.set_ylabel('Number of participants', fontsize=18)
ax.set_title('Attrition rate of follow-up', fontsize=20)
plt.show()

In [None]:
# Data-dictionary rows whose data_table is 'CSF_proteins'.
CSF_Proteins_desc = desc.loc[desc['data_table'] == 'CSF_proteins']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(CSF_Proteins_desc['data_table'].values, CSF_Proteins_desc['description'].values):
    print(f'{table_name} : {description}')

## Demographics 

In [None]:
Dem = pd.read_csv('PREVENT-AD_internal_n386__Demographics.csv')
Dem

In [None]:
list(Dem.columns)

In [None]:
# Data-dictionary rows whose data_table is 'Demographics'.
Dem_desc = desc.loc[desc['data_table'] == 'Demographics']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(Dem_desc['data_table'].values, Dem_desc['description'].values):
    print(f'{table_name} : {description}')

## Cardiovascular risk score (CAIDE)

In [None]:
EL_CAIDE = pd.read_csv('PREVENT-AD_internal_n386__EL_CAIDE.csv')
EL_CAIDE

In [None]:
EL_CAIDE.Study_visit_label.unique()

In [None]:
# Data-dictionary rows whose data_table is 'EL_CAIDE'.
EL_CAIDE_desc = desc.loc[desc['data_table'] == 'EL_CAIDE']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(EL_CAIDE_desc['data_table'].values, EL_CAIDE_desc['description'].values):
    print(f'{table_name} : {description}')

## CDR MoCA

In [None]:
EL_CDR_MoCA = pd.read_csv('PREVENT-AD_internal_n386__EL_CDR_MoCA.csv')
EL_CDR_MoCA

In [None]:
list(EL_CDR_MoCA.columns)

In [None]:
EL_CDR_MoCA.Study_visit_label.unique()

In [None]:
# Data-dictionary rows whose data_table is 'EL_CDR_MoCA'.
EL_CDR_MoCA_desc = desc.loc[desc['data_table'] == 'EL_CDR_MoCA']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(EL_CDR_MoCA_desc['data_table'].values, EL_CDR_MoCA_desc['description'].values):
    print(f'{table_name} : {description}')

## Medical History

In [None]:
EL_med_hist = pd.read_csv('PREVENT-AD_internal_n386__EL_Medical_history.csv')
EL_med_hist

In [None]:
EL_med_hist.Study_visit_label.unique()

In [None]:
# Data-dictionary rows whose data_table is 'EL_Medical_history'.
EL_med_hist_desc = desc.loc[desc['data_table'] == 'EL_Medical_history']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(EL_med_hist_desc['data_table'].values, EL_med_hist_desc['description'].values):
    print(f'{table_name} : {description}')

## Genetics

In [None]:
genetics = pd.read_csv('PREVENT-AD_internal_n386__Genetics.csv')
genetics

In [None]:
# Data-dictionary rows whose data_table is 'Genetics'.
genetics_desc = desc.loc[desc['data_table'] == 'Genetics']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(genetics_desc['data_table'].values, genetics_desc['description'].values):
    print(f'{table_name} : {description}')

## Lab Assays

In [None]:
lab = pd.read_csv('PREVENT-AD_internal_n386__Lab.csv')
lab

In [None]:
lab.Study_visit_label.unique()

In [None]:
# Data-dictionary rows whose data_table is 'Lab'.
lab_desc = desc.loc[desc['data_table'] == 'Lab']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(lab_desc['data_table'].values, lab_desc['description'].values):
    print(f'{table_name} : {description}')

## Medication Use

In [None]:
Med_use = pd.read_csv('PREVENT-AD_internal_n386__Med_use.csv')
Med_use

In [None]:
Med_use.Study_visit_label.unique()

In [None]:
len(Med_use.PSCID.unique())

In [None]:
# Data-dictionary rows whose data_table is 'Med_use'.
Med_use_desc = desc.loc[desc['data_table'] == 'Med_use']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(Med_use_desc['data_table'].values, Med_use_desc['description'].values):
    print(f'{table_name} : {description}')

## Repeatable Battery for Assessment of Neuropsychological Status (RBANS)

In [None]:
RBANS = pd.read_csv('PREVENT-AD_internal_n386__RBANS.csv')
RBANS

In [None]:
len(RBANS.PSCID.unique())

In [None]:
RBANS.Visit_label.unique()

In [None]:
RBANS.Study_visit_label.unique()

In [None]:
import matplotlib.pyplot as plt

# Rows per study-visit label in the RBANS table.
rbans_visit_label = RBANS.Study_visit_label
PREBL00_sum = int((rbans_visit_label == 'PREBL00').sum())
PREFU12_sum = int((rbans_visit_label == 'PREFU12').sum())
PREFU24_sum = int((rbans_visit_label == 'PREFU24').sum())
PREFU36_sum = int((rbans_visit_label == 'PREFU36').sum())
PREFU48_sum = int((rbans_visit_label == 'PREFU48').sum())
PREFU60_sum = int((rbans_visit_label == 'PREFU60').sum())
PREFU72_sum = int((rbans_visit_label == 'PREFU72').sum())
PREFU84_sum = int((rbans_visit_label == 'PREFU84').sum())
NAPBL00_sum = int((rbans_visit_label == 'NAPBL00').sum())
NAPFU03_sum = int((rbans_visit_label == 'NAPFU03').sum())
NAPFU12_sum = int((rbans_visit_label == 'NAPFU12').sum())
NAPFU24_sum = int((rbans_visit_label == 'NAPFU24').sum())
NAPFU36_sum = int((rbans_visit_label == 'NAPFU36').sum())
NAPFU48_sum = int((rbans_visit_label == 'NAPFU48').sum())
NAPFU60_sum = int((rbans_visit_label == 'NAPFU60').sum())
NAPFU72_sum = int((rbans_visit_label == 'NAPFU72').sum())
NAPFU84_sum = int((rbans_visit_label == 'NAPFU84').sum())

# (label, count) pairs in plotting order: NAP cohort first, then PRE.
rbans_bars = [
    ('NAPBL00', NAPBL00_sum),
    ('NAPFU03', NAPFU03_sum),
    ('NAPFU12', NAPFU12_sum),
    ('NAPFU24', NAPFU24_sum),
    ('NAPFU36', NAPFU36_sum),
    ('NAPFU48', NAPFU48_sum),
    ('NAPFU60', NAPFU60_sum),
    ('NAPFU72', NAPFU72_sum),
    ('NAPFU84', NAPFU84_sum),
    ('PREBL00', PREBL00_sum),
    ('PREFU12', PREFU12_sum),
    ('PREFU24', PREFU24_sum),
    ('PREFU36', PREFU36_sum),
    ('PREFU48', PREFU48_sum),
    ('PREFU60', PREFU60_sum),
    ('PREFU72', PREFU72_sum),
    ('PREFU84', PREFU84_sum),
]
study_visits = [label for label, count in rbans_bars]
number_participants = [count for label, count in rbans_bars]

fig = plt.figure(figsize=(16, 6))
ax = fig.add_axes([0, 0, 1, 1])
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
plt.show()

In [None]:
# Combined cohorts: PRE and NAP counts pooled per time point. The *_sum names
# are module-level counts; in notebook order they were last computed from RBANS.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_axes([0,0,1,1])
# Bar positions, in months
study_visits = (0,12,24,36,48,60,72,84)
number_participants = [
    PREBL00_sum + NAPBL00_sum,
    PREFU12_sum + NAPFU12_sum,
    PREFU24_sum + NAPFU24_sum,
    PREFU36_sum + NAPFU36_sum,
    PREFU48_sum + NAPFU48_sum,
    PREFU60_sum + NAPFU60_sum,
    PREFU72_sum + NAPFU72_sum,
    PREFU84_sum + NAPFU84_sum,
]
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
# Tick positions and tick-label size are set separately: set_xticks only takes
# text properties such as fontsize when explicit `labels` are passed as well.
ax.set_xticks(study_visits)
ax.tick_params(axis='x', labelsize=16)
ax.set_xlabel('Time in months since first visit', fontsize=18)
ax.set_ylabel('Number of participants', fontsize=18)
ax.set_title('Attrition rate of follow-up', fontsize=20)
plt.show()

In [None]:
# Data-dictionary rows whose data_table is 'RBANS'.
RBANS_desc = desc.loc[desc['data_table'] == 'RBANS']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(RBANS_desc['data_table'].values, RBANS_desc['description'].values):
    print(f'{table_name} : {description}')

## Smell identification test

In [None]:
Smell = pd.read_csv('PREVENT-AD_internal_n386__Smell_identification.csv')
Smell

In [None]:
Smell.Study_visit_label.unique()

In [None]:
import matplotlib.pyplot as plt

# Rows per study-visit label in the smell-identification table.
smell_visit_label = Smell.Study_visit_label
PREBL00_sum = int((smell_visit_label == 'PREBL00').sum())
PREFU12_sum = int((smell_visit_label == 'PREFU12').sum())
PREFU24_sum = int((smell_visit_label == 'PREFU24').sum())
PREFU36_sum = int((smell_visit_label == 'PREFU36').sum())
PREFU48_sum = int((smell_visit_label == 'PREFU48').sum())
PREFU60_sum = int((smell_visit_label == 'PREFU60').sum())
PREFU72_sum = int((smell_visit_label == 'PREFU72').sum())
PREFU84_sum = int((smell_visit_label == 'PREFU84').sum())
NAPBL00_sum = int((smell_visit_label == 'NAPBL00').sum())
NAPFU03_sum = int((smell_visit_label == 'NAPFU03').sum())
NAPFU12_sum = int((smell_visit_label == 'NAPFU12').sum())
NAPFU24_sum = int((smell_visit_label == 'NAPFU24').sum())
NAPFU36_sum = int((smell_visit_label == 'NAPFU36').sum())
NAPFU48_sum = int((smell_visit_label == 'NAPFU48').sum())
NAPFU60_sum = int((smell_visit_label == 'NAPFU60').sum())
NAPFU72_sum = int((smell_visit_label == 'NAPFU72').sum())
NAPFU84_sum = int((smell_visit_label == 'NAPFU84').sum())

# (label, count) pairs in plotting order: NAP cohort first, then PRE.
# The FU72 and FU84 counts are computed above but not drawn, as before.
smell_bars = [
    ('NAPBL00', NAPBL00_sum),
    ('NAPFU03', NAPFU03_sum),
    ('NAPFU12', NAPFU12_sum),
    ('NAPFU24', NAPFU24_sum),
    ('NAPFU36', NAPFU36_sum),
    ('NAPFU48', NAPFU48_sum),
    ('NAPFU60', NAPFU60_sum),
    ('PREBL00', PREBL00_sum),
    ('PREFU12', PREFU12_sum),
    ('PREFU24', PREFU24_sum),
    ('PREFU36', PREFU36_sum),
    ('PREFU48', PREFU48_sum),
    ('PREFU60', PREFU60_sum),
]
study_visits = [label for label, count in smell_bars]
number_participants = [count for label, count in smell_bars]

fig = plt.figure(figsize=(16, 6))
ax = fig.add_axes([0, 0, 1, 1])
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
plt.show()

In [None]:
# Combined cohorts: PRE and NAP counts pooled per time point. The *_sum names
# are module-level counts; in notebook order they were last computed from Smell.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_axes([0,0,1,1])
# Bar positions, in months
study_visits = (0,12,24,36,48,60)
number_participants = [
    PREBL00_sum + NAPBL00_sum,
    PREFU12_sum + NAPFU12_sum,
    PREFU24_sum + NAPFU24_sum,
    PREFU36_sum + NAPFU36_sum,
    PREFU48_sum + NAPFU48_sum,
    PREFU60_sum + NAPFU60_sum,
]
values = ax.bar(study_visits, number_participants)
ax.bar_label(values, fontsize=16)
# Tick positions and tick-label size are set separately: set_xticks only takes
# text properties such as fontsize when explicit `labels` are passed as well.
ax.set_xticks(study_visits)
ax.tick_params(axis='x', labelsize=16)
ax.set_xlabel('Time in months since first visit', fontsize=18)
ax.set_ylabel('Number of participants', fontsize=18)
ax.set_title('Attrition rate of follow-up', fontsize=20)
plt.show()

In [None]:
# Data-dictionary rows whose data_table is 'Smell_identification'.
Smell_desc = desc.loc[desc['data_table'] == 'Smell_identification']
# Print one "<data_table> : <description>" line per selected row.
# NOTE(review): the prefix is the data_table value, which the filter above makes
# the same on every line — confirm the variable name was not what was intended.
for table_name, description in zip(Smell_desc['data_table'].values, Smell_desc['description'].values):
    print(f'{table_name} : {description}')

# Creating datasets for the different timeframes

## Preliminary evaluation (PREEL00)

In [None]:
EL_CAIDE.iloc[np.where(EL_CAIDE.Visit_label == 'EL00')]

In [None]:
EL_CAIDE

In [None]:
EL_CDR_MoCA.iloc[np.where(EL_CDR_MoCA.Visit_label == 'EL00')]

In [None]:
EL_med_hist.iloc[np.where(EL_med_hist.Visit_label == 'EL00')]

In [None]:
EL_med_hist

In [None]:
BP_Pulse_Weight.iloc[np.where(BP_Pulse_Weight.Visit_label == 'EL00')]

In [None]:
lab.iloc[np.where(lab.Visit_label == 'EL00')]

In [None]:
Med_use.iloc[np.where(Med_use.Visit_label == 'EL00')]

In [None]:
genetics

In [None]:
Dem

In [None]:
# Build `fixed` by joining every table on CandID with merge's default join type.
# Start from demographics + genetics, which are merged unfiltered.
fixed = Dem.merge(genetics, on='CandID')
# Tables that are restricted to their 'EL00' rows before being merged.
for visit_table in (Med_use, lab, BP_Pulse_Weight):
    fixed = fixed.merge(visit_table.loc[visit_table.Visit_label == 'EL00'], on='CandID')
# Tables that are merged as they are, without a visit filter.
for unfiltered_table in (EL_med_hist, EL_CAIDE, EL_CDR_MoCA):
    fixed = fixed.merge(unfiltered_table, on='CandID')

In [None]:
# NOTE(review): this rebinds `fixed` to the Demographics + Genetics merge only,
# replacing the wider multi-table merge assigned to `fixed` in the preceding
# cell — confirm which of the two the cells below are meant to use.
fixed = Dem.merge(genetics, on='CandID')
fixed

In [None]:
fixed.columns

In [None]:
sorted(fixed.columns)

In [None]:
for i in fixed.isna().sum():
    print(i)

In [None]:
len(fixed.columns)

In [None]:
# Write the current `fixed` frame to 'EL00.csv'.
# NOTE(review): in notebook order `fixed` was last assigned from
# Dem.merge(genetics, on='CandID'), so the EL00 visit tables merged earlier
# would not be part of this file — confirm that is what should be exported.
fixed.to_csv('EL00.csv')

In [None]:
for i in fixed.columns:
    print(i)

In [None]:
Med_use.iloc[np.where(Med_use.Visit_label == 'EL00')].columns

In [None]:
# Merge the Med_use rows whose Visit_label is 'EL00' into `fixed`.
# NOTE(review): the left key here is 'CandID_demo', whereas the earlier merges
# join on 'CandID' — confirm `fixed` really has a 'CandID_demo' column.
# NOTE(review): `fixed` is rebound from itself, so running this cell twice
# merges Med_use twice.
fixed = fixed.merge(Med_use.iloc[np.where(Med_use.Visit_label == 'EL00')], left_on='CandID_demo', right_on= 'CandID')

In [None]:
fixed

## Longitudinal data

In [None]:
def cols_to_use(df1,df2):
    """Return the columns of ``df2`` that ``df1`` lacks, followed by ``'CandID'``.

    Args:
        df1: DataFrame whose columns are excluded from the result.
        df2: DataFrame whose columns are the candidates.

    Returns:
        A list of the column labels found in ``df2`` but not in ``df1``, in the
        order produced by ``Index.difference``, with ``'CandID'`` appended.
    """
    new_columns = df2.columns.difference(df1.columns).tolist()
    return new_columns + ['CandID']

In [None]:
cols_to_use(AD8, APS)

In [None]:
APS.columns.difference(AD8.columns)

In [None]:
APS.columns

In [None]:
AD8.columns

In [None]:
def long_data_merge_v1(visit_label):
    """Outer-merge one visit's rows from every longitudinal table and save them.

    Each module-level table (AD8, APS, Aud_pro, BP_Pulse_Weight, CSF_Proteins,
    lab, Med_use, RBANS, Smell) is reduced to the rows whose ``Visit_label``
    equals ``visit_label``; the pieces are then outer-merged on ``'CandID'``
    one after another, starting from AD8.

    Args:
        visit_label: Value compared against each table's ``Visit_label`` column.

    Returns:
        None. The merged frame is written to
        ``'<visit_label>_outter_merge.csv'`` in the working directory.
    """
    def rows_for_visit(table):
        # Rows of `table` recorded under the requested visit label.
        return table.loc[table.Visit_label == visit_label]

    # First merge: overlapping non-key columns get a suffix on both sides.
    df_tx = rows_for_visit(AD8).merge(
        rows_for_visit(APS), on='CandID', how = 'outer', suffixes=['_AD8','_APS']
    )

    # Later merges: the accumulated (left) name is kept as is and only the
    # incoming table's overlapping columns receive that table's suffix.
    remaining_tables = [
        (Aud_pro, '_Aud_pro'),
        (BP_Pulse_Weight, '_BP_Pulse_Weight'),
        (CSF_Proteins, '_CSF_Proteins'),
        (lab, '_lab'),
        (Med_use, '_Med_use'),
        (RBANS, '_RBANS'),
        (Smell, '_Smell'),
    ]
    for table, table_suffix in remaining_tables:
        df_tx = df_tx.merge(
            rows_for_visit(table), on='CandID', how = 'outer', suffixes=[None, table_suffix]
        )

    df_tx.to_csv(f'{visit_label}_outter_merge.csv')
    

In [None]:
def long_data_merge_v2(visit_label):
    """Outer-merge one visit's rows, taking from each table only its new columns.

    Starts from the AD8 rows whose ``Visit_label`` equals ``visit_label``. For
    every further module-level table (APS, Aud_pro, BP_Pulse_Weight,
    CSF_Proteins, lab, Med_use, RBANS, Smell) the rows for that visit are cut
    down to the columns chosen by ``cols_to_use`` (those not already present,
    plus ``'CandID'``) and outer-merged on ``'CandID'``.

    Because shared columns are taken from the left side only, a CandID that
    first appears in a later table has missing values in those columns.

    Args:
        visit_label: Value compared against each table's ``Visit_label`` column.

    Returns:
        None. The merged frame is written to
        ``'<visit_label>_remove_duplicates.csv'`` in the working directory.
    """
    df_tx = AD8.loc[AD8.Visit_label == visit_label]

    later_tables = (APS, Aud_pro, BP_Pulse_Weight, CSF_Proteins, lab, Med_use, RBANS, Smell)
    for table in later_tables:
        visit_rows = table.loc[table.Visit_label == visit_label]
        # Columns this table contributes on top of what is already merged.
        added_columns = cols_to_use(df_tx, table)
        df_tx = df_tx.merge(visit_rows[added_columns], on='CandID', how = 'outer')

    df_tx.to_csv(f'{visit_label}_remove_duplicates.csv')
    

In [None]:
def long_data_merge_v3(visit_label):
    """Inner-merge one visit's rows from every longitudinal table and save them.

    The previous implementation called ``merge(on='CandID', how='cross')``.
    pandas rejects ``on`` together with ``how='cross'`` (it raises
    ``MergeError``), so the function could never complete. A cross join has no
    key by definition; since a key was supplied, the key-based intersection
    (``how='inner'``) is used instead, i.e. only CandIDs present in every table
    for this visit are kept.
    NOTE(review): confirm that an inner join is the behaviour that was wanted.

    Overlapping non-key columns are disambiguated with per-table suffixes, in
    the same way as ``long_data_merge_v1``, so repeated merges cannot produce
    duplicate column names.

    Args:
        visit_label: Value compared against each table's ``Visit_label`` column.

    Returns:
        None. The merged frame is written to ``'<visit_label>_cross.csv'`` in
        the working directory (file name kept unchanged for compatibility).
    """
    def rows_for_visit(table):
        # Rows of `table` recorded under the requested visit label.
        return table.loc[table.Visit_label == visit_label]

    df_tx = rows_for_visit(AD8).merge(
        rows_for_visit(APS), on='CandID', how='inner', suffixes=['_AD8', '_APS']
    )

    remaining_tables = [
        (Aud_pro, '_Aud_pro'),
        (BP_Pulse_Weight, '_BP_Pulse_Weight'),
        (CSF_Proteins, '_CSF_Proteins'),
        (lab, '_lab'),
        (Med_use, '_Med_use'),
        (RBANS, '_RBANS'),
        (Smell, '_Smell'),
    ]
    for table, table_suffix in remaining_tables:
        df_tx = df_tx.merge(
            rows_for_visit(table), on='CandID', how='inner', suffixes=[None, table_suffix]
        )

    df_tx.to_csv(f'{visit_label}_cross.csv')

In [None]:
# Write one outer-merged CSV for every distinct Visit_label found in RBANS.
rbans_visit_labels = RBANS.Visit_label.unique()
for visit in rbans_visit_labels:
    long_data_merge_v1(visit)

In [None]:
RBANS.Visit_label.unique()

In [None]:
long_data_merge_v1('BL00')

In [None]:
# Read back the BL00 outer merge written by long_data_merge_v1.
# That file was saved with DataFrame.to_csv's default index=True, so its first
# column is the saved row index; index_col=0 restores it as the index instead
# of leaving it behind as a stray 'Unnamed: 0' data column.
BL00_outter_merge = pd.read_csv('BL00_outter_merge.csv', index_col=0)
BL00_outter_merge

In [None]:
BL00_outter_merge.isna().sum()

In [None]:
BL00_outter_merge.columns

In [None]:
# 'BL00_remove_duplicates.csv' is the output of long_data_merge_v2, which no
# earlier cell calls; generate it here so this cell works on a fresh kernel.
long_data_merge_v2('BL00')
# The file was saved with the row index as its first column (to_csv default),
# so read that column back as the index.
BL00_remove_dup = pd.read_csv('BL00_remove_duplicates.csv', index_col=0)
BL00_remove_dup

In [None]:
# Load one CSV per visit label into a DataFrame of the same name.
# NOTE(review): no cell visible here writes files with these names — the merge
# helpers write '<visit>_outter_merge.csv', '<visit>_remove_duplicates.csv' and
# '<visit>_cross.csv' — so these files must already exist on disk; confirm
# where they come from.
# NOTE(review): there is no FU36 entry between FU24 and FU48 — confirm whether
# that omission is intentional.
BL00 = pd.read_csv('BL00.csv')
FU03 = pd.read_csv('FU03.csv')
FU12 = pd.read_csv('FU12.csv')
FU24 = pd.read_csv('FU24.csv')
FU48 = pd.read_csv('FU48.csv')
FU60 = pd.read_csv('FU60.csv')
FU72 = pd.read_csv('FU72.csv')
FU84 = pd.read_csv('FU84.csv')