In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
import pylab as plt

from importlib import reload
from functools import reduce
import glob
import datetime
import re

import os
import sys
sys.path.insert(1,'../scripts')
import utils
import plots
sys.path.insert(1,'../../phenotype')
import _loaders
import _preprocess

import pickle

In [None]:
# Root directory of the PPMI data on the cluster scratch filesystem.
data_path = '/scratch/c.c21013066/data/ppmi'
# Accelerometer directory under the PPMI root.
path = '/scratch/c.c21013066/data/ppmi/accelerometer'

In [None]:
# Load the phenotype tables from the 2021 phenotype directory.
pheno_dir = f'{data_path}/phenotypes2021'

demo = pd.read_csv(
    f'{data_path}/phenotypes2021/demographics.csv',
    parse_dates=['date_birth'],
)
behavior = _loaders.load_behavior(pheno_dir)
datscan = _loaders.load_datscan_all(pheno_dir)
genotypes = _loaders.load_genotypes(pheno_dir)
prodromal = _loaders.load_prodromalBerg(pheno_dir)

# utils.load_timeseries returns six objects; the names below follow the
# original unpacking order.
timeseries = utils.load_timeseries(demo, path)
merged, ambulatory, step, sleep, pulse, pulsevar = timeseries

In [None]:
data_path = '/scratch/c.c21013066/data/ppmi'

# SAA biospecimen table and the additional biospecimen measures.
biospecimen = pd.read_csv(
    f'{data_path}/phenotypes2021/biospecimen_SAA_all_clean.csv',
    index_col=0,
)
bsp_rest = pd.read_csv(
    f'{data_path}/phenotypes2021/biospecimen_clean_add.csv',
    index_col=0,
)
bsp_rest = bsp_rest.drop(columns=['index'])

# Outer-join both tables per participant/date/visit. The last column of the
# SAA table is left out of the merge.
merge_keys = ['participant', 'date', 'visit']
biospecimen_all = biospecimen.iloc[:, :-1].merge(bsp_rest, on=merge_keys, how='outer')

# Keep only rows whose visit code is 'BL'.
is_bl_visit = biospecimen_all['visit'] == 'BL'
biospecimen_all = biospecimen_all.loc[is_bl_visit]

In [None]:
# Build biospecimen and DaTscan tables annotated with demographics and visit
# age, derive per-participant first/last DaTscan summaries, and pair DaTscan
# visits with biospecimen visits via _preprocess.match_only_nearest_df.
demo = pd.read_csv(f'{data_path}/phenotypes2021/demographics.csv',parse_dates=['date_birth'])

# --- Biospecimen ---------------------------------------------------------
# The last seven columns of the merged biospecimen table are used as the
# biospecimen feature names.
biospfeatures = biospecimen_all.columns[-7:]
biospecimen = pd.merge(biospecimen_all,demo,on='participant',how='left')
biospecimen = _preprocess.date_to_datetime(biospecimen)
biospecimen = _preprocess.get_visit_age(biospecimen)
biospecimen = biospecimen.sort_values(['participant','date'])
# GroupBy.last() returns the last non-null value of each column per group,
# which is not necessarily a single source row.
biospecimen_last = biospecimen.groupby('participant').last()

# --- DaTscan -------------------------------------------------------------
datscan_ = pd.read_csv(f'{data_path}/phenotypes2021/datscan_all_clean.csv',index_col=0)
datscan_ = pd.merge(datscan_,demo,on='participant',how='left')
datscan_ = _preprocess.date_to_datetime(datscan_)
datscan_ = _preprocess.get_visit_age(datscan_)
datscan_ = _preprocess.get_DatScan_IDPs(datscan_)
datfeatures = ['datscan_caudate_mean', 'datscan_putamen_mean',
       'datscan_mean','datscan_asymmetry','datscan_caudate_asymmetry', 'datscan_putamen_asymmetry']
datscan_ = datscan_.sort_values(['participant','date'])
datscan_last = datscan_.groupby('participant').last()
datscan_bl = datscan_.groupby('participant').first()

# Rebinds `datscan` (previously the loader output) to the table returned by
# get_DAT_deficit.
datscan = _preprocess.get_DAT_deficit(datscan_)
# Latest values per participant among rows that have a putamen_min value.
dat_last = (
    datscan
    .dropna(subset=['putamen_min'],how='any',axis='rows')
    .sort_values('date')
    .groupby('participant')
    .last()
)

# --- Pair DaTscan with biospecimen ---------------------------------------
# The former index-on-index merge of biospecimen_last (indexed by participant)
# with datscan_ (default integer index) was removed: it joined unrelated keys
# and its result was overwritten by the call below.
# presumably pairs rows on nearest 'visit_age' within a tolerance of 2 (years?)
# -- TODO confirm against _preprocess.match_only_nearest_df.
bio = _preprocess.match_only_nearest_df(datscan_.reset_index(),biospecimen.reset_index(),merge='visit_age',tolerance=2,suffixes=['','_bio'])

In [None]:
# Assemble the converter table `prod`: demographics + genotypes + prodromal
# information, with the date and age at phenoconversion.
#
# NOTE: this cell rebinds `prodromal` from itself, so re-running it without
# reloading `prodromal` first merges the DaT columns a second time.

# Attach the latest quantitative DaT values to every prodromal row. Columns of
# `prodromal` that clash with the left-hand names (e.g. its own 'dat_deficit')
# receive the '_visual' suffix.
prodromal = pd.merge(dat_last.reset_index()[['participant','dat_deficit','putamen_min']],prodromal,on='participant',how='right',suffixes=['','_visual'])
# Encode the visual read as 0/1; values other than 'negative'/'positive' are
# left unchanged by replace().
prodromal['dat_deficit_visual'] = prodromal['dat_deficit_visual'].replace(['negative','positive'],[0,1])

# Pull the behavior row recorded at each participant's phenoconversion visit,
# keeping every prodromal row (right join).
behprod = pd.merge(behavior.reset_index().drop(columns=['rbd']),prodromal,left_on=['participant','visit'],right_on=['participant','phenoconverted_visit'],how='right')
# For converters, the date of that visit is taken as the conversion date.
converted = behprod['phenoconverted']==1
behprod.loc[converted,'phenoconverted_date'] = behprod.loc[converted,'date']

prod = pd.merge(demo,genotypes,on='participant')
prod = pd.merge(prod,behprod[np.hstack([prodromal.columns,'phenoconverted_date'])],on='participant')

# Age at conversion in years. pandas >= 2.0 rejects the ambiguous 'Y' unit in
# np.timedelta64(1, "Y"); 365.2425 days is exactly the length NumPy uses for
# one year, so the resulting values are unchanged.
# assumes 'phenoconverted_date' holds datetimes -- TODO confirm the dtype of
# behavior['date'].
prod['phenoconverted_age'] = (prod['phenoconverted_date'] - prod['date_birth'])/pd.Timedelta(days=365.2425)

In [None]:
# Per-diagnosis summary of the visual and quantitative DaT-deficit flags:
# mean, group size (NaN included) and non-null count.
(
    prod
    .groupby('diagnosis')[['dat_deficit_visual', 'dat_deficit']]
    .agg(['mean', 'size', 'count'])
)

In [None]:
# NOTE(review): this rebinds `path` to a location on a different filesystem
# than the one used for the accelerometer time series -- confirm this is
# intended, since later cells that use `path` will see this value.
path = '/rds/general/user/aschalka/home/data/ppmi/accelerometer'

# One extracted-features file per accelerometer-derived time series.
feature_files = ['stepcount.csv','ambulatory.csv','prv.csv','pulserate.csv','sleepmetrics2.csv']
f = utils.read_extracted_features(f'{path}/extracted_features', names=feature_files)

In [None]:
# Collect the distinct feature names: every column after the first is split
# once on '__' and the part after the separator is kept.
feature_names = f.columns[1:].str.split('__', n=1).str[1]
unique = np.unique(feature_names)

# Build the Series once, save it, then show it. An expression in the middle of
# a cell is evaluated and discarded, so the display has to be the last line.
tsfresh_features = pd.Series(unique)
tsfresh_features.to_csv(f'{path}/tsfresh_features.csv') # list of features extracted with tsfresh 783 per timeseries
tsfresh_features

In [None]:
# Per-subject summary (min/max/mean/median) of age at accelerometry. Each
# modality is aggregated separately; when a subject appears in several
# modalities, the row from the earliest modality in the list is kept.
age_stats = []
for mod in [ambulatory,step,sleep,pulse,pulsevar]:
    age = mod.groupby('subject')[['age_accelerometry']].agg(['min','max','mean','median'])
    age_stats.append(age)

# Concatenate once instead of growing the frame inside the loop; dropping
# duplicates with keep='first' afterwards yields the same rows in the same
# order as de-duplicating after every append.
ages = pd.concat(age_stats)
ages = ages[~ages.index.duplicated(keep='first')]

# Flatten the (column, statistic) MultiIndex, e.g. 'age_accelerometry_min'.
ages.columns = ['_'.join(col) for col in ages.columns.values]

In [None]:
# Persist the converter table (index included) for downstream analyses.
prod.to_csv(
    path_or_buf='/scratch/c.c21013066/data/ppmi/analyses/prodromal/converterInfo.csv',
)