In [None]:
# Download the latest data from KSADS.net.
# NOTE: this step takes more than 30 minutes.
import DownloadKsads

DownloadKsads.main()
print('Done.')

In [None]:
import os
import pandas as pd
import PandasHelper as h
from download.redcap import get_behavioral_ids, RedcapTable
from KsadsHelper import KSADS

from config import LoadSettings

In [None]:
def read_csv(date, form, base_dir=None):
    """Load one downloaded form as a DataFrame.

    The file is read from ``<base_dir>/<date>/<form>.csv``.

    Parameters
    ----------
    date : str
        Name of the snapshot sub-directory.
    form : str
        Form name; ``'.csv'`` is appended to build the file name.
    base_dir : str, optional
        Root folder containing the snapshot sub-directories. When omitted,
        the notebook-level ``downloads_dir`` is used, so existing calls
        behave exactly as before.

    Returns
    -------
    pandas.DataFrame
    """
    if base_dir is None:
        # Resolved at call time: `downloads_dir` is assigned in a later cell.
        base_dir = downloads_dir
    return pd.read_csv(os.path.join(base_dir, date, form + '.csv'), low_memory=False)

In [None]:
# Handle on the 'ksads' table plus the behavioral study ids.
table = RedcapTable.get_table_by_name('ksads')
studyids = get_behavioral_ids()

# Every row whose `study` is anything other than 'hcpdparent'.
studydata = studyids.loc[studyids['study'] != 'hcpdparent']

In [None]:
# Locate the folder holding the downloaded KSADS snapshots.
config = LoadSettings()
downloads_dir = config['KSADS']['download_dir']

# read_csv() opens <downloads_dir>/<date>/<form>.csv, so only sub-directories can be
# snapshots. Plain files and hidden entries (e.g. '.DS_Store', '.ipynb_checkpoints')
# sort before digits and would otherwise be picked up as `olddate`.
dates = sorted(
    entry
    for entry in os.listdir(downloads_dir)
    if not entry.startswith('.') and os.path.isdir(os.path.join(downloads_dir, entry))
)
if not dates:
    raise FileNotFoundError(f'No snapshot directories found in {downloads_dir!r}')

# Compare the first and the last snapshot in sort order.
# NOTE(review): this assumes folder names sort chronologically — confirm the naming scheme.
olddate = dates[0]
newdate = dates[-1]

form = 'intro'
form_complete = f'{form}_complete'
old = read_csv(olddate, form)
new = read_csv(newdate, form)

# Current REDCap content for the 'common' and intro forms, restricted to rows whose
# '<form>_complete' column equals 1.
redcap_df = table.get_frame(forms=['common', form])
redcap_df = redcap_df[redcap_df[form_complete] == 1]

In [None]:
# Compare the two snapshots on 'id'.
# NOTE(review): PandasHelper semantics are not visible here — presumably `difference(a, b, key)`
# returns rows of `a` whose key is absent from `b`; confirm.
deleted = h.difference(old, new, 'id')
modified = h.intersection_both(
    old, new, 'id',
    sources=['old', 'new'],
    drop_duplicates=True,
)

# Rows only in the new snapshot, marked complete for both forms, then compared against
# what REDCap already holds.
added = (
    h.difference(new, old, 'id')
    .assign(**{form_complete: 1, 'common_complete': 1})
)
added = h.difference(added, redcap_df)

In [None]:
# Combine the REDCap rows with the newly added rows (if any).
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent and
# keeps the same row order and index handling.
if added is not None and not added.empty:
    merged = pd.concat([redcap_df, added], sort=False)
else:
    merged = redcap_df

In [None]:
# (rows, columns) of the newly added records computed above.
added.shape

In [None]:
# (rows, columns) of the REDCap rows combined with the added rows.
merged.shape

# Intro

In [None]:
# Keep only the identifying columns for the QC checks below.
df = merged[['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# Subject id = text before the first underscore of `patientid`, stripped of whitespace.
# `n` is passed by keyword: pandas 2.0 made every str.split argument after `pat`
# keyword-only, so the positional form raises TypeError there.
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Please read the additional info columns for possible clarifications/corrections:

In [None]:
# Added rows that have a value in the 'additionalinfo' column.
# TODO(review): the original note here reads "widget display additional info" —
# presumably a planned widget view; confirm.
additionalinfo = added[added['additionalinfo'].notna()]
additionalinfo

### Quality Control

In [None]:
# Every row of `merged` whose (patientid, patienttype) pair occurs more than once,
# ordered so that the repeated pairs sit next to each other.
dups = (
    merged
    .loc[merged.duplicated(subset=['patientid', 'patienttype'], keep=False)]
    .sort_values(by=['patientid', 'patienttype'])
)
dups

In [None]:
# Same repeated-pair check, applied to the newest snapshot alone.
(
    new
    .loc[new.duplicated(subset=['patientid', 'patienttype'], keep=False)]
    .sort_values(by=['patientid', 'patienttype'])
)

In [None]:
# Repeated (patientid, patienttype) pairs within the trimmed `df`; the sorted view is displayed.
duplicates = df.loc[df.duplicated(subset=['patientid', 'patienttype'], keep=False)].copy()
duplicates.sort_values(by=['patientid', 'patienttype'])
# Disabled in the original: ksads.warn_duplicates(duplicates, form)

In [None]:
# Rows of `df` reported by h.difference against the known study subjects, with
# 'sitename' renamed to 'site' and a fixed reason attached.
# NOTE(review): `data` and `ksads` are not assigned in any cell above this one in the
# visible notebook, so on a fresh kernel this cell would raise NameError — confirm
# where they are meant to be initialised.
not_in_redcap = (
    h.difference(df, studyids.subject)
    .rename(columns={'sitename': 'site'})
    .assign(reason='PatientID not in Redcap')
)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Study rows reported by h.difference against the subjects seen in `df`, narrowed
# step by step (each filter only sees the rows kept by the previous one):
#   1. `flagged` is null
#   2. `interview_date` is below '2019-05-01' (compared as stored — dtype not visible here)
#   3. `study` is not 'hcpa'
# NOTE(review): `data` and `ksads` are not assigned in any cell above this one in the
# visible notebook — confirm where they are initialised.
missing = (
    h.difference(studydata, df.subject)
    .loc[lambda frame: frame['flagged'].isnull()]
    .loc[lambda frame: frame['interview_date'] < '2019-05-01']
    .loc[lambda frame: frame['study'] != 'hcpa']
    .assign(reason='Missing in Box')
)
data['missing'] = missing
ksads.warn_missing(missing, form)

# Screener

In [None]:
# Switch to the screener form, load its data through `ksads`, and register it in `overall`.
# NOTE(review): neither `ksads` nor `overall` is created in any visible cell (only the
# KSADS class is imported) — confirm they are initialised before this cell runs.
form = 'screener'
data = ksads.read_data(form)
overall[form] = data

In [None]:
# One row per (patientid, patienttype), keeping the last occurrence; the result is
# stored back under data['merged'] and bound to `df`.
# NOTE(review): because duplicates are removed here, the later duplicates QC cell that
# checks the same key pair on `df` cannot find anything — confirm this order is intended.
df = data['merged'].drop_duplicates(subset=['patientid', 'patienttype'], keep='last')
data['merged'] = df

In [None]:
# Keep only the identifying columns of the merged screener data.
df = data['merged']
df = df[['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# Subject id = text before the first underscore of `patientid`, stripped of whitespace.
# `n` is passed by keyword: pandas 2.0 made every str.split argument after `pat`
# keyword-only, so the positional form raises TypeError there.
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Please read the additional info columns for possible clarifications/corrections:

In [None]:
# Added screener rows that have a value in the 'additionalinfo' column.
data['added'].loc[lambda frame: frame['additionalinfo'].notna()]

### Quality Control

In [None]:
# Rows whose (patientid, patienttype) pair occurs more than once, tagged with a reason.
# `.assign` builds a fresh frame; assigning a column onto the boolean-indexed slice of
# the still-live `df` triggers pandas' SettingWithCopyWarning.
duplicates = (
    df[df.duplicated(['patientid', 'patienttype'], keep=False)]
    .assign(reason='Duplicate IDs')
)
data['duplicates'] = duplicates
# NOTE(review): `ksads` is not created in any visible cell — confirm it is initialised.
ksads.warn_duplicates(duplicates, form)

In [None]:
# Rows of `df` reported by h.difference against the known study subjects, with
# 'sitename' renamed to 'site' and a fixed reason attached.
# NOTE(review): `ksads` is not created in any visible cell — confirm it is initialised.
not_in_redcap = (
    h.difference(df, studyids.subject)
    .rename(columns={'sitename': 'site'})
    .assign(reason='PatientID not in Redcap')
)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Study rows reported by h.difference against the subjects seen in `df`, narrowed
# step by step (each filter only sees the rows kept by the previous one):
#   1. `flagged` is null
#   2. `interview_date` is below '2019-05-01' (compared as stored — dtype not visible here)
#   3. `study` is not 'hcpa'
# NOTE(review): `ksads` is not created in any visible cell — confirm it is initialised.
missing = (
    h.difference(studydata, df.subject)
    .loc[lambda frame: frame['flagged'].isnull()]
    .loc[lambda frame: frame['interview_date'] < '2019-05-01']
    .loc[lambda frame: frame['study'] != 'hcpa']
    .assign(reason='Missing in Box')
)
data['missing'] = missing
ksads.warn_missing(missing, form)

# Supplement

In [None]:
# Switch to the supplement form, load its data through `ksads`, and register it in `overall`.
# NOTE(review): neither `ksads` nor `overall` is created in any visible cell (only the
# KSADS class is imported) — confirm they are initialised before this cell runs.
form = 'supplement'
data = ksads.read_data(form)
overall[form] = data

In [None]:
# One row per (patientid, patienttype), keeping the first occurrence (pandas default);
# the result is stored back under data['merged'] and bound to `df`.
# NOTE(review): the screener section passes keep='last' for the same step — confirm
# which occurrence should survive here.
# NOTE(review): because duplicates are removed here, the later duplicates QC cell that
# checks the same key pair on `df` cannot find anything — confirm this order is intended.
df = data['merged'].drop_duplicates(subset=['patientid', 'patienttype'])
data['merged'] = df

In [None]:
# Keep only the identifying columns of the merged supplement data.
df = data['merged']
df = df[['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# Subject id = text before the first underscore of `patientid`, stripped of whitespace.
# `n` is passed by keyword: pandas 2.0 made every str.split argument after `pat`
# keyword-only, so the positional form raises TypeError there.
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Please read the additional info columns for possible clarifications/corrections:

In [None]:
# Added supplement rows that have a value in the 'additionalinfo' column.
data['added'].loc[lambda frame: frame['additionalinfo'].notna()]

### Quality Control

In [None]:
# Rows whose (patientid, patienttype) pair occurs more than once, tagged with a reason.
# `.assign` builds a fresh frame; assigning a column onto the boolean-indexed slice of
# the still-live `df` triggers pandas' SettingWithCopyWarning.
duplicates = (
    df[df.duplicated(['patientid', 'patienttype'], keep=False)]
    .assign(reason='Duplicate IDs')
)
data['duplicates'] = duplicates
# NOTE(review): `ksads` is not created in any visible cell — confirm it is initialised.
ksads.warn_duplicates(duplicates, form)

In [None]:
# Rows of `df` reported by h.difference against the known study subjects, with
# 'sitename' renamed to 'site' and a fixed reason attached.
# NOTE(review): `ksads` is not created in any visible cell — confirm it is initialised.
not_in_redcap = (
    h.difference(df, studyids.subject)
    .rename(columns={'sitename': 'site'})
    .assign(reason='PatientID not in Redcap')
)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Study rows reported by h.difference against the subjects seen in `df`, narrowed
# step by step (each filter only sees the rows kept by the previous one):
#   1. `flagged` is null
#   2. `interview_date` is below '2019-05-01' (compared as stored — dtype not visible here)
#   3. `study` is not 'hcpa'
# NOTE(review): `ksads` is not created in any visible cell — confirm it is initialised.
missing = (
    h.difference(studydata, df.subject)
    .loc[lambda frame: frame['flagged'].isnull()]
    .loc[lambda frame: frame['interview_date'] < '2019-05-01']
    .loc[lambda frame: frame['study'] != 'hcpa']
    .assign(reason='Missing in Box')
)
data['missing'] = missing
ksads.warn_missing(missing, form)

# Upload New Data

In [None]:
def put_data(d):
    """Hand ``d`` to ``ksads.redcap.send_frame`` and return whatever that call returns.

    Callers in this notebook invoke ``.json()`` on the returned object.
    Relies on the notebook-level ``ksads`` object being defined.
    """
    response = ksads.redcap.send_frame(d)
    return response

In [None]:
# Send the intro form's 'added' frame; `x` is the decoded JSON reply and its length is displayed.
# NOTE(review): no visible cell stores an entry under overall['intro'] — confirm it is
# populated before running this upload.
x = put_data(overall['intro']['added']).json()
len(x)

In [None]:
# Send the screener form's 'added' frame; `y` is the decoded JSON reply and its length is displayed.
y = put_data(overall['screener']['added']).json()
len(y)

In [None]:
# Send the supplement form's 'added' frame; `z` is the decoded JSON reply and its length is displayed.
z = put_data(overall['supplement']['added']).json()
len(z)