In [None]:
import os
import pandas as pd
import DownloadKsads
import PandasHelper as h
from download.redcap import Redcap
from KsadsHelper import KSADS
import requests

# Shared helper objects used by the cells below.
# ksads supplies read_data(), dates and the warn_*() reporters.
ksads = KSADS()
# Redcap client created with no arguments; used to fetch study IDs and study data.
redcap = Redcap()

In [None]:
from config import LoadSettings

# Take the 'ksads' datasource entry from the 'Redcap' section of the settings.
config = LoadSettings()['Redcap']
redK = config['datasources']['ksads']
# Token and URL are read from that entry.
# NOTE(review): presumably these identify the KSADS REDCap project -- confirm
# against the settings file.
ksad_token = redK['token']
redcap_ksads = Redcap(redK['url'])


In [None]:
# Reference data from Redcap that the quality-control cells compare against:
# studyids.subject is checked against the KSADS subjects, and studydata is the
# table searched for subjects that have no KSADS record.
studyids = redcap.getredcapids()
studydata = redcap.getredcapdata()

### Note
The following cell takes <b>20-30 minutes</b> to run because KSADS.net is slow to respond.

In [None]:
# Run the KSADS download; the %time line magic reports how long the call took.
%time DownloadKsads.main()

In [None]:
# Display the dates held by the KSADS helper; the old/new dates are picked from this list.
ksads.dates

In [None]:
# Use the first and last entries of ksads.dates as the old and new dates.
# NOTE(review): presumably ksads.dates is ordered oldest-to-newest -- confirm.
ksads.olddate = ksads.dates[0]
ksads.newdate = ksads.dates[-1]

In [None]:
# Maps form name -> that form's data dict; read again by the upload cells at the end.
overall = {}

# Intro

In [None]:
# Load the data for this form and register it under the form name.
form = 'intro'
data = ksads.read_data(form)
# overall[form] and `data` are the same object, so keys added to `data` in the
# cells below are also visible through `overall`.
overall[form] = data

In [None]:
# Keep the first row of every (patientid, patienttype) pair, bind the result to
# `df`, and store the same frame back on the form's data dict.
df = data['merged'].drop_duplicates(subset=['patientid', 'patienttype'], keep='first')
data['merged'] = df

In [None]:
# Work on a narrow copy of the merged data so the stored frame is left untouched.
df = data['merged'][['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# The subject ID is the text before the first underscore of patientid, stripped
# of surrounding whitespace. `n` is passed by keyword because pandas 2.0 made
# every str.split argument after `pat` keyword-only (positional use raises TypeError).
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Review the `additionalinfo` column in the rows shown below for possible clarifications or corrections:

In [None]:
# Show the rows of the 'added' table whose additionalinfo value is not null.
data['added'].dropna(subset=['additionalinfo'])

### Quality Control

In [None]:
# Collect every row whose (patientid, patienttype) pair occurs more than once;
# keep=False marks all members of a duplicate group, not only the repeats.
# NOTE(review): data['merged'] already had drop_duplicates applied on these same
# keys in an earlier cell, so this frame may always be empty -- confirm intent.
duplicates = df[df.duplicated(['patientid', 'patienttype'], keep=False)].copy()
duplicates['reason'] = 'Duplicate IDs'
# sort_values returns a new frame; the result must be kept for the ordering to
# take effect (it was previously discarded).
duplicates = duplicates.sort_values(['patientid', 'patienttype'])
data['duplicates'] = duplicates
ksads.warn_duplicates(duplicates, form)

In [None]:
# Rows returned by h.difference(df, studyids.subject), tagged with a reason and
# with 'sitename' renamed to 'site'.
# NOTE(review): presumably these are the KSADS rows whose subject is absent from
# the Redcap ID list -- confirm against PandasHelper.difference.
not_in_redcap = (
    h.difference(df, studyids.subject)
    .assign(reason='PatientID not in Redcap')
    .rename(columns={'sitename': 'site'})
)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Rows returned by h.difference(studydata, df.subject), narrowed step by step.
# NOTE(review): presumably these are Redcap subjects with no KSADS record for
# this form -- confirm against PandasHelper.difference.
missing = h.difference(studydata, df.subject)
# Keep only rows where `flagged` is null.
missing = missing[missing.flagged.isnull()]
# Keep only interviews before the hard-coded cut-off.
# NOTE(review): compared against a string; assumes interview_date is an ISO-formatted
# string or datetime column -- confirm. The meaning of the cut-off is not shown here.
missing = missing[missing.interview_date < '2019-05-01']
# Drop the 'hcpa' study. Copy AFTER filtering so the assignment below writes to
# an independent frame rather than a filtered slice (avoids SettingWithCopyWarning).
missing = missing[missing.study != 'hcpa'].copy()
missing['reason'] = 'Missing in Box'
data['missing'] = missing
ksads.warn_missing(missing, form)

# Screener

In [None]:
# Load the data for this form and register it under the form name.
form = 'screener'
data = ksads.read_data(form)
# overall[form] and `data` are the same object, so keys added to `data` in the
# cells below are also visible through `overall`.
overall[form] = data

In [None]:
# Keep the LAST row of every (patientid, patienttype) pair, bind the result to
# `df`, and store the same frame back on the form's data dict.
df = data['merged'].drop_duplicates(subset=['patientid', 'patienttype'], keep='last')
data['merged'] = df

In [None]:
# Work on a narrow copy of the merged data so the stored frame is left untouched.
df = data['merged'][['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# The subject ID is the text before the first underscore of patientid, stripped
# of surrounding whitespace. `n` is passed by keyword because pandas 2.0 made
# every str.split argument after `pat` keyword-only (positional use raises TypeError).
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Review the `additionalinfo` column in the rows shown below for possible clarifications or corrections:

In [None]:
# Show the rows of the 'added' table whose additionalinfo value is not null.
data['added'].dropna(subset=['additionalinfo'])

### Quality Control

In [None]:
# Collect every row whose (patientid, patienttype) pair occurs more than once;
# keep=False marks all members of a duplicate group, not only the repeats.
# .copy() makes the selection an independent frame, so adding the 'reason'
# column does not trigger SettingWithCopyWarning.
# NOTE(review): data['merged'] already had drop_duplicates applied on these same
# keys in an earlier cell, so this frame may always be empty -- confirm intent.
duplicates = df[df.duplicated(['patientid', 'patienttype'], keep=False)].copy()
duplicates['reason'] = 'Duplicate IDs'
data['duplicates'] = duplicates
ksads.warn_duplicates(duplicates, form)

In [None]:
# Rows returned by h.difference(df, studyids.subject), tagged with a reason and
# with 'sitename' renamed to 'site'.
# NOTE(review): presumably these are the KSADS rows whose subject is absent from
# the Redcap ID list -- confirm against PandasHelper.difference.
not_in_redcap = (
    h.difference(df, studyids.subject)
    .assign(reason='PatientID not in Redcap')
    .rename(columns={'sitename': 'site'})
)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Rows returned by h.difference(studydata, df.subject), narrowed step by step.
# NOTE(review): presumably these are Redcap subjects with no KSADS record for
# this form -- confirm against PandasHelper.difference.
missing = h.difference(studydata, df.subject)
# Keep only rows where `flagged` is null.
missing = missing[missing.flagged.isnull()]
# Keep only interviews before the hard-coded cut-off.
# NOTE(review): compared against a string; assumes interview_date is an ISO-formatted
# string or datetime column -- confirm. The meaning of the cut-off is not shown here.
missing = missing[missing.interview_date < '2019-05-01']
# Drop the 'hcpa' study. Copy AFTER filtering so the assignment below writes to
# an independent frame rather than a filtered slice (avoids SettingWithCopyWarning).
missing = missing[missing.study != 'hcpa'].copy()
missing['reason'] = 'Missing in Box'
data['missing'] = missing
ksads.warn_missing(missing, form)

# Supplement

In [None]:
# Load the data for this form and register it under the form name.
form = 'supplement'
data = ksads.read_data(form)
# overall[form] and `data` are the same object, so keys added to `data` in the
# cells below are also visible through `overall`.
overall[form] = data

In [None]:
# Keep the first row of every (patientid, patienttype) pair, bind the result to
# `df`, and store the same frame back on the form's data dict.
df = data['merged'].drop_duplicates(subset=['patientid', 'patienttype'], keep='first')
data['merged'] = df

In [None]:
# Work on a narrow copy of the merged data so the stored frame is left untouched.
df = data['merged'][['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# The subject ID is the text before the first underscore of patientid, stripped
# of surrounding whitespace. `n` is passed by keyword because pandas 2.0 made
# every str.split argument after `pat` keyword-only (positional use raises TypeError).
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Review the `additionalinfo` column in the rows shown below for possible clarifications or corrections:

In [None]:
# Show the rows of the 'added' table whose additionalinfo value is not null.
data['added'].dropna(subset=['additionalinfo'])

### Quality Control

In [None]:
# Collect every row whose (patientid, patienttype) pair occurs more than once;
# keep=False marks all members of a duplicate group, not only the repeats.
# .copy() makes the selection an independent frame, so adding the 'reason'
# column does not trigger SettingWithCopyWarning.
# NOTE(review): data['merged'] already had drop_duplicates applied on these same
# keys in an earlier cell, so this frame may always be empty -- confirm intent.
duplicates = df[df.duplicated(['patientid', 'patienttype'], keep=False)].copy()
duplicates['reason'] = 'Duplicate IDs'
data['duplicates'] = duplicates
ksads.warn_duplicates(duplicates, form)

In [None]:
# Rows returned by h.difference(df, studyids.subject), tagged with a reason and
# with 'sitename' renamed to 'site'.
# NOTE(review): presumably these are the KSADS rows whose subject is absent from
# the Redcap ID list -- confirm against PandasHelper.difference.
not_in_redcap = (
    h.difference(df, studyids.subject)
    .assign(reason='PatientID not in Redcap')
    .rename(columns={'sitename': 'site'})
)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Rows returned by h.difference(studydata, df.subject), narrowed step by step.
# NOTE(review): presumably these are Redcap subjects with no KSADS record for
# this form -- confirm against PandasHelper.difference.
missing = h.difference(studydata, df.subject)
# Keep only rows where `flagged` is null.
missing = missing[missing.flagged.isnull()]
# Keep only interviews before the hard-coded cut-off.
# NOTE(review): compared against a string; assumes interview_date is an ISO-formatted
# string or datetime column -- confirm. The meaning of the cut-off is not shown here.
missing = missing[missing.interview_date < '2019-05-01']
# Drop the 'hcpa' study. Copy AFTER filtering so the assignment below writes to
# an independent frame rather than a filtered slice (avoids SettingWithCopyWarning).
missing = missing[missing.study != 'hcpa'].copy()
missing['reason'] = 'Missing in Box'
data['missing'] = missing
ksads.warn_missing(missing, form)

# Upload New Data

In [None]:
def put_data(d, token=None, url='https://redcap.wustl.edu/redcap/api/'):
    """Send the rows of ``d`` to the REDCap API as a flat CSV record import.

    Parameters
    ----------
    d : pandas.DataFrame
        Rows to upload; serialised with ``to_csv(index=False)``.
    token : str, optional
        REDCap API token. When omitted, the ``ksad_token`` value loaded from
        the settings in the configuration cell is used instead of an empty
        string, so the request is actually authenticated.
    url : str, optional
        API endpoint; defaults to the address this function always used.

    Returns
    -------
    requests.Response
        The raw response. Its status is not checked here; the request asks
        for the imported record IDs (``returnContent='ids'``) as JSON.
    """
    if token is None:
        # Read the credential from the notebook's settings rather than embedding it.
        token = ksad_token

    # Named `payload` so the notebook-level `data` dict is not shadowed.
    payload = {
        'token': token,
        'content': 'record',
        'format': 'csv',
        'type': 'flat',
        # Existing values in REDCap are overwritten by the uploaded ones.
        'overwriteBehavior': 'overwrite',
        'forceAutoNumber': 'false',
        'data': d.to_csv(index=False),
        'returnContent': 'ids',
        'returnFormat': 'json',
    }
    return requests.post(url, payload)

In [None]:
# Upload the intro form's 'added' rows and keep the parsed JSON response in x.
# The length is shown as a quick check.
# NOTE(review): presumably one entry per imported record ID, since the request
# asks for returnContent='ids' -- confirm; an error response would also have a length.
x = put_data(overall['intro']['added']).json()
len(x)

In [None]:
# Upload the screener form's 'added' rows and keep the parsed JSON response in y.
# The length is shown as a quick check.
# NOTE(review): presumably one entry per imported record ID, since the request
# asks for returnContent='ids' -- confirm; an error response would also have a length.
y = put_data(overall['screener']['added']).json()
len(y)

In [None]:
# Upload the supplement form's 'added' rows and keep the parsed JSON response in z.
# The length is shown as a quick check.
# NOTE(review): presumably one entry per imported record ID, since the request
# asks for returnContent='ids' -- confirm; an error response would also have a length.
z = put_data(overall['supplement']['added']).json()
len(z)