In [None]:
# Download the latest data from KSADS.net.
# NOTE: this takes more than 30 minutes to complete.
import DownloadKsads

DownloadKsads.main()
print('Done.')

In [None]:
from IPython.display import display, HTML
import ipysheet
import ipywidgets as wg
import os
import pandas as pd
import PandasHelper as h
from download.redcap import get_behavioral_ids, RedcapTable
from KsadsHelper import KSADS

from config import LoadSettings

In [None]:
def read_csv(date, form, base_dir=None):
    """Load one downloaded form as a DataFrame.

    The file is read from ``<base_dir>/<date>/<form>.csv``.

    Parameters
    ----------
    date : str
        Name of the snapshot sub-directory.
    form : str
        Form name; ``'.csv'`` is appended to build the file name.
    base_dir : str, optional
        Root directory holding the snapshots. When omitted, the notebook-level
        ``downloads_dir`` variable is used (it must be defined before the call).

    Returns
    -------
    pandas.DataFrame
    """
    root = downloads_dir if base_dir is None else base_dir
    # low_memory=False makes pandas read the file in one pass instead of chunked type inference.
    return pd.read_csv(os.path.join(root, date, form + '.csv'), low_memory=False)

In [None]:
# Handle to the REDCap table named 'ksads'.
table = RedcapTable.get_table_by_name('ksads')
# Behavioral ids as returned by the project helper; later cells read its
# `subject` and `study` columns.
studyids = get_behavioral_ids()
# Same rows, excluding those whose `study` is 'hcpdparent'.
studydata = studyids[studyids.study != 'hcpdparent']

In [None]:
config = LoadSettings()
downloads_dir = config['KSADS']['download_dir']

# Each snapshot is a sub-directory of downloads_dir (read_csv joins
# downloads_dir/<date>/<form>.csv), so ignore stray files such as .DS_Store.
# Names are ordered lexicographically.
dates = sorted(
    entry for entry in os.listdir(downloads_dir)
    if os.path.isdir(os.path.join(downloads_dir, entry))
)
if not dates:
    raise FileNotFoundError(f'No download snapshots found in {downloads_dir!r}')

# First and last snapshot in sorted order.
olddate = dates[0]
newdate = dates[-1]

form = 'intro'
form_complete = f'{form}_complete'
old = read_csv(olddate, form)
new = read_csv(newdate, form)
# Current REDCap contents for the 'common' and intro forms, restricted to rows
# whose <form>_complete column equals 1.
redcap_df = table.get_frame(forms=['common', form])
redcap_df = redcap_df[redcap_df[form_complete] == 1]

In [None]:
# Compare the old and new snapshots by `id` using the PandasHelper (`h`) routines.
# NOTE(review): the semantics of h.diff / h.intersection are not visible here; the
# variable names suggest rows only in `old`, rows in both, and rows only in `new` — confirm.
deleted = h.diff(old, new.id)
modified = h.intersection(old, new, 'id').sort_values('id')
added = h.diff(new, old.id)
# Set the `<form>_complete` and `common_complete` columns to 1 on the candidate rows.
added[form_complete], added['common_complete'] = 1, 1
# NOTE(review): here h.diff receives a whole DataFrame, whereas above it receives
# an id Series — confirm the helper supports both.
added = h.diff(added, redcap_df)

In [None]:
# Combine the current REDCap rows with the newly added rows (if any).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# like append, it keeps the original index labels of both frames.
if added is not None and not added.empty:
    merged = pd.concat([redcap_df, added], sort=False)
else:
    merged = redcap_df

# Intro

### Additional Info
Please read the additional info columns for possible clarifications/corrections:

In [None]:
# Show additional Info
show = added.dropna(subset=['additionalinfo']).iloc[:,:6]
if not show.empty:
    display(HTML('<H3>Additional Info </H3><SMALL> Please read the additional info columns for possible clarifications/corrections</SMALL>'))
    display(ipysheet.sheet(ipysheet.from_dataframe(show)))

### Quality Control

In [None]:
# Rows that share (patientid, patienttype) between the current REDCap data and
# the new rows. NOTE(review): assumes h.intersection adds a `_merge` column
# labelled with the `sources` names, since it is used as the index below — confirm.
dups = h.intersection(redcap_df, added, ['patientid','patienttype'], sources=('current','new'))

# One sheet per `id` group. The loop variable is named `record_id` rather than
# `id` so the builtin id() is not shadowed for the rest of the kernel session.
for record_id, group in dups.groupby('id'):
    # Identifier columns plus whichever columns the helper reports as differing.
    cols = ['id','patientid'] + h.unequal_columns(group)
    show = group[cols].set_index('_merge')
    # Leading "Version" column, pre-filled with "Keep".
    show.insert(0, "Version", "Keep")
    sheet = ipysheet.sheet(ipysheet.from_dataframe(show))
    # Render the sheet inside an Output widget; later cells reuse the last
    # iteration's `sheetO`, `sheet` and `show`.
    sheetO = wg.Output()
    with sheetO:
        display(sheet)
    display(sheetO)

In [None]:
# Shared layout for the toggle buttons: 60px wide, full height of the container.
btn_size = wg.Layout(width='60px', height="100%")

In [None]:
# For every row of `show`: print its first-column value, then build a "Keep"
# toggle button and hand it to ipysheet.cell for position (i, 0).
# NOTE(review): ipysheet.cell presumably targets the most recently created sheet — confirm.
for i in range(len(show)):
    print(show.iloc[i, 0])
    btn = wg.ToggleButton(description="Keep", layout=btn_size)
    ipysheet.cell(i, 0, btn)

In [None]:
for idx, v in show.iterrows():
    # TODO: unfinished cell — the loop body was never written. `pass` keeps the
    # cell syntactically valid so the notebook can run top to bottom.
    pass

In [None]:
# Scratch check: print a line while the `sheetO` Output widget is the active context.
with sheetO:
    print("Test")

In [None]:
# Clear whatever the `sheetO` Output widget currently holds.
sheetO.clear_output()

In [None]:
# NOTE(review): called with no arguments, while the next cell passes a column
# index and a value list — this looks like leftover exploration and probably
# raises a TypeError; confirm and remove.
ipysheet.column()

In [None]:
# Pass column index 0 and a one-element list holding a "Keep" toggle button to ipysheet.column.
# NOTE(review): presumably applies to the most recently created sheet — confirm.
ipysheet.column(0, [wg.ToggleButton(description="Keep", layout=btn_size)])

In [None]:
# Rows of `merged` that share (patientid, patienttype) with at least one other row,
# after discarding every row that is an exact full-row copy of another
# (keep=False drops all copies, not just the extras).
merged[merged.duplicated(['patientid','patienttype'], keep=False)].drop_duplicates(keep=False)

In [None]:
# All rows of `merged` whose (patientid, patienttype) occurs more than once,
# sorted so that rows with the same key sit next to each other.
dups = merged[merged.duplicated(['patientid','patienttype'], keep=False)].sort_values(['patientid','patienttype'])

#ksads.warn_duplicates(duplicates, form)
# Last expression of the cell: display the frame.
dups

In [None]:
def series_is_equal(series):
    """Return True when every value in ``series`` is the same.

    Missing values count as equal to each other, so an all-NaN series is
    "equal", while a mix of NaN and real values is not. Empty and
    single-element series are trivially equal.

    The previous ``duplicated(keep=False).all()`` check only verified that each
    value occurs at least twice, so ``[1, 1, 2, 2]`` was reported as equal and
    a one-element series as unequal.
    """
    return series.nunique(dropna=False) <= 1

In [None]:
# Names of the columns of `dups` whose values are not all identical.
# DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
[colname for colname, value in dups.items() if not series_is_equal(value)]

In [None]:
def diff(left, right):
    """Return the rows of ``left`` that have no matching row in ``right``.

    NOTE(review): the original cell was an unfinished stub (no colon, no body).
    This implementation follows the left-merge-with-indicator experiment in the
    neighbouring cell — confirm it matches the intended semantics.

    Rows are matched on all columns the two frames have in common (the pandas
    default when ``on`` is not given). The result keeps the columns produced by
    the merge, without the helper ``_merge`` column.
    """
    merged = left.merge(right, how='left', indicator=True)
    only_left = merged[merged['_merge'] == 'left_only']
    return only_left.drop(columns='_merge')

In [None]:
# Left-join `new` onto `old` on their shared columns; indicator=True adds a
# `_merge` column telling whether each row of `new` also exists in `old`.
new.merge(old, how='left', indicator=True)

In [None]:
def isequal(xlist):
    """Return True when all items are missing, or all items equal the first one.

    ``xlist`` must support ``xlist[0]`` and slicing (e.g. a plain list).
    An empty list counts as equal.
    """
    if all(pd.isna(item) for item in xlist):
        return True
    first = xlist[0]
    return all(first == item for item in xlist[1:])

def unequal_columns(df):
    """Return the names of the columns of ``df`` whose values are not all equal.

    Equality is decided per column by ``isequal``. Column order is preserved.
    """
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    return [colname for colname, value in df.items() if not isequal(value.to_list())]

In [None]:
# Show only the columns of `dups` that unequal_columns reports as differing.
dups[unequal_columns(dups)]

In [None]:
# For each `id` group of duplicates, display just the columns that differ
# within the group. The loop variable is `record_id` rather than `id` so the
# builtin id() is not shadowed for the rest of the kernel session.
for record_id, group in dups.groupby('id'):
    display(group[unequal_columns(group)])

In [None]:
# NOTE(review): merge is called without the frame to merge with, so this looks
# like leftover exploration and probably raises a TypeError; confirm and remove.
old.merge()

In [None]:
# Report, per column of `dups`, whether it is entirely missing or holds
# differing values. DataFrame.items() replaces iteritems() (removed in pandas 2.0).
for colname, value in dups.items():
    xx = value.to_list()
    if not xx:
        # No rows at all: nothing to compare, and pop() would raise IndexError.
        continue
    # Compare the last value against all the others.
    x = xx.pop()

    if pd.isna(x) and all(map(pd.isna, xx)):
        print(colname, 'is all nan')
    elif not all([x == i for i in xx]):
        print(x, xx)

In [None]:
# Build the "not in REDCap" report for the current form.
# NOTE(review): in top-to-bottom order `df` and `data` are first assigned in the
# Screener section, and `ksads` is never assigned in the visible notebook — this
# cell relies on kernel state from elsewhere.
# NOTE(review): h.difference presumably returns the rows of `df` whose key is
# absent from studyids.subject — confirm against PandasHelper.
not_in_redcap = h.difference(df, studyids.subject).copy()
not_in_redcap['reason'] = 'PatientID not in Redcap'
# Rename the `sitename` column to `site` on the copy.
not_in_redcap.rename(columns={'sitename': 'site'}, inplace=True)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Build the "missing" report for the current form.
# NOTE(review): h.difference presumably returns the rows of `studydata` whose
# key is absent from df.subject — confirm against PandasHelper.
missing = h.difference(studydata, df.subject).copy()
# Keep only rows with no `flagged` value.
missing = missing[missing.flagged.isnull()]
# Keep rows whose interview_date sorts before '2019-05-01'.
# NOTE(review): the cutoff is a hard-coded string; confirm the column dtype makes
# this comparison meaningful.
missing = missing[missing.interview_date < '2019-05-01']
# Exclude rows whose `study` is 'hcpa'.
missing = missing[missing.study != 'hcpa']
missing['reason'] = 'Missing in Box'
data['missing'] = missing
ksads.warn_missing(missing, form)

# Screener

In [None]:
# Switch to the screener form and load its data through `ksads`.
# NOTE(review): neither `ksads` nor `overall` is assigned anywhere in the visible
# notebook (only the `KSADS` class is imported) — they must exist before this cell runs.
form = 'screener'
# `data` is indexed by string keys below ('merged', 'added', ...).
data = ksads.read_data(form)
overall[form] = data

In [None]:
# Drop repeated (patientid, patienttype) rows from the merged frame, keeping the
# last occurrence of each pair.
data['merged'] = data['merged'].drop_duplicates(['patientid','patienttype'], keep='last')
df = data['merged']

In [None]:
# Work on a copy limited to the identifying columns.
df = data['merged']
df = df[['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# `subject` is the part of patientid before the first underscore, stripped of
# surrounding whitespace. `n` is passed by keyword because pandas 2.0 made every
# str.split argument after `pat` keyword-only.
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Please read the additional info columns for possible clarifications/corrections:

In [None]:
# Rows of the 'added' frame that have a non-null `additionalinfo`.
data['added'].dropna(subset=['additionalinfo'])

### Quality Control

In [None]:
# Every row whose (patientid, patienttype) pair occurs more than once.
# .copy() makes `duplicates` an independent frame so the column assignment below
# does not trigger SettingWithCopyWarning on a slice of `df`.
duplicates = df[df.duplicated(['patientid', 'patienttype'], keep=False)].copy()
duplicates['reason'] = 'Duplicate IDs'
data['duplicates'] = duplicates
ksads.warn_duplicates(duplicates, form)

In [None]:
# Build the "not in REDCap" report for the current form.
# NOTE(review): h.difference presumably returns the rows of `df` whose key is
# absent from studyids.subject — confirm against PandasHelper.
not_in_redcap = h.difference(df, studyids.subject).copy()
not_in_redcap['reason'] = 'PatientID not in Redcap'
# Rename the `sitename` column to `site` on the copy.
not_in_redcap.rename(columns={'sitename': 'site'}, inplace=True)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Build the "missing" report for the current form.
# NOTE(review): h.difference presumably returns the rows of `studydata` whose
# key is absent from df.subject — confirm against PandasHelper.
missing = h.difference(studydata, df.subject).copy()
# Keep only rows with no `flagged` value.
missing = missing[missing.flagged.isnull()]
# Keep rows whose interview_date sorts before the hard-coded '2019-05-01' cutoff.
missing = missing[missing.interview_date < '2019-05-01']
# Exclude rows whose `study` is 'hcpa'.
missing = missing[missing.study != 'hcpa']
missing['reason'] = 'Missing in Box'
data['missing'] = missing
ksads.warn_missing(missing, form)

# Supplement

In [None]:
# Switch to the supplement form and load its data through `ksads`.
# NOTE(review): neither `ksads` nor `overall` is assigned anywhere in the visible
# notebook — they must exist before this cell runs.
form = 'supplement'
data = ksads.read_data(form)
overall[form] = data

In [None]:
# Drop repeated (patientid, patienttype) rows; with no `keep` argument pandas
# keeps the first occurrence of each pair.
# NOTE(review): the screener section passes keep='last' for the same step —
# confirm which behaviour is intended here.
data['merged'] = data['merged'].drop_duplicates(['patientid','patienttype'])
df = data['merged']

In [None]:
# Work on a copy limited to the identifying columns.
df = data['merged']
df = df[['patientid', 'patienttype', 'sitename', 'additionalinfo']].copy()
# `subject` is the part of patientid before the first underscore, stripped of
# surrounding whitespace. `n` is passed by keyword because pandas 2.0 made every
# str.split argument after `pat` keyword-only.
df['subject'] = df['patientid'].str.split("_", n=1, expand=True)[0].str.strip()

### Additional Info
Please read the additional info columns for possible clarifications/corrections:

In [None]:
# Rows of the 'added' frame that have a non-null `additionalinfo`.
data['added'].dropna(subset=['additionalinfo'])

### Quality Control

In [None]:
# Every row whose (patientid, patienttype) pair occurs more than once.
# .copy() makes `duplicates` an independent frame so the column assignment below
# does not trigger SettingWithCopyWarning on a slice of `df`.
duplicates = df[df.duplicated(['patientid', 'patienttype'], keep=False)].copy()
duplicates['reason'] = 'Duplicate IDs'
data['duplicates'] = duplicates
ksads.warn_duplicates(duplicates, form)

In [None]:
# Build the "not in REDCap" report for the current form.
# NOTE(review): h.difference presumably returns the rows of `df` whose key is
# absent from studyids.subject — confirm against PandasHelper.
not_in_redcap = h.difference(df, studyids.subject).copy()
not_in_redcap['reason'] = 'PatientID not in Redcap'
# Rename the `sitename` column to `site` on the copy.
not_in_redcap.rename(columns={'sitename': 'site'}, inplace=True)
data['not_in_redcap'] = not_in_redcap
ksads.warn_not_in_redcap(not_in_redcap, form)

In [None]:
# Build the "missing" report for the current form.
# NOTE(review): h.difference presumably returns the rows of `studydata` whose
# key is absent from df.subject — confirm against PandasHelper.
missing = h.difference(studydata, df.subject).copy()
# Keep only rows with no `flagged` value.
missing = missing[missing.flagged.isnull()]
# Keep rows whose interview_date sorts before the hard-coded '2019-05-01' cutoff.
missing = missing[missing.interview_date < '2019-05-01']
# Exclude rows whose `study` is 'hcpa'.
missing = missing[missing.study != 'hcpa']
missing['reason'] = 'Missing in Box'
data['missing'] = missing
ksads.warn_missing(missing, form)

# Upload New Data

In [None]:
def put_data(d):
    """Send ``d`` through ``ksads.redcap.send_frame`` and return whatever it returns.

    The upload cells call ``.json()`` on the returned object.
    """
    response = ksads.redcap.send_frame(d)
    return response

In [None]:
# Upload the intro form's 'added' rows and decode the JSON reply.
x = put_data(overall['intro']['added']).json()
# Cell output: length of the decoded reply.
len(x)

In [None]:
# Upload the screener form's 'added' rows and decode the JSON reply.
y = put_data(overall['screener']['added']).json()
# Cell output: length of the decoded reply.
len(y)

In [None]:
# Upload the supplement form's 'added' rows and decode the JSON reply.
z = put_data(overall['supplement']['added']).json()
# Cell output: length of the decoded reply.
len(z)