In [68]:
from funcs import funcs
from Crosswalk.Transformer import Transformer
from Crosswalk.DataCache import DataCache
from Crosswalk.NDAWriter import NDAWriter
from Crosswalk.Manager import Manager

from Crosswalk.Loader import BoxLoader, BoxHcdLoader, QintHcdLoader, RedcapLoader, ParentLoader
import pandas as pd

In [69]:
#create folder for prepped structures, if it doesn't exist
!!mkdir prepped
!!mkdir prepped/hcd

['mkdir: cannot create directory ‘prepped/hcd’: File exists']

In [70]:
# Build the pipeline manager from three parts: a DataCache over the source loaders,
# an NDAWriter pointed at ./prepped/hcd/ (the folder the cleanup helpers later in this
# notebook read from), and a Transformer configured with `funcs` and ./maps/hcd/.
M = Manager(
        data =  DataCache(
            ParentLoader(),
            RedcapLoader('child'),
            RedcapLoader('teen'),
            # NOTE(review): the number is presumably the Box id of the PennCNP source -- confirm
            BoxHcdLoader('PennCNP', 592325063896),
            QintHcdLoader()
        ),
        # NOTE(review): `validator` is an absolute path inside one user's home directory;
        # it must be edited before this notebook can run under another account or machine.
        writer = NDAWriter(completed_dir="./prepped/hcd/", validator="/home/petra/.local/bin/vtcmd"),
        transformer = Transformer(funcs = funcs, map_dir='./maps/hcd/')
)

In [71]:
# This step requires a 'rosetta stone' file that has all the required NDA fields for
# all subjects you intend to submit at this time.  This approach makes it easier to keep track of
# subject counts across data types.  For example, if your required fields are already stored in XNAT
# because you had the CCF upload your imaging data for you, you can export this csv from XNAT and
# rename it as appropriate.
# Place this file at the main level of this repository, and name it in your config file.
# The file is read by the _post_load_hook_ method in Loader.py.  That method is currently hardcoded
# to read this csv and rename its columns to the NDA requirements of
# ['subject', 'subjectkey', 'gender', 'interview_date', 'interview_age'], as follows:
        #rosetta = pd.read_csv('UnrelatedHCAHCD_w_STG_Image_and_pseudo_GUID05_27_2020.csv')
        #rosetta = rosetta[['subjectped', 'nda_guid', 'nda_gender', 'nda_interview_date', 'nda_interview_age']]
        #rosetta.columns = ['subject', 'subjectkey', 'gender', 'interview_date', 'interview_age']
# Future versions of this code will move this file name into config.py (or an even better place), if demand
# warrants.  For now, either tweak that method to read your own rosetta file, making sure the result has the
# required columns, or fill out the template file and save it as
# 'UnrelatedHCAHCD_w_STG_Image_and_pseudo_GUID05_27_2020.csv' (or whatever name you set
# under the 'rosetta' attribute in the config file).

M.preload_data()

Timing:  parent 6.169095039367676
teen Some columns were unavailable:  {'bld_rucdr_des_', 'bld_core_des_'}
Timing:  teen 2.5849153995513916
child Some columns were unavailable:  {'bld_core_des_', 'bld_rucdr_des'}
Timing:  child 2.825131416320801
Timing:  PennCNP 0.055350542068481445
Timing:  qint 0.5449104309082031


In [72]:
#Ad hoc functions to clean up empty rows for particular instruments after generated (issue for redcap data)
def redcleanup(structure="lbadl01",filePath="./prepped/hcd/",extraomitcol1='NO',extraomitcol2='NO',extraomitcol3='NO',extraomitcol4='NO'):
    """Drop rows that carry no instrument data from a prepped structure csv, in place.

    The file ``filePath + structure + ".csv"`` is expected to have a one-line
    structure header followed by the column header (it is read with ``header=1``).
    A row is dropped when every column other than the protected ones is empty.
    The file is then rewritten with the structure header line rebuilt from
    ``structure`` (name without its last two characters, and those two
    characters as an integer).

    Parameters
    ----------
    structure : str
        Structure short name including its two-digit version suffix, e.g. "lbadl01".
    filePath : str
        Directory prefix; it is concatenated as-is, so it must end with a path separator.
    extraomitcol1 .. extraomitcol4 : str
        Optional extra column names to leave out of the emptiness check, on top of
        subjectkey, src_subject_id, interview_date, interview_age and sex.
        'NO' (the default) or an empty value means "not used".

    Raises
    ------
    ValueError
        If one of the protected columns is not present in the file.
    """
    print(structure)
    strucroot=structure[:-2]
    strucnum=structure[-2:]
    csv_path=filePath+structure+".csv"

    # Read every cell as text: with dtype inference an integer column that contains
    # blanks comes back as float and would be written out again as 1.0, 2.0, ...
    df=pd.read_csv(csv_path,header=1,dtype=str)

    print("NumRows Before: "+str(df.shape[0]))
    protected=['subjectkey','src_subject_id','interview_date','interview_age','sex']
    for extra in (extraomitcol1,extraomitcol2,extraomitcol3,extraomitcol4):
        if extra and extra !='NO':
            protected.append(extra)
    missing=[col for col in protected if col not in df.columns]
    if missing:
        raise ValueError("column(s) not found in "+csv_path+": "+", ".join(missing))

    # Only the remaining (instrument) columns decide whether a row is empty
    subfields=[col for col in df.columns if col not in protected]
    df=df.dropna(how='all',subset=subfields)
    print("NumRows After: "+str(df.shape[0]))

    # newline='' is what pandas asks for when to_csv is handed an open text file;
    # it keeps the platform from translating the line endings a second time
    with open(csv_path,'w',newline='') as f:
        f.write(strucroot+","+str(int(strucnum))+"\n")
        df.to_csv(f,index=False)
      
#these rows are already filled with the 999 placeholder (see the check below), so the
#empty-row finder in redcleanup won't catch them
def bisbasparent999(structure="bisbas01",filePath="./prepped/hcd/"):
    """Remove rows whose bissc_total equals 999 from a prepped structure csv, in place.

    The file ``filePath + structure + ".csv"`` is read with its second line as the
    column header, filtered, and rewritten with the structure header line rebuilt
    from ``structure`` (name without its last two characters, and those two
    characters as an integer).

    Parameters
    ----------
    structure : str
        Structure short name including its two-digit version suffix.
    filePath : str
        Directory prefix; it is concatenated as-is, so it must end with a path separator.
    """
    print(structure)
    strucroot=structure[:-2]
    strucnum=structure[-2:]
    csv_path=filePath+structure+".csv"
    # Read every cell as text so that values are written back exactly as they were;
    # dtype inference would turn integer columns containing blanks into floats (4 -> 4.0)
    df=pd.read_csv(csv_path,header=1,dtype=str)
    print("NumRows Before: "+str(df.shape[0]))
    # Compare numerically so "999" and "999.0" are both recognised; unparsable cells become NaN
    is_placeholder=pd.to_numeric(df['bissc_total'],errors='coerce').eq(999)
    df=df.loc[~is_placeholder].copy()
    print("NumRows After: "+str(df.shape[0]))
    # newline='' is what pandas asks for when to_csv is handed an open text file
    with open(csv_path,'w',newline='') as f:
        f.write(strucroot+","+str(int(strucnum))+"\n")
        df.to_csv(f,index=False)

def cleanlist(structurelist=['lbadl01','mchq01']):
    """Apply redcleanup to each structure name in structurelist.

    Every structure is cleaned in "./prepped/hcd/" with redcleanup's default
    (no extra) omit columns, in the order given.
    """
    for struct_name in structurelist:
        redcleanup(filePath="./prepped/hcd/", structure=struct_name)

In [78]:
# Smoke test: run a single structure first to see whether the pipeline is working
M.run('deldisk01')
# TODO: data needs to be corrected in REDCap.
# Get the subject/site and open a Trello ticket.

For struct "deldisk01": 
No errors!


In [80]:
# Structures to run.
# NOTE: the commented-out struct (presumably 'drugscr01' in structs2 below) doesn't seem to be laid out
# as requested and needs further investigation -- perhaps turn it into a singleton structure,
# since it doesn't follow the typical one-row-per-subject format.
structs = [
    'asr01',
    'bsc01',
    'cbcl01',
    'cbcl1_501',
    'cbq01',
    'deldisk01',
    'er4001',
    'leap01',
    'mab01',
    'mctq01',
    'medh01',
    'mendt01',
    'mmse01',
    'neo_ffi_form_s_adult_200301',
    'phenx_su01',
    'psqi01',
    'saiq01',
    'sdq01',
    'sleepdis01',
    'socdem01',
    'vision_tests01',
    'vitals01',
    'wais_iv_part101',
    'wisc_v01',
    'wppsiiv01',
    'ysr01'
]

# These have a parent report as well as a self report in some cases, so there can be multiple rows per person
structs2=[
 #   'drugscr01',
    'bisbas01',  
    'eatq01',
    'fenvs01',
    'gbi01',
    'pds01',
    'scan_debrief01',
    'srs02',
    'upps01',
    'screentime01'  
]
# Run every structure through the manager: first `structs`, then `structs2`
for s in structs:
    M.run(s)

for s in structs2:
    M.run(s)



For struct "asr01": 
No errors!
For struct "bsc01": 
No errors!
For struct "cbcl01": 
No errors!
For struct "cbcl1_501": 
No errors!
For struct "cbq01": 
No errors!
For struct "deldisk01": 
No errors!
For struct "er4001": 
No errors!
For struct "leap01": 
lan3err: Invalid values  {130.0, 135.0, 200.0, 105.0, 175.0, 240.0, 181.0, 150.0, 120.0, 250.0}
Dropping 14 values.
lan4err: Invalid values  {200.0, 105.0, 300.0, 110.0, 175.0, 210.0, 180.0, 150.0, 120.0, 155.0}
Dropping 13 values.
No errors!
For struct "mab01": 
No errors!
For struct "mctq01": 
No errors!
For struct "medh01": 
No errors!
For struct "mendt01": 
No errors!
For struct "mmse01": 
No errors!
For struct "neo_ffi_form_s_adult_200301": 
No errors!
For struct "phenx_su01": 
No errors!
For struct "psqi01": 
No errors!
For struct "saiq01": 
No errors!
For struct "sdq01": 
No errors!
For struct "sleepdis01": 
No errors!
For struct "socdem01": 
No errors!
For struct "vision_tests01": 
No errors!
For struct "vitals01": 
No errors!

In [81]:
#now clean up empty rows
# every structure in `structs`: cleaned with redcleanup's default protected columns only
cleanlist(structurelist=structs)
# bisbas01 has its own helper, which filters on bissc_total == 999 instead of looking for empty rows
bisbasparent999(structure="bisbas01",filePath="./prepped/hcd/")
# remaining structs2 files: the extraomitcol* columns named here are left out of the
# "is this row empty?" check, in addition to redcleanup's default protected columns
redcleanup(structure="eatq01",filePath="./prepped/hcd/",extraomitcol1='respond')
redcleanup(structure='fenvs01',filePath="./prepped/hcd/",extraomitcol1='version_form')
redcleanup(structure='gbi01',filePath="./prepped/hcd/",extraomitcol1='version_form')
redcleanup(structure="pds01",filePath="./prepped/hcd/",extraomitcol1='respond')
redcleanup(structure='scan_debrief01',filePath="./prepped/hcd/",extraomitcol1='version_form')
redcleanup(structure='srs02',filePath="./prepped/hcd/",extraomitcol1='respond',extraomitcol2='respond_detail',extraomitcol3='phenotype')
redcleanup(structure='upps01',filePath="./prepped/hcd/",extraomitcol1='version_form')
redcleanup(structure='screentime01',filePath="./prepped/hcd/")

asr01
NumRows Before: 821
NumRows After: 161
bsc01
NumRows Before: 1480
NumRows After: 821
cbcl01
NumRows Before: 659
NumRows After: 659
cbcl1_501
NumRows Before: 659
NumRows After: 659
cbq01
NumRows Before: 659
NumRows After: 93
deldisk01
NumRows Before: 788
NumRows After: 788
er4001
NumRows Before: 788
NumRows After: 787
leap01
NumRows Before: 821
NumRows After: 821
mab01
NumRows Before: 659
NumRows After: 659
mctq01
NumRows Before: 659
NumRows After: 659
medh01
NumRows Before: 821
NumRows After: 821
mendt01
NumRows Before: 821
NumRows After: 821
mmse01
NumRows Before: 821
NumRows After: 376
neo_ffi_form_s_adult_200301
NumRows Before: 821
NumRows After: 821
phenx_su01
NumRows Before: 821
NumRows After: 821
psqi01
NumRows Before: 162
NumRows After: 161
saiq01
NumRows Before: 659
NumRows After: 659
sdq01
NumRows Before: 659
NumRows After: 659
sleepdis01
NumRows Before: 659
NumRows After: 365
socdem01
NumRows Before: 821
NumRows After: 821
vision_tests01
NumRows Before: 821
NumRows Afte