Merge REDCap out-of-scanner Face Name Recall variables with in-scanner data pulled from IntraDB

In [None]:
import os, datetime
import csv
import sys
import shutil
from openpyxl import load_workbook
import pandas as pd
from io import BytesIO
import numpy as np
import subprocess
from scipy import stats
from ccf.box import LifespanBox
from ccf.config import LoadSettings
from ccf.redcap import RedcapTable 
# Load the project settings; config['rosetta']['filename'] is read further down.
config = LoadSettings()


In [None]:
# ---- Session-wide settings ----

# Box client, created with cache='./' (presumably the local cache directory
# -- confirm against ccf.box).
box = LifespanBox(cache='./')
verbose = True

# Date stamp of this run, formatted MM_DD_YYYY.
snapshotdate = datetime.date.today().strftime('%m_%d_%Y')

# Folder locations.
pathout = "./prepped/hca"
racepath = "./prepped"
scannerpath = ''

# Rosetta (a.k.a Inventory) file will have all the nda vars and pedids
extrainfo = config['rosetta']['filename']

# REDCap events that are kept when filtering the inventory and REDCap frames.
eventlist = [
    'visit_1_arm_1',
    'visit_2_arm_1',
    'visit_3_arm_1',
    'visit_arm_1',
]

In [None]:
# Load the Rosetta/Inventory file named by extrainfo.
inventory = pd.read_csv(extrainfo)

# Rows without an NDA age are dropped first (the int cast below would fail
# on NaN).  .copy() makes the filtered frame independent of the frame it was
# sliced from, so the column assignments that follow do not trigger pandas'
# SettingWithCopyWarning.
inventory = inventory.loc[inventory['nda_age'].notnull()].copy()

# Interview dates become MM/DD/YYYY strings; ages become whole-number ints.
inventory['nda_interview_date'] = (
    pd.to_datetime(inventory['nda_interview_date']).dt.strftime('%m/%d/%Y')
)
inventory['nda_age'] = inventory['nda_age'].round(0).astype(int)

# Keep only the REDCap events listed in eventlist.
inventory = inventory.loc[inventory['redcap_event_name'].isin(eventlist)]

Fullinventory = inventory.copy()
print("Inventory w select events:", Fullinventory.shape)

In [None]:
# Identifying columns for each participant/visit, taken from the inventory.
#
# The column names are intentionally left unchanged here.  This cell used to
# chain .rename({...}) without columns=, which maps *index* labels and so
# renamed nothing.  The merge further down keys on 'REDCap_id' and
# 'redcap_event_name', and the final column selection references 'nda_age',
# 'nda_interview_date', 'pseudo_guid', 'subject' and 'M/F' before renaming
# them to interview_age / interview_date / subjectkey / src_subject_id / sex.
ndar = Fullinventory[['REDCap_id', 'nda_age', 'nda_interview_date',
                      'pseudo_guid', 'subject', 'M/F', 'redcap_event_name']].copy()

In [None]:
# Fetch the Face Name recall fields (plus ids) from the REDCap table 'hcpa'.
# The per-face fields follow the pattern cb<version>_<face>a and
# cb<version>_<face>a_other, for versions 1-2 and faces 1-10.
fieldlist = ['id', 'redcap_event_name', 'subject_id',
             'counterbalance_v1', 'counterbalance_v2']
for cb_version in (1, 2):
    for face_no in range(1, 11):
        stem = 'cb{}_{}a'.format(cb_version, face_no)
        fieldlist.extend([stem, stem + '_other'])

redcap_hcpa = RedcapTable.get_table_by_name('hcpa')
hca = redcap_hcpa.get_frame(fieldlist)

# Only the events in eventlist are of interest.
hca = hca[hca.redcap_event_name.isin(eventlist)]
hca.head(20)
# counterbalance_v1 and counterbalance_v2 are both stored in the visit_1_arm_1 row,
# so they have to be pulled out of that row before the visits can be stacked.



In [None]:
# Column groups: identifiers + counterbalance assignments + version-1 fields,
# and the version-2 fields on their own.
keycols = ['id', 'subject_id', 'redcap_event_name',
           'counterbalance_v1', 'counterbalance_v2']
cb1cols = keycols + [name for name in hca.columns if 'cb1' in name]
cb2cols = [name for name in hca.columns if 'cb2' in name]
visitcols = cb1cols + cb2cols

# One frame per visit, restricted to the columns above.
is_v1 = hca.redcap_event_name == 'visit_1_arm_1'
is_v2 = hca.redcap_event_name == 'visit_2_arm_1'
hca1 = hca.loc[is_v1, visitcols]
hca2 = hca.loc[is_v2, visitcols]

# this will be empty because counterbalance assignment happened in V1
hca2.counterbalance_v2.value_counts(dropna=False)

In [None]:

# These variables are empty on the visit-2 rows, so discard them there.
hca2 = hca2.drop(columns=['counterbalance_v1', 'counterbalance_v2', 'subject_id'])

# The visit-2 counterbalance assignment lives on the visit-1 row: take it
# from hca1 for every id that has one.
has_v2_assignment = hca1.counterbalance_v2.notnull()
hcacb2 = hca1.loc[has_v2_assignment, ['id', 'counterbalance_v2']]
print(hcacb2.columns)

# Left merge: every id with a V2 assignment is kept, whether or not it has
# a visit-2 row in hca2.
hca2 = hcacb2.merge(hca2, on='id', how='left')
print(hca2.redcap_event_name.value_counts())


In [None]:
#stack V1 CBs
# Rename maps for visit 1: the counterbalance column becomes 'version' and
# cb<N>_<k>a / cb<N>_<k>a_other become f<k>_recall / f<k>_other (k = 1..10).
v1_names_cb1 = {'counterbalance_v1': 'version'}
v1_names_cb2 = {'counterbalance_v1': 'version'}
for k in range(1, 11):
    recall_name = 'f{}_recall'.format(k)
    other_name = 'f{}_other'.format(k)
    v1_names_cb1['cb1_{}a'.format(k)] = recall_name
    v1_names_cb1['cb1_{}a_other'.format(k)] = other_name
    v1_names_cb2['cb2_{}a'.format(k)] = recall_name
    v1_names_cb2['cb2_{}a_other'.format(k)] = other_name

# Rows are split by the counterbalance version used at visit 1, and each
# subset has its own version's fields renamed to the common f* names.
hca1cb1 = hca1[hca1.counterbalance_v1 == 1.0].rename(columns=v1_names_cb1)
hca1cb2 = hca1[hca1.counterbalance_v1 == 2.0].rename(columns=v1_names_cb2)

stacked_v1 = pd.concat([hca1cb1, hca1cb2])
hca1all = stacked_v1.drop(columns=['counterbalance_v2'])
hca1all.head()
hca1all.redcap_event_name.value_counts(dropna=False)
hca1all.version.value_counts(dropna=False)
hca1all.to_csv('test1.csv')

In [None]:
#stack V2 cbs
# Rename maps for visit 2: the counterbalance column becomes 'version' and
# cb<N>_<k>a / cb<N>_<k>a_other become f<k>_recall / f<k>_other (k = 1..10).
v2_names_cb1 = {'counterbalance_v2': 'version'}
v2_names_cb2 = {'counterbalance_v2': 'version'}
for k in range(1, 11):
    recall_name = 'f{}_recall'.format(k)
    other_name = 'f{}_other'.format(k)
    v2_names_cb1['cb1_{}a'.format(k)] = recall_name
    v2_names_cb1['cb1_{}a_other'.format(k)] = other_name
    v2_names_cb2['cb2_{}a'.format(k)] = recall_name
    v2_names_cb2['cb2_{}a_other'.format(k)] = other_name

# Rows are split by the counterbalance version assigned for visit 2.
hca2cb1 = hca2[hca2.counterbalance_v2 == 1.0].rename(columns=v2_names_cb1)
hca2cb2 = hca2[hca2.counterbalance_v2 == 2.0].rename(columns=v2_names_cb2)

hca2all = pd.concat([hca2cb1, hca2cb2])
hca2all.head()
hca2all.redcap_event_name.value_counts(dropna=False)

# Rows with no event name (ids with a V2 assignment that matched no
# visit-2 row in the left merge) are dropped.
hca2all = hca2all.loc[hca2all.redcap_event_name.notnull()]
hca2all.to_csv('test2.csv')

In [None]:
hca1.redcap_event_name.value_counts()
hca2.redcap_event_name.value_counts()

# Stack the visit-1 and visit-2 frames into one long table.
both_visits = [hca1all, hca2all]
hcacat = pd.concat(both_visits)
hcacat.redcap_event_name.value_counts()

# Keep only rows where the recall value for face 1 is present.
f1_present = hcacat.f1_recall.notnull()
hcacat = hcacat.loc[f1_present]
hcacat.to_csv('test12.csv')
hcacat.head(50)

In [None]:
# Inspect the column names of ndar (notebook display only; nothing is assigned).
ndar.columns
#hcacat.columns

In [None]:
# Attach the stacked REDCap recall data to the inventory rows, matching on
# REDCap id and event name.
hcat = pd.merge(ndar, hcacat,
                left_on=['REDCap_id', 'redcap_event_name'],
                right_on=['id', 'redcap_event_name'],
                how='left')

# Keep only inventory rows that picked up recall data.  .copy() makes the
# filtered frame independent, so the column assignments below do not trigger
# pandas' SettingWithCopyWarning.
hcat = hcat.loc[hcat['version'].notnull()].copy()

# Nullable integer dtype: whole numbers while still allowing missing values.
hcat['version'] = hcat['version'].astype('Int64')
recall_cols = ['f{}_recall'.format(n) for n in range(1, 11)]
for col in recall_cols:
    hcat[col] = hcat[col].astype('Int64')

# Only these three free-text columns are cleaned: quote characters are
# stripped and the text is truncated to its first 10 characters.
for col in ['f7_other', 'f3_other', 'f9_other']:
    hcat[col] = hcat[col].str.replace('"', '').str.replace("'", '').str[0:10]

# Final layout: identifying columns, version, then recall/other per face.
# 'redcap_event_name' is not part of the output.
face_cols = []
for n in range(1, 11):
    face_cols += ['f{}_recall'.format(n), 'f{}_other'.format(n)]

id_cols = ['nda_age', 'nda_interview_date', 'pseudo_guid', 'subject', 'M/F', 'version']
hcat = hcat[id_cols + face_cols].rename(columns={
    'pseudo_guid': 'subjectkey',
    'subject': 'src_subject_id',
    'nda_age': 'interview_age',
    'nda_interview_date': 'interview_date',
    'M/F': 'sex',
    'version': 'version_form',
})


In [None]:
# Dump the merged, renamed table to test.csv in the working directory.
hcat.to_csv('test.csv')


## Placeholder for adding IntraDB Staging Vars. i.e. recall stats per facename recall in scanner

In [None]:
#Once you have the IntraDB stats and all visits, check that:
# - the actual CB used at the scanner for V1 matches what was recorded in REDCap (drop REDCap if not),
# - the actual CB used at the scanner for V2 matches what was recorded in REDCap (drop REDCap if not),
# - the CB used at the scanner for V1 is not equal to the CB used at the scanner for V2 (drop V2 if they are equal).

#HCA6766290_V1
#HCA8163169_V1
#HCA9161877_ (not sure which visit)
#HCA9319684_V1

#of course, please check systematically, too.

In [None]:
#write out csv for validation
filePath=pathout+'/facename01.csv'

# Create the output folder if it is missing, so the write below cannot fail
# on a fresh checkout.
os.makedirs(os.path.dirname(filePath) or '.', exist_ok=True)

# Mode 'w' truncates any previous file, which replaces the former
# delete-then-append sequence (and its misleading "Can not delete" message
# on a first run).  newline='' is what pandas asks for when to_csv is given
# a text handle; the header line is therefore terminated with os.linesep so
# it matches the line endings pandas writes by default.
with open(filePath, 'w', newline='') as f:
    # First line is the structure name and version: "facename,1".
    f.write("facename,1" + os.linesep)
    hcat.to_csv(f, index=False)
