In [1]:
import pandas as pd
# Opt in to pandas copy-on-write mode for the whole session.
pd.options.mode.copy_on_write = True
import warnings
# Keep pandas PerformanceWarning messages out of the cell outputs.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import urllib3
# Silence urllib3's InsecureRequestWarning messages.
# NOTE(review): that warning normally signals HTTPS requests made without
# certificate verification — confirm this is intended for the REDCap host.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

import COMBINE_harmonizer
from COMBINE_harmonizer import cfg

## 00. Init

In [2]:
# Project root relative to this notebook. The value already ends with '/',
# so the f-strings built from it contain a doubled separator
# (e.g. '..//config.yaml').
root_dir = '../'

# Load the YAML configuration; cfg.config is read right below and by later cells.
cfg.init(filename=f"{root_dir}/config.yaml")

# Directory later handed to all_redcap_columns() as the input location.
input_dir = f"{cfg.config['out_dir']}/out-publish-normalized"

In [3]:
# Hand the REDCap API token and host from the loaded config to init_redcap,
# so no credentials are hard-coded in the notebook itself.
COMBINE_harmonizer.init_redcap(token=cfg.config["redcap_token"], host=cfg.config["redcap_host"])

## 01. Create Events Based on Categories

In [4]:
# Build one REDCap event per (category -> event name) entry of CATEGORY_EVENT_MAP.
all_events = [
    COMBINE_harmonizer.redcap_event(name, group)
    for group, name in COMBINE_harmonizer.CATEGORY_EVENT_MAP.items()
]

In [5]:
# Ask REDCap which events already exist so only the missing ones get uploaded.
err, existing_events = COMBINE_harmonizer.get_redcap_data({'content': 'event'})
# Stop on a reported error instead of diffing against an unusable event list,
# which could fail obscurely on the next line or re-upload existing events.
if err:
    raise RuntimeError(f"Failed to fetch existing REDCap events: {err}")

existing_event_ids = [each['unique_event_name'] for each in existing_events]
# Keep only the events whose unique_event_name is not present in REDCap yet.
to_upload_events = [event for event in all_events if event['unique_event_name'] not in existing_event_ids]

In [6]:
# Display how many events are still missing in REDCap and will be uploaded.
len(to_upload_events)

5

In [7]:
# Only call the API when something is missing. The result is captured because
# a call nested inside an `if` is never rendered as cell output, so a reported
# error would otherwise go unnoticed.
# NOTE(review): assumes import_redcap_data returns an (error, count) pair, as
# its rendered output elsewhere in this notebook suggests — confirm.
if to_upload_events:
    event_import_err, event_import_count = COMBINE_harmonizer.import_redcap_data('event', to_upload_events)
    if event_import_err:
        raise RuntimeError(f"Failed to import REDCap events: {event_import_err}")

## 02. Construct REDCap Sheet Column Map

In [8]:
# Excel data dictionary that defines the REDCap columns for every sheet.
data_dict_filename = f'{root_dir}/Dictionary_HIE_clinical_variables.xlsx'

# Build the column map of the main sheet first, then the follow-up sheet.
_REDCAP_MAIN_COLUMN_MAP, _REDCAP_FOLLOWUP_COLUMN_MAP = (
    COMBINE_harmonizer.build_redcap_column_map(data_dict_filename, sheet)
    for sheet in (COMBINE_harmonizer.SHEET_MAIN, COMBINE_harmonizer.SHEET_FOLLOW_UP)
)

# Lookup from sheet identifier to that sheet's column map.
_REDCAP_SHEET_COLUMN_MAP = dict([
    (COMBINE_harmonizer.SHEET_MAIN, _REDCAP_MAIN_COLUMN_MAP),
    (COMBINE_harmonizer.SHEET_FOLLOW_UP, _REDCAP_FOLLOWUP_COLUMN_MAP),
])

## 03. Construct Filename Infos

In [9]:
# Derive the per-file information from the sheet -> column-map lookup; the
# result is passed to all_redcap_columns() together with input_dir.
filename_infos = COMBINE_harmonizer.build_redcap_filename_infos(_REDCAP_SHEET_COLUMN_MAP)

## 04. Construct All REDCap Columns

In [10]:
# Collect every REDCap column definition into one DataFrame. Later cells rely
# on its 'field_name', 'form_name', 'form_category' and 'form_repeated' columns.
df_all_redcap_columns = COMBINE_harmonizer.all_redcap_columns(input_dir, filename_infos)

In [11]:
# Count how many times each field_name occurs across all column definitions.
df_groupby = (
    df_all_redcap_columns
    .groupby('field_name')
    .agg(count=('field_name', 'count'))
)

# A field_name that appears more than once is a duplicate; show those rows
# (an empty table means every field_name is unique).
is_invalid = df_groupby['count'].gt(1)
df_groupby.loc[is_invalid]

Unnamed: 0_level_0,count
field_name,Unnamed: 1_level_1


## 05. Construct df_form_names

In [12]:
# One row per distinct (form_name, form_category, form_repeated) combination,
# in order of first appearance and with a fresh 0..n-1 index.
columns = ['form_name', 'form_category', 'form_repeated']
df_form_names = (
    df_all_redcap_columns[columns]
    .drop_duplicates()
    .reset_index(drop=True)
)
df_form_names

Unnamed: 0,form_name,form_category,form_repeated
0,identifier,Pre-intervention,False
1,pre_screening,Pre-intervention,False
2,pre_maternal_demographics,Pre-intervention,False
3,pre_pregnancy_history,Pre-intervention,False
4,pre_labor_and_delivery,Pre-intervention,False
...,...,...,...
59,followup_status,Follow Up,False
60,followup_readmission,Follow Up,True
61,followup_lost_follow_up,Follow Up,False
62,followup_secondary_analysis,Follow Up,False


## 06. Update REDCap Columns

### 06-1. Clear helper columns (form_category, form_repeated)

In [13]:
# Remove the form-level helper columns from the frame if they are present.
# errors='ignore' skips columns that are already gone, and inplace=True keeps
# the mutation on the existing frame object.
df_all_redcap_columns.drop(
    columns=['form_category', 'form_repeated'],
    errors='ignore',
    inplace=True,
)

### 06-2. Sort by form_name, in order of each form's first appearance

In [14]:
# Rank every form_name by the position of its first appearance.
form_id_map = {name: position for position, name in enumerate(df_all_redcap_columns['form_name'].unique())}

# Group the rows of each form together (forms in first-appearance order,
# rows within a form ordered by their index value), then discard the two
# temporary sort keys again.
df_all_redcap_columns = (
    df_all_redcap_columns
    .assign(
        form_id=df_all_redcap_columns['form_name'].map(lambda name: form_id_map[name]),
        _index=df_all_redcap_columns.index,
    )
    .sort_values(by=['form_id', '_index'])
    .drop(columns=['form_id', '_index'])
)

### 06-3. put to REDCap

In [15]:
# Turn every row of the frame into a plain {column: value} dict for upload.
all_redcap_columns = [
    dict(record)
    for row_label, record in df_all_redcap_columns.iterrows()
]

In [16]:
# Send all column definitions to REDCap as 'metadata'. As the last expression
# of the cell, the returned tuple is rendered as the cell output.
# NOTE(review): the tuple looks like (error, count) — confirm against the helper.
COMBINE_harmonizer.put_redcap_data('metadata', all_redcap_columns)

(None, 1215)

## 07. Create Form-Event Mapping

In [17]:
# One form-event mapping entry per form, built from its name and category.
form_events = [
    COMBINE_harmonizer.redcap_form_event(form_name, form_category)
    for form_name, form_category in zip(df_form_names['form_name'], df_form_names['form_category'])
]

In [18]:
# Upload the form-event mapping entries. As the last expression of the cell,
# the returned tuple is rendered as the cell output.
# NOTE(review): the tuple looks like (error, count) — confirm against the helper.
COMBINE_harmonizer.import_redcap_data('formEventMapping', form_events)

(None, 64)

## 08. Repeated Instruments

In [19]:
# Boolean mask taken straight from the 'form_repeated' column.
is_repeated_instrument = df_form_names.loc[:, 'form_repeated']
# Keep only the forms flagged as repeated.
df_repeated_form_names = df_form_names.loc[is_repeated_instrument]

In [20]:
# One repeating form-event entry per repeated form, built from its name and category.
repeated_form_events = [
    COMBINE_harmonizer.redcap_repeated_form_event(form_name, form_category)
    for form_name, form_category in zip(
        df_repeated_form_names['form_name'],
        df_repeated_form_names['form_category'],
    )
]

In [21]:
# Send the repeating form-event entries to REDCap. As the last expression of
# the cell, the returned tuple is rendered as the cell output.
# NOTE(review): the tuple looks like (error, count) — confirm against the helper.
COMBINE_harmonizer.put_redcap_data('repeatingFormsEvents', repeated_form_events)

(None, 17)