# Import changes from R4

In [37]:
import requests
import pandas
import json
import os
from datetime import datetime

from pathlib import Path

In [2]:
from config import api_config as cfg;

In [3]:
### Constants
# Passed as `verify=` to requests.post for the R4 calls, so despite its name it
# toggles TLS certificate verification (False disables it); it is unrelated to SSH.
USE_SSH = False
# Prefix for downloaded files. Paths are built by string concatenation
# (DATA_DIR + filename), so the trailing slash is required.
DATA_DIR = "./data/"

In [36]:
### Get last run time from date file
last_run_file = Path('./run_history.log')
last_run_file.touch(exist_ok=True)

# Read the log once, inside a context manager so the handle is closed, and drop
# blank lines so a trailing newline can never be taken for the last run time.
with open(last_run_file, 'r') as f:
    run_history = [line.strip() for line in f if line.strip()]

num_lines = len(run_history)

# With no recorded run, fall back to a date far in the past so that the export
# below is not restricted by `dateRangeBegin`.
last_runtime = run_history[-1] if run_history else '2000-01-01 01:01'

print("last runtime:", last_runtime)

last runtime: 2000-01-01 01:01


In [5]:
### EXPORT data from R4/source REDCap
# Instruments to export; their position in this list becomes the index in the
# `forms[i]` request keys.
export_forms = [
    'prescreening_survey',
    'transition_page',
    'primary_consent',
    'cchmc_consent_part_2',
    'cchmc_consent_parent_permission',
    'end_of_consent_transition',
    'baseline_survey_adult',
    'baseline_survey_child',
    'pre_ror_child',
    'pre_ror_adult',
    'pre_ror_transition',
    'post_ror',
    'adverse_events',
    'study_withdrawal',
    'consent_upload',
    'notes',
    'gira_reports',
    'mono_sample',
    'broad_ordering',
    'metree_import',
    'metree',
    'family_relationships',
    'completed_signed_consent',
    'admin_form',
    'unified_variables',
]

# Request header fields: which project (token) and what kind of export.
data = {
    'token': cfg.config['R4_api_token'],
    'content': 'record',
    'action': 'export',
    'format': 'json',
    'type': 'flat',
    'csvDelimiter': '',
}
# One `forms[i]` entry per instrument, in list order.
data.update(
    ('forms[' + str(position) + ']', form_name)
    for position, form_name in enumerate(export_forms)
)
# Output options, plus the date window starting at the previous run time.
data.update({
    'rawOrLabel': 'raw',
    'rawOrLabelHeaders': 'raw',
    'exportCheckboxLabel': 'false',
    'exportSurveyFields': 'true',
    'exportDataAccessGroups': 'false',
    'returnFormat': 'json',
    'dateRangeBegin': last_runtime,
    'dateRangeEnd': '',
})

In [6]:
# Request the record export from the source REDCap. USE_SSH is forwarded as
# requests' `verify` flag, i.e. it controls TLS certificate verification.
r = requests.post(cfg.config['R4_api_url'], data=data, verify=USE_SSH)
# Fail here on an HTTP error rather than letting the next cell try to parse an
# error response as if it were exported records.
r.raise_for_status()



In [7]:
### store data from request
export_string = r.content.decode("utf-8")
export_dict = json.loads(export_string)

# A flat record export decodes to a list of row dicts. A JSON object carrying an
# "error" key means the API rejected the request; surface that message instead
# of letting pandas fail on it with an unrelated-looking error.
if isinstance(export_dict, dict) and 'error' in export_dict:
    raise RuntimeError('R4 export failed: ' + str(export_dict['error']))

export_df = pandas.DataFrame(export_dict)

In [8]:
### IMPORT data into local REDCap
# Payload for the record import into the local copy. The JSON text exported from
# R4 is forwarded unchanged as `data`, and the response reports a record count.
fields = dict(
    token=cfg.config['R4copy_api_token'],
    content='record',
    action='import',
    format='json',
    events='',
    type='flat',
    overwriteBehavior='normal',
    forceAutoNumber='false',
    data=export_string,
    returnContent='count',
    returnFormat='json',
)
r = requests.post(cfg.config['R4copy_api_url'], data=fields)
status_line = 'HTTP Status: ' + str(r.status_code)
print(status_line)


HTTP Status: 200


In [9]:
## Check the record count. If nothing to be updated, quit the script.

import_result = r.json()
# Anything without a "count" key is not a successful import response; report
# the payload instead of failing with a bare KeyError.
if not isinstance(import_result, dict) or 'count' not in import_result:
    raise RuntimeError('Record import failed: ' + str(import_result))

# int() accepts the count whether it arrives as a JSON number or a numeric string.
record_count = int(import_result['count'])
if record_count < 1:
    # `quit()` is an interactive-shell helper; SystemExit is the portable way to
    # stop both a script run and a notebook "Run All".
    raise SystemExit(0)

In [10]:
### create list of file fields that need to be exported + copied over
# File fields whose contents are copied over, preceded by the record key.
upload_fields = ['pdf_file', 'broad_import_pdf', 'completed_signed_consent']
file_field_list = ['record_id'] + upload_fields

In [11]:
### filter original export from above by the file fields
# Keep every exported row, but only the record key and the file columns.
files_export_df = export_df.loc[:, file_field_list]

In [12]:
### melt file dataframe so record, field, and filename are columns
# Wide -> long: one row per (record_id, field) pair with its file name.
files_eav = files_export_df.melt(
    id_vars=['record_id'],
    var_name='field',
    value_name='file_name',
)


In [13]:
### remove rows that don't have a filename (no file uploaded in R4)
# An empty string in `file_name` marks a field with no uploaded file.
has_file = files_eav['file_name'] != ''
filtered_files_eav = files_eav[has_file]

In [14]:
### separate into consent files and non-consent files
# Compute the consent mask once; its complement selects everything else.
is_consent = filtered_files_eav['field'] == 'completed_signed_consent'
consent_files = filtered_files_eav[is_consent]
nonconsent_files = filtered_files_eav[~is_consent]

In [15]:
### convert EAV to a list that the "for" loop below can iterate through
# Each resulting row is [record_id, field, file_name].
consent_files_list, nonconsent_files_list = (
    frame.values.tolist() for frame in (consent_files, nonconsent_files)
)

In [16]:
### export consent PDF files from R4 to local folder
# Make sure the download folder exists before the first file is written.
os.makedirs(DATA_DIR, exist_ok=True)

# Each row is [record_id, field, file_name] from the melted file table.
for record_id, field, filename in consent_files_list:
    data = {
        'token': cfg.config['R4_api_token'],
        'content': 'file',
        'action': 'export',
        'record': record_id,
        'field': field,
        'event': '',
        'returnFormat': 'json'
        }
    r = requests.post(cfg.config['R4_api_url'], data=data, verify=USE_SSH)
    print('HTTP Status: ' + str(r.status_code))
    # Abort on a failed download so an error body is never saved as a PDF and
    # then renamed and uploaded by the later cells.
    r.raise_for_status()
    # The `with` block closes the file; no explicit close() is needed.
    with open(DATA_DIR + str(filename), 'wb') as f:
        f.write(r.content)



HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200




HTTP Status: 200


## Convert files to HIM-compatible format

In [17]:
### create dataframe of fields for consent files for HIM
# Columns needed to build the HIM file name: participant name, age, and the
# consent and birth dates for both the child and the adult variants.
him_filename_fields = [
    'record_id',
    'age',
    'name_of_participant_part1',
    'date_consent_cchmc_pp_2',
    'date_p2_consent_cchmc',
    'date_of_birth_child',
    'date_of_birth',
]
# Select the columns, keep only rows that carry a participant name, then cast
# age to int so it can be compared numerically later.
him_filename_df = (
    export_df[him_filename_fields]
    .loc[lambda frame: frame['name_of_participant_part1'] != '']
    .astype({"age": int})
)

In [18]:
### merge dataframes for HIM file fields + table of consent file exports
# Default (inner) merge: only records present in both tables are kept.
him_consent_join = him_filename_df.merge(consent_files, on='record_id')

In [19]:
### remove whitespace from participant name
# Only literal space characters are removed from the name.
participant_names = him_consent_join['name_of_participant_part1']
him_consent_join['name_of_participant_part1'] = participant_names.str.replace(' ', '')

In [20]:
### reformat dates
# Parse each date column and re-render it as DDMonYYYY using "%d%b%Y".
for date_column in (
    'date_consent_cchmc_pp_2',
    'date_p2_consent_cchmc',
    'date_of_birth_child',
    'date_of_birth',
):
    parsed_dates = pandas.to_datetime(him_consent_join[date_column])
    him_consent_join[date_column] = parsed_dates.dt.strftime("%d%b%Y")

In [21]:
### convert to list
# One inner list per joined row, in column order.
him_consent_rows = him_consent_join.values
him_consent_list = him_consent_rows.tolist()

In [22]:
## iterate through old file names and rename, add new names to blank list
him_newnames_list = []
# Access columns by name rather than by position so that a change in column
# order cannot silently pick the wrong value.
for row in him_consent_join.itertuples(index=False):
    if row.age < 18:
        # Minors: use the date_consent_cchmc_pp_2 / date_of_birth_child pair.
        sign_date = row.date_consent_cchmc_pp_2
        dob = row.date_of_birth_child
    else:
        # Adults: use the date_p2_consent_cchmc / date_of_birth pair.
        sign_date = row.date_p2_consent_cchmc
        dob = row.date_of_birth
    newname = str(sign_date) + "_" + str(row.name_of_participant_part1) + "_" + str(dob) + ".pdf"
    os.rename(DATA_DIR + str(row.file_name), DATA_DIR + str(newname))
    him_newnames_list.append(newname)

him_newnames_df = pandas.DataFrame({'newname': him_newnames_list})
# Assign the column directly instead of an index-based join: values stay in row
# order whatever the index is, and re-running the cell cannot hit a
# column-overlap error. `newname` remains the last column (position 9).
him_consent_join['newname'] = him_newnames_list
him_consent_list = him_consent_join.values.tolist()

In [38]:
### import renamed consent files into copy of R4
for row in him_consent_list:
    # Row positions: 0 = record_id, 7 = field, 9 = renamed file name.
    record_id, field, filename = row[0], row[7], row[9]
    data = {
        'token': cfg.config['R4copy_api_token'],
        'content': 'file',
        'action': 'import',
        'record': record_id,
        'field': field,
        'event': '',
        'returnFormat': 'json'
        }
    # The `with` block closes the file; no explicit close() is needed.
    with open(DATA_DIR + str(filename), 'rb') as f:
        r = requests.post(cfg.config['R4copy_api_url'], data=data, files={'file': f})
    print('HTTP Status: ' + str(r.status_code))
    # Stop on a failed upload: otherwise the run log is still advanced at the
    # end of the notebook and this file would not be picked up again.
    r.raise_for_status()

HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200
HTTP Status: 200


In [24]:
### export non-consent PDF files from R4 to local folder
# Make sure the download folder exists before the first file is written.
os.makedirs(DATA_DIR, exist_ok=True)

# Each row is [record_id, field, file_name] from the melted file table.
for record_id, field, filename in nonconsent_files_list:
    data = {
        'token': cfg.config['R4_api_token'],
        'content': 'file',
        'action': 'export',
        'record': record_id,
        'field': field,
        'event': '',
        'returnFormat': 'json'
        }
    # Use the shared USE_SSH flag (as the other R4 requests do) instead of a
    # hard-coded verify=False, so the TLS setting is controlled in one place.
    r = requests.post(cfg.config['R4_api_url'], data=data, verify=USE_SSH)
    print('HTTP Status: ' + str(r.status_code))
    # Abort on a failed download so an error body is never saved as a file and
    # uploaded by the next cell.
    r.raise_for_status()
    # The `with` block closes the file; no explicit close() is needed.
    with open(DATA_DIR + str(filename), 'wb') as f:
        f.write(r.content)

In [25]:
### import non-consent files into copy of R4
# Each row is [record_id, field, file_name] from the melted file table.
for record_id, field, filename in nonconsent_files_list:
    data = {
        'token': cfg.config['R4copy_api_token'],
        'content': 'file',
        'action': 'import',
        'record': record_id,
        'field': field,
        'event': '',
        'returnFormat': 'json'
        }
    # The `with` block closes the file; no explicit close() is needed.
    with open(DATA_DIR + str(filename), 'rb') as f:
        r = requests.post(cfg.config['R4copy_api_url'], data=data, files={'file': f})
    print('HTTP Status: ' + str(r.status_code))
    # Stop on a failed upload: otherwise the run log is still advanced at the
    # end of the notebook and this file would not be picked up again.
    r.raise_for_status()


In [26]:
### Update date file with latest run time
def write_file(filename, data):
    """Append `data` as a new line at the end of `filename`.

    The file is created if it does not exist. A newline separator is written
    before `data` only when the file already has content, so an existing but
    empty file does not end up with a leading blank line.

    Parameters:
        filename: path of the file to append to.
        data: text to write; no trailing newline is added.
    """
    has_content = os.path.isfile(filename) and os.path.getsize(filename) > 0
    # Mode 'a' creates a missing file, so one open() covers both cases.
    with open(filename, 'a') as f:
        if has_content:
            f.write('\n')
        f.write(data)

In [27]:
def print_time():
    """Return the current time from datetime.now() as 'YYYY-MM-DD HH:MM'.

    Despite the name, nothing is printed; the formatted string is returned.
    """
    return datetime.now().strftime("%Y-%m-%d %H:%M")

In [28]:
# Append this run's timestamp to the history log; the last line of that log is
# what the notebook reads back as `last_runtime` on the next run.
# NOTE(review): the timestamp is taken here, after the export and imports have
# finished, so records modified in R4 while the notebook was running may fall
# before the next run's dateRangeBegin - consider capturing the time before
# the export request instead.
write_file(filename='run_history.log', data=print_time())