# Sync REDCap To CommCare 

A playground for testing `sync_redcap_to_commcare.py` and its utilities in `redcap_sync.py`.

In [None]:
import os

import redcap

from cc_utilities.constants import REDCAP_INTEGRATION_STATUS, DOB_FIELD
from cc_utilities.command_line.sync_redcap_to_commcare import get_redcap_state
from cc_utilities.redcap_sync import (
    collapse_checkbox_columns,
    normalize_phone_cols,
    set_external_id_column,
    upload_complete_records,
    upload_incomplete_records,
    split_complete_and_incomplete_records,
    add_integration_status_columns,
    import_records_to_redcap,
)

In [None]:
# Credentials and endpoints come from the environment; a variable that is
# not set yields None.
redcap_api_url = os.environ.get("REDCAP_API_URL")
redcap_api_key = os.environ.get("REDCAP_API_KEY")
commcare_api_key = os.environ.get("COMMCARE_API_KEY")
commcare_user_name = os.environ.get("COMMCARE_USERNAME")
commcare_project_name = os.environ.get("COMMCARE_PROJECT")
database_url = os.environ.get("DB_URL")

# Settings for this playground session.
state_file = "redcap_test.yaml"
# When True, the export cell passes date_begin=None instead of the value
# stored in the state file.
sync_all = True
# No phone columns are normalized in this session.
phone_cols = []
# REDCap field holding the external (CDMS) identifier.
external_id_col = "cdms_id"

In [None]:
# Get REDCap records

state = get_redcap_state(state_file)
redcap_project = redcap.Project(redcap_api_url, redcap_api_key)

# Server-side filter clauses, joined with AND below: keep records whose
# survey is complete and whose external ID field is non-empty.
record_filters = [
    # Disabled in this playground. When enabled, it additionally restricts
    # the export to records that have not been synced yet (neither rejected
    # nor successful):
    # f"[{REDCAP_INTEGRATION_STATUS}] = ''",
    "[ci_survey_complete] = 2",
    f"[{external_id_col}] != ''",
]

# Options PyCap hands to pandas' read_csv() when building the DataFrame.
read_csv_options = {
    # Without index_col=False the first column ("record_id") would become
    # the index; it is not unique and is easier to handle as a plain column.
    "index_col": False,
    # Read everything as strings so pandas does not coerce ints to floats
    # and add decimal points to data later uploaded to CommCare.
    "dtype": str,
}

redcap_records = redcap_project.export_records(
    # date_begin corresponds to REDCap's dateRangeBegin API field, which
    # "return[s] only records that have been created or modified *after* a
    # given date/time." REDCap interprets it in server time, so this script
    # and the server should run in the same time zone (or the script should
    # be changed to accept a timezone argument).
    date_begin=None if sync_all else state["date_begin"],
    # Ask PyCap for a pandas DataFrame.
    format="df",
    df_kwargs=read_csv_options,
    filter_logic=" AND ".join(record_filters),
)

redcap_records

## Match Records in CDMS

Step-by-step cells for testing the logic inside `handle_cdms_matching()`.

In [None]:
from sqlalchemy import create_engine
import pandas as pd

from cc_utilities.constants import DOB_FIELD

In [None]:
# handle_cdms_matching
#
# Reproduces the first step of that function by hand so the intermediate
# values can be inspected.

# Drop rows missing DOB or External ID, then get a list of external IDs.
# `df` therefore only contains rows where both columns are populated.
df = redcap_records.dropna(subset=[external_id_col, DOB_FIELD])
external_ids = df[external_id_col].tolist()
# Bare expression so the notebook displays the list.
external_ids

In [None]:
# get_external_ids_and_dobs
#
# Look up, in the patient table, the DOB stored for each external ID found in
# the REDCap export. `result` is a list of dicts, one per matching row, keyed
# by the two selected column names.
db_url = database_url
table_name = "patient"

if external_ids:
    engine = create_engine(db_url)
    try:
        result = pd.read_sql(
            # Column and table names are notebook constants, so interpolating
            # them is safe here; the ID values go through bound parameters.
            f"""SELECT
                    {external_id_col},
                    {DOB_FIELD}
                FROM {table_name}
                WHERE
                    {external_id_col} IN %(external_ids)s
                    AND {DOB_FIELD} IS NOT NULL
                    AND {DOB_FIELD} <> ''
            """,
            engine,
            params={"external_ids": tuple(external_ids)},
        ).to_dict(orient="records")
    finally:
        # Release pooled connections instead of leaving them open for the
        # lifetime of the notebook kernel.
        engine.dispose()
else:
    # An empty tuple would be rendered as `IN ()`, which is not valid SQL on
    # common drivers, so skip the query: nothing can match anyway.
    result = []

result

In [None]:
# get_records_matching_id_and_dob
cdms_patients_data = result

# REDCap rows indexed by external ID, for DOB lookups.
lookup_df = df.set_index(external_id_col)

# External ID -> DOB as returned by the database query.
matching_ids_dobs = {
    patient[external_id_col]: patient[DOB_FIELD] for patient in cdms_patients_data
}

# Accept an ID only when the database returned a DOB for it and that DOB
# equals the one in the REDCap row. IDs absent from the query result yield
# None from .get() and are therefore not accepted.
accepted_external_ids = [
    candidate_id
    for candidate_id in external_ids
    if matching_ids_dobs.get(candidate_id) == lookup_df.loc[candidate_id][DOB_FIELD]
]

print(f"Accepted: {accepted_external_ids}")

In [None]:
from cc_utilities.redcap_sync import select_records_by_cdms_matches

# Split the records into two frames using the accepted IDs computed earlier
# in this notebook.
# NOTE(review): the selection rules live in select_records_by_cdms_matches,
# which is not shown here. Presumably `matched_records` holds the rows whose
# external ID was accepted and `unmatched_records` the rest; confirm against
# the helper.
matched_records, unmatched_records = select_records_by_cdms_matches(
    df, redcap_records, accepted_external_ids, external_id_col
)

In [None]:
from cc_utilities.constants import REDCAP_RECORD_ID, REDCAP_REJECTED_PERSON

# Only the record ID column is kept; the status columns are added to it.
record_ids_only = unmatched_records[[REDCAP_RECORD_ID]]
rejection_reason = f"mismatched {DOB_FIELD} and {external_id_col}"

# Mark every unmatched record as rejected, with the reason above.
unmatched_records = add_integration_status_columns(
    record_ids_only,
    status=REDCAP_REJECTED_PERSON,
    reason=rejection_reason,
)
unmatched_records

In [None]:
# REDCap Import
#
# Write the rejection status of the unmatched records back to REDCap and
# show the API response.

redcap_project = redcap.Project(redcap_api_url, redcap_api_key)
response = redcap_project.import_records(
    # `unmatched_records` is the record-ID + status frame built in the cell
    # above. This cell previously passed `reject_records`, a name that is
    # never defined in this notebook, so it failed with NameError.
    # NOTE(review): confirm the frame's index/columns are in the shape
    # import_records expects for DataFrame input.
    to_import=unmatched_records,
    overwrite="normal",
    return_content="ids",
)
response


## Data transformations

In [None]:
# Start the transformation steps from the full REDCap export.
# `phone_cols` is an empty list in the settings cell, so no column names are
# passed here.
# NOTE(review): what normalize_phone_cols does to each listed column is
# defined in redcap_sync, not shown here.
cases_df = normalize_phone_cols(redcap_records, phone_cols)
# Preview the first rows of the result.
cases_df.head()

In [None]:
# NOTE(review): presumably derives the external ID column used for upload
# from the `external_id_col` field; confirm in redcap_sync.
cases_df = set_external_id_column(cases_df, external_id_col)
# Bare expression so the notebook renders the resulting frame.
cases_df

In [None]:
# From split_complete_and_incomplete_records
# Drop columns where all values are missing.
# (axis=1 selects columns; how="all" drops a column only when every value
# in it is NA.)
cases_df = cases_df.dropna(axis=1, how="all")
# Bare expression so the notebook renders the resulting frame.
cases_df

In [None]:
# The helper returns two frames, unpacked here as the complete and the
# incomplete records.
# NOTE(review): the completeness criterion is defined in redcap_sync, not
# shown here.
complete_records, incomplete_records = split_complete_and_incomplete_records(cases_df)
# Display the complete half.
complete_records

In [None]:
import pandas as pd
# Stack the two halves back into one frame (complete rows first) so the
# split can be inspected side by side; the result is displayed, not stored.
pd.concat([complete_records, incomplete_records])

In [None]:
# upload_complete_records(
#     cases_df, commcare_api_key, commcare_project_name, commcare_user_name
# )

In [None]:
# upload_incomplete_records(
#     cases_df, commcare_api_key, commcare_project_name, commcare_user_name
# )

# The upload call above is left commented out; the loop below only builds
# the per-row frames locally and uploads nothing.
for index, row in incomplete_records.iterrows():
    # Drops any values in this Series with missing/NA values,
    # and converts it back to a DataFrame.
    data = row.dropna().to_frame().transpose()

# Displays the frame built from the LAST row only, since `data` is
# overwritten on each iteration. If incomplete_records has no rows, `data`
# is never assigned here and this line raises NameError (unless an earlier
# run of the cell left it defined).
data