# Sync REDCap To CommCare 

A playground for testing `sync_redcap_to_commcare.py` and its utilities in `redcap_sync.py`.

In [None]:
import os

import redcap

from cc_utilities.command_line.sync_redcap_to_commcare import get_redcap_state
from cc_utilities.redcap_sync import (
    collapse_checkbox_columns,
    normalize_phone_cols,
    set_external_id_column,
    upload_complete_records,
    upload_incomplete_records,
    split_complete_and_incomplete_records,
    add_integration_status_columns,
    import_records_to_redcap,
)

In [None]:
# Credentials and connection settings come from the environment; each value
# is None when its variable is not set.
redcap_api_url = os.environ.get("REDCAP_API_URL")
redcap_api_key = os.environ.get("REDCAP_API_KEY")
commcare_api_key = os.environ.get("COMMCARE_API_KEY")
commcare_user_name = os.environ.get("COMMCARE_USERNAME")
commcare_project_name = os.environ.get("COMMCARE_PROJECT")
database_url = os.environ.get("DB_URL")

# Fixed settings for this playground session.
external_id_col = "cdms_id"
phone_cols = []
sync_all = True
state_file = "redcap_test.yaml"

In [None]:
# Get REDCap records

# Keyword arguments that PyCap hands on to pandas when building the DataFrame.
read_csv_options = {
    # Without index_col=False, read_csv() will use the first column
    # ("record_id") as the index, which is problematic because it's
    # not unique and is easier to handle as a separate column anyways.
    "index_col": False,
    # Everything is imported as a string, to avoid pandas coercing ints
    # to floats and adding unnecessary decimal points in the data when
    # uploaded to CommCare.
    "dtype": str,
}

state = get_redcap_state(state_file)
redcap_project = redcap.Project(redcap_api_url, redcap_api_key)
redcap_records = redcap_project.export_records(
    # format="df" tells PyCap to return a pandas DataFrame.
    format="df",
    df_kwargs=read_csv_options,
    # Optional filter, currently disabled:
    # filter_logic="[integration_status] = ''",
)

# Show the exported records.
redcap_records

## Match Records in CDMS

These cells are for testing, step by step, what's inside `match_records_in_cdms()`.

In [None]:
# Imports
from sqlalchemy import MetaData, Table, and_, create_engine, select, or_
from cc_utilities.constants import DOB_FIELD
from pprint import pprint

In [None]:
# Test data
external_id_col = "cdms_id"

# Take the external ID and DOB of the row labelled 0 as a sample pair.
external_id = redcap_records[external_id_col][0]
dob = redcap_records["dob"][0]

# Database settings used by the cells below.
db_url = database_url
table_name = "patient"

print(f"CDMS_ID: {external_id},\nDOB: {dob}")

In [None]:
# Keep only the rows that have both an external ID and a DOB.
required_columns = [external_id_col, DOB_FIELD]
df = redcap_records.dropna(subset=required_columns)
df

In [None]:
# Load table
engine = create_engine(db_url)
# The MetaData is left unbound: binding it to an engine is deprecated in
# SQLAlchemy 1.4 and removed in 2.0, and the query below is executed on an
# explicit connection, so no implicit bind is needed.
meta = MetaData()
# Passing autoload_with is enough to reflect the existing table definition
# from the database; the separate autoload=True flag is redundant and
# deprecated.
table = Table(table_name, meta, autoload_with=engine)


In [None]:
# Validate columns: both the DOB field and the external ID column must exist
# in the reflected table before a query is built on them.
column_names = [column.name for column in table.columns]
for required_column in (DOB_FIELD, external_id_col):
    assert required_column in column_names, (
        f"{required_column} not in {table_name} table"
    )


In [None]:
# Define the query
# Look the two table columns up once; they are reused for every record and
# for the SELECT list.
id_column = getattr(table.c, external_id_col)
dob_column = getattr(table.c, DOB_FIELD)

# One [id == ..., dob == ...] pair of conditions per REDCap record.
wheres = []
for record in df.itertuples():
    dob = record.dob
    external_id = getattr(record, external_id_col)
    print(f"Processing dob {dob} and id {external_id}")
    wheres.append([id_column == external_id, dob_column == dob])

query = select([id_column, dob_column])
# NOTE(review): the filter below is commented out, so `wheres` is built but
# never used and the query selects every row of the table.
# .where(
#     or_(*[and_(*where) for where in wheres])
# )


In [None]:
# Execute
# The context manager closes the connection when the block exits, including
# when the query raises.
with engine.connect() as conn:
    result = conn.execute(query)
    # Convert each result row to a plain dict keyed by column name.
    matching_records = [dict(row) for row in result.fetchall()]

# The label is printed with print(): pprint() on a string shows its repr,
# quotes included.
print("Got matches for: ")
pprint(matching_records)

In [None]:
# Split records into matched/unmatched.
# Matching here is on the external ID alone: a record counts as matched when
# its external ID appears among the rows returned by the query.
matched_external_ids = [m[external_id_col] for m in matching_records]

# Compute the membership mask once and select rows with it directly. This
# avoids blanking rows to NaN with where() and dropping them again.
is_matched = df[external_id_col].isin(matched_external_ids)
matched_records = df[is_matched]
# Only the record_id column is kept for unmatched records.
unmatched_records = df.loc[~is_matched, ["record_id"]]

matched_records

In [None]:
# Display the records whose external ID was not found among the query results.
unmatched_records

In [None]:
# NOTE(review): add_reject_status_columns is not imported in any visible cell
# of this notebook - presumably it lives in cc_utilities.redcap_sync; confirm
# and add it to the imports, otherwise this cell raises NameError.
rejects_with_status = add_reject_status_columns(unmatched_records, external_id_col)
# Drop every column that contains a missing value.
reject_records = rejects_with_status.dropna(axis=1)
reject_records

In [None]:
# REDCap Import

# Options for the import call, kept together for readability.
import_options = {
    "overwrite": "normal",
    "return_content": "ids",
}

# A fresh Project handle is created here, then the rejected records are
# written back to REDCap.
redcap_project = redcap.Project(redcap_api_url, redcap_api_key)
response = redcap_project.import_records(to_import=reject_records, **import_options)
response


## Data transformations

In [None]:
# Run the exported REDCap records through normalize_phone_cols using the
# configured phone columns, then preview the first rows of the result.
cases_df = normalize_phone_cols(redcap_records, phone_cols)
cases_df.head()

In [None]:
# Apply set_external_id_column with the configured external ID column name
# and display the resulting frame.
cases_df = set_external_id_column(cases_df, external_id_col)
cases_df

In [None]:
# Step taken from split_complete_and_incomplete_records:
# remove every column in which all values are missing.
cases_df = cases_df.dropna(axis="columns", how="all")
cases_df

In [None]:
# Split the cases into two frames with split_complete_and_incomplete_records
# and display the complete ones.
complete_records, incomplete_records = split_complete_and_incomplete_records(cases_df)
complete_records

In [None]:
import pandas as pd
# Stack the two frames row-wise (complete records first, then incomplete) to
# view them together; the result is only displayed, not stored.
pd.concat([complete_records, incomplete_records])

In [None]:
# upload_complete_records(
#     cases_df, commcare_api_key, commcare_project_name, commcare_user_name
# )

In [None]:
# upload_incomplete_records(
#     cases_df, commcare_api_key, commcare_project_name, commcare_user_name
# )

# Walk the incomplete records one row at a time.
# NOTE(review): presumably this mirrors the per-row preparation done inside
# upload_incomplete_records - confirm against that function.
# `data` is initialised first so that, when there are no incomplete records,
# the cell shows nothing instead of raising NameError or displaying a stale
# value left over from an earlier run.
data = None
for _, row in incomplete_records.iterrows():
    # Drops any values in this Series with missing/NA values,
    # and converts it back to a DataFrame.
    data = row.dropna().to_frame().transpose()

# Only the single-row frame built from the last record is kept and displayed.
data