In [1]:
import ast
import os

import pandas as pd
from dotenv import load_dotenv
from redcap import Project

from notebook_check import in_notebook

# Constants
JUPYTER = in_notebook()  # flag used later to choose display() over print()
DEBUG = False            # set to True to preview every loaded frame
DATA = 'data'            # directory holding the cached <key>.tsv exports
os.makedirs(DATA, exist_ok=True)

# Load environment variables
load_dotenv('dot.env')

# API_TOKEN holds a Python literal that later cells use as a mapping
# (iterated for its keys, indexed for the per-project API key).
# Parse it with ast.literal_eval rather than eval so the contents of the
# env file can never execute arbitrary code.
_api_token_raw = os.getenv('API_TOKEN')
if _api_token_raw is None:
    raise RuntimeError(
        "API_TOKEN is not set; define it in dot.env or in the environment"
    )
token = ast.literal_eval(_api_token_raw)
api_url = os.getenv('API_URL')

In [None]:
# data['key'] = DataFrame
data = {}

# Check if the cached TSVs exist; if not, download from REDCap
for key in token:
    tsv_path = f"{DATA}/{key}.tsv"
    if os.path.exists(tsv_path):
        # The cache is written tab-separated below, so it has to be read
        # back with the same separator (read_csv defaults to commas).
        data[key] = pd.read_csv(tsv_path, sep='\t')
        print(f"Loaded: {key}.tsv")
    else:
        try:
            api_key = token[key]
            project = Project(api_url, api_key)
            data[key] = pd.DataFrame(project.export_records())
            data[key].to_csv(tsv_path, index=False, sep='\t')
            print(f"Saved : {key}.tsv")
        except Exception as e:
            # Best-effort: report the failure and continue with the next
            # project. Note that data[key] may be left unset for this key.
            print(f"Error : {e}")

In [3]:
# Optional preview of every loaded frame; does nothing unless DEBUG is set.
if DEBUG:
    for key in data:
        frame = data[key]
        if JUPYTER:
            # Rich display of the first rows
            display(frame.head())
        else:
            # Plain-text fallback prints the frame itself
            print(frame)


In [None]:
# Check if dataframes have the same columns

def check_columns(data):
    """Return True when every frame in ``data`` has the same set of columns.

    ``data`` maps a key to an object exposing ``.columns``. Column order is
    ignored because the comparison is done on sets. An empty mapping, or one
    with a single entry, yields True.
    """
    keys = iter(data)
    try:
        first_key = next(keys)
    except StopIteration:
        # Nothing to compare
        return True

    expected = set(data[first_key].columns)
    # all() stops at the first mismatching frame
    return all(set(data[other].columns) == expected for other in keys)

if not check_columns(data):
    # Columns present in every frame; anything outside this set is reported
    # below as specific to the frame that carries it.
    columns = set.intersection(*(set(data[name].columns) for name in data))
    print("Columns that are different:")
    for key in data:
        extra = set(data[key].columns) - columns
        print(f"{key}: {extra}")
else:
    print("All dataframes have the same columns")

# Columns that are different:
# chinese_traditional: {'mac_sdoh_questionnaire_traditional_chinese_complete', 'msoc_bas_46'}
# chinese_simplified: {'mac_sdoh_questionnaire_chinese_complete', 'msoc_bas_46'}
# english: {'mac_sdoh_questionnaire_english_complete', 'msoc_bas_46'}
# spanish: {'msoc_bas_45', 'mac_sdoh_questionnaire_spanish_complete'}

In [5]:
# Write the first rows of the English export to a small reference file.
# The target directory is created first: to_csv does not create missing
# parent directories and would otherwise fail on a fresh checkout.
# NOTE(review): this writes exported records to disk — confirm the
# reference/ folder is safe to share or excluded from version control.
os.makedirs('reference', exist_ok=True)
data['english'].head().to_csv('reference/english_head.csv', index=False)