In [158]:
import pandas as pd
import subjectid_to_seqid as s2s
import os
import sys
import requests
from tkinter import filedialog

In [159]:
def concatenate_summaries(summaries_path: str) -> pd.DataFrame:
    df = pd.DataFrame()
    for file in os.listdir(summaries_path):
        if file.endswith('.csv'):
            summary = pd.read_csv(os.path.join(summaries_path, file))
            df = pd.concat([df, summary], ignore_index=True)
    return df
        

In [160]:
def prepare_choose_import_file(choose_df: pd.DataFrame) -> pd.DataFrame:
    rename_dict = {
        "subject": "subjectid",
        "experiment": "redcap_repeat_instrument",
        "date": "choose_doadmin",
        "train_accuracy": "choosetrainingaccavg",
        "train_avg_rt": "choosetrainingrtavg",
        "train_errors": "choosetrainingnerr",
        "probe_accuracy": "chooseprobeaccavg",
        "probe_errors": "chooseprobenerr",
    }
    choose_df = choose_df.rename(columns=rename_dict)
    choose_df["redcap_repeat_instrument"] = "choose_task"
    choose_df["seqid"] = choose_df["subjectid"].apply(s2s.get_seqid)
    choose_df["redcap_repeat_instance"] = choose_df["subjectid"].apply(s2s.get_instance_number)
    choose_df["choose_doadmin"] = choose_df["choose_doadmin"].str.strip("\"")
    choose_df["choose_doadmin"] = pd.to_datetime(choose_df["choose_doadmin"])
    choose_df["choose_doadmin"] = choose_df["choose_doadmin"].dt.strftime("%m/%d/%Y")
    choose_df["choose_task_complete"] = "1" # For unverified data
    choose_df["choosetrainingnerr"] = choose_df["choosetrainingnerr"].astype(int)
    choose_df["chooseprobenerr"] = choose_df["chooseprobenerr"].astype(int)

    for index, row in choose_df.iterrows():
        if pd.isnull(row["seqid"]):
            print(f"Couldn't generate SeqID due to invalid SubjectID: {row['subjectid']}")

    choose_df = choose_df.dropna(subset=["seqid", "redcap_repeat_instance"])

    redcap_cols = ["seqid","redcap_repeat_instance", "redcap_repeat_instrument", "choose_doadmin", 
                   "choosetrainingaccavg","choosetrainingrtavg", "choosetrainingnerr", 
                   "chooseprobeaccavg", "chooseprobenerr", "choose_task_complete"]
    
    choose_df = choose_df.astype(str)
    
    return choose_df[redcap_cols]


In [161]:
def prepare_fish_import_file(fish_df: pd.DataFrame) -> pd.DataFrame:
    rename_dict = {
        "subject": "subjectid",
        "experiment": "redcap_repeat_instrument",
        "date": "fish_doadmin",
        "acquisition": "facquisition",
        "acquisition_trials": "facquisition_trials",
        "retention": "fretention",
        "generalization": "fgen",
        }
    fish_df = fish_df.rename(columns=rename_dict)
    fish_df["redcap_repeat_instrument"] = "fish_task"
    fish_df["seqid"] = fish_df["subjectid"].apply(s2s.get_seqid)
    fish_df["redcap_repeat_instance"] = fish_df["subjectid"].apply(s2s.get_instance_number)
    fish_df["fish_doadmin"] = fish_df["fish_doadmin"].str.strip("\"")
    fish_df["fish_doadmin"] = pd.to_datetime(fish_df["fish_doadmin"])
    fish_df["fish_doadmin"] = fish_df["fish_doadmin"].dt.strftime("%m/%d/%Y")
    fish_df["fish_task_complete"] = "1" # For unverified data
    fish_df["facquisition_trials"] = fish_df["facquisition_trials"].astype(str)

    for index, row in fish_df.iterrows():
        if pd.isnull(row["seqid"]):
            print(f"Couldn't generate SeqID due to invalid SubjectID: {row['subjectid']}")

    fish_df = fish_df.dropna(subset=["seqid", "redcap_repeat_instance"])

    redcap_cols = ["seqid","redcap_repeat_instance", "redcap_repeat_instrument", "fish_doadmin", 
                   "facquisition","facquisition_trials", "fretention", "fgen", "fish_task_complete"]
    
    fish_df = fish_df.astype(str)
    
    return fish_df[redcap_cols]

In [162]:
def fetch_summaries_paths(task_path) -> list[str]:
    summaries_paths = []
    for root, dirs, files in os.walk(task_path):
        for dir in dirs:
            if dir == "summaries":
                summaries_paths.append(os.path.join(root, dir))
    return summaries_paths

In [163]:
def prepare_redcap_import() -> pd.DataFrame:
    tasks_paths = [os.path.join("..", "choose34"), os.path.join("..", "choose_fmri"), os.path.join("..", "fish15")]
    data_frames = []
    for path in tasks_paths:
        summaries_paths = fetch_summaries_paths(path)
        for summaries_path in summaries_paths:
            if "choose" in path:
                choose_concat = concatenate_summaries(summaries_path)
                choose_df = prepare_choose_import_file(choose_concat)
                data_frames.append(choose_df)
            elif "fish" in path:
                fish_concat = concatenate_summaries(summaries_path)
                fish_df = prepare_fish_import_file(fish_concat)
                data_frames.append(fish_df)
    redcap_import = pd.concat(data_frames, ignore_index=True)
    # make all columns strings
    redcap_import = redcap_import.astype(str)
    return redcap_import

In [164]:
def redcap_export_records(api_token: str, records: list[str], fields: list[str]) -> requests.Response:
    fields.remove("redcap_repeat_instance")
    fields.remove("redcap_repeat_instrument")
    url = "https://redcap.rutgers.edu/api/"
    data = {
        "token": api_token,
        "content": "record",
        "format": "json",
        "type": "flat",
        "csvDelimiter": "",
        "records": ",".join(records),
        "fields": ",".join(fields),
        "rawOrLabel": "raw",
        "rawOrLabelHeaders": "raw",
        "exportCheckboxLabel": "false",
        "exportSurveyFields": "false",
        "exportDataAccessGroups": "false",
        "returnFormat": "json"
    }
    print("Requesting data from REDCap...") 
    response = requests.post(url, data=data)
    return response

def redcap_import_records(api_token: str, redcap_import: pd.DataFrame) -> requests.Response:
    url = "https://redcap.rutgers.edu/api/"
    data = {
        "token": api_token,
        "content": "record",
        "format": "json",
        "type": "flat",
        "overwriteBehavior": "normal",
        "data": redcap_import.to_json(orient="records"),
        "dateFormat": "MDY",
        "returnContent": "count",
        "returnFormat": "json"
    }
    response = requests.post(url, data=data)
    return response

In [165]:
def compare_and_fill(redcap_import: pd.DataFrame, redcap_data: pd.DataFrame):
    new_redcap_import = redcap_import.copy()
    for i, row in new_redcap_import.iterrows():
        seqid = row["seqid"]
        instance = row["redcap_repeat_instance"]
        instrument = row["redcap_repeat_instrument"]
        redcap_data_row = redcap_data[(redcap_data["seqid"] == seqid) & (redcap_data["redcap_repeat_instance"] == instance) & (redcap_data["redcap_repeat_instrument"] == instrument)]
        if not redcap_data_row.empty:
            for field in redcap_data_row.columns[3:]:
                if field in new_redcap_import.columns and pd.notnull(redcap_data_row[field].values[0]):
                    new_redcap_import.at[i, field] = str(redcap_data_row[field].values[0])
    return new_redcap_import
            

In [166]:
redcap_import = prepare_redcap_import()
redcap_import["seqid"] = "A0test" # For testing purposes
api_token = "90DE4B3E238EBBD50D26D9E659C4431D"

print("Choose where to save the REDCap import file...")  
file_path = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV files", "*.csv")])
print
if file_path:
    redcap_import.to_csv(file_path, index=False)
    print(f"REDCap import file saved to {file_path}")
else:
    print("No location selected. Exiting...")
    sys.exit()
redcap_export_response = redcap_export_records(api_token, redcap_import["seqid"].unique().tolist(), redcap_import.columns.tolist())
if(redcap_export_response.status_code != 200):
    print("Error fetching data from REDCap. Exiting...")
    print(redcap_export_response.text)
    sys.exit()
redcap_data = pd.DataFrame(redcap_export_response.json())


artificial_rows = [
    {"seqid": "A0test", "redcap_repeat_instance": "1", "redcap_repeat_instrument": "choose_task", "choose_doadmin": "01/01/2021", "choosetrainingaccavg": "0.5", "choosetrainingrtavg": "1000", "choosetrainingnerr": "5", "chooseprobeaccavg": "0.6", "chooseprobenerr": "4", "choose_task_complete": "1"},
    {"seqid": "A0test", "redcap_repeat_instance": "1", "redcap_repeat_instrument": "fish_task", "fish_doadmin": "01/01/2021", "facquisition": "0.5", "facquisition_trials": "10", "fretention": "0.6", "fgen": "0.7", "fish_task_complete": "1"}
]
redcap_data = pd.concat([redcap_data, pd.DataFrame(artificial_rows)], ignore_index=True)
redcap_data = redcap_data.replace({-9: None, -9.0: None, "-9": None, "-9.0": None})
redcap_data

Couldn't generate SeqID due to invalid SubjectID: TEST_3D_EPI_FMRI
Couldn't generate SeqID due to invalid SubjectID: A0test
Couldn't generate SeqID due to invalid SubjectID: TEST
Couldn't generate SeqID due to invalid SubjectID: TIME_OUT_TEST
Choose where to save the REDCap import file...
REDCap import file saved to /home/jlmojicaperez/gluck/GluckLab/utils/redcap_import.csv
Requesting data from REDCap...


Unnamed: 0,seqid,redcap_repeat_instrument,redcap_repeat_instance,choose_doadmin,choosetrainingaccavg,choosetrainingrtavg,choosetrainingnerr,chooseprobeaccavg,chooseprobenerr,choose_task_complete,fish_doadmin,facquisition,facquisition_trials,fretention,fgen,fish_task_complete
0,A0test,choose_task,1,2021-01-01,0.5,1000.0,5.0,0.6,4.0,1.0,,,,,,
1,A0test,fish_task,1,,,,,,,,2021-01-01,0.5,10.0,0.6,0.7,1.0
2,A0test,seqid_subjectid_covariates,1,,,,,,,,,,,,,
3,A0test,choose_task,1,01/01/2021,0.5,1000.0,5.0,0.6,4.0,1.0,,,,,,
4,A0test,fish_task,1,,,,,,,,01/01/2021,0.5,10.0,0.6,0.7,1.0


In [167]:
redcap_import

Unnamed: 0,seqid,redcap_repeat_instance,redcap_repeat_instrument,choose_doadmin,choosetrainingaccavg,choosetrainingrtavg,choosetrainingnerr,chooseprobeaccavg,chooseprobenerr,choose_task_complete,fish_doadmin,facquisition,facquisition_trials,fretention,fgen,fish_task_complete
0,A0test,1,choose_task,08/05/2024,0.8518518518518519,1.3100571333348785,4.0,1.0,0.0,1.0,,,,,,
1,A0test,1,fish_task,,,,,,,,08/05/2024,0.9090909090909092,33.0,1.0,1.0,1.0


In [168]:
redcap_import = compare_and_fill(redcap_import, redcap_data)
# convert nan to empty string
redcap_import = redcap_import.replace({"nan": ""})
redcap_import

Unnamed: 0,seqid,redcap_repeat_instance,redcap_repeat_instrument,choose_doadmin,choosetrainingaccavg,choosetrainingrtavg,choosetrainingnerr,chooseprobeaccavg,chooseprobenerr,choose_task_complete,fish_doadmin,facquisition,facquisition_trials,fretention,fgen,fish_task_complete
0,A0test,1,choose_task,01/01/2021,0.5,1000.0,5.0,0.6,4.0,1.0,,,,,,
1,A0test,1,fish_task,,,,,,,,01/01/2021,0.5,10.0,0.6,0.7,1.0


In [169]:
redcap_import_response = redcap_import_records(api_token, redcap_import)
if(redcap_import_response.status_code != 200):
    print("Error importing data to REDCap. Exiting...")
    print(redcap_import_response.text)
    sys.exit()
print("Data imported successfully to REDCap. Records affected: ", redcap_import_response.json()["count"])

Data imported successfully to REDCap. Records affected:  1
