In [3]:
import pandas as pd, json

# input/output files
json_path = "/Users/daisybuathatseephol/Documents/three_json_output/Training 2/training2.json"
msg_path = "/Users/daisybuathatseephol/Documents/three_json_output/Outputs/msgdata_training2.tsv"
out_path = "/Users/daisybuathatseephol/Documents/three_json_output/Training 2/training2_fixed_displaytiming.json"

# load message log
msg = pd.read_csv(msg_path, sep="\t", encoding="utf-16")
msg["TRIAL_INDEX"] = pd.to_numeric(msg["TRIAL_INDEX"], errors="coerce").astype("Int64")

# filter relevant rows
msg = msg.dropna(subset=["TRIAL_INDEX"])
msg = msg.sort_values(["RECORDING_SESSION_LABEL", "TRIAL_INDEX", "CURRENT_MSG_TIME"])

# get first and last 'second_display_on' per trial
firsts = (
    msg[msg["CURRENT_MSG_TEXT"] == "second_display_on"]
    .groupby(["RECORDING_SESSION_LABEL", "TRIAL_INDEX"])
    .first()[["CURRENT_MSG_TIME"]]
    .rename(columns={"CURRENT_MSG_TIME": "first_display_on_time"})
)

lasts = (
    msg[(msg["CURRENT_MSG_TEXT"] == "second_display_on") &
        (msg["CURRENT_MSG_FIX_INDEX"].apply(lambda x: str(x).isdigit()))]
    .groupby(["RECORDING_SESSION_LABEL", "TRIAL_INDEX"])
    .last()[["CURRENT_MSG_FIX_INDEX"]]
    .rename(columns={"CURRENT_MSG_FIX_INDEX": "last_fix_index"})
)

# merge both
timing = pd.concat([firsts, lasts], axis=1).reset_index()

# build lookup
lookup = (
    timing.set_index(["RECORDING_SESSION_LABEL", "TRIAL_INDEX"])
    .to_dict(orient="index")
)

# load JSON
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# update each record
for rec in data:
    rec["paradigm"] = "category-matching"
    sid = rec.get("subject_id") or rec.get("RECORDING_SESSION_LABEL")
    tid = rec.get("trial_index")
    key = (sid, tid)

    if key in lookup:
        rec["test_image_on_ms"] = int(lookup[key]["first_display_on_time"])
        try:
            rec["test_image_fixation_idx"] = int(lookup[key]["last_fix_index"])
        except (ValueError, TypeError):
            rec["test_image_fixation_idx"] = None
    else:
        rec["test_image_on_ms"] = None
        rec["test_image_fixation_idx"] = None

# save
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

print(f"✅ Saved corrected JSON: {out_path}")

  msg = pd.read_csv(msg_path, sep="\t", encoding="utf-16")


✅ Saved corrected JSON: /Users/daisybuathatseephol/Documents/three_json_output/Training 2/training2_fixed_displaytiming.json


In [4]:
import pandas as pd, json

# input/output files
json_path = "/Users/daisybuathatseephol/Documents/three_json_output/Testing/testing.json"
msg_path = "/Users/daisybuathatseephol/Documents/three_json_output/Outputs/msgdata_testing.tsv"
out_path = "/Users/daisybuathatseephol/Documents/three_json_output/Testing/testing_fixed_displaytiming.json"

# load message log
msg = pd.read_csv(msg_path, sep="\t", encoding="utf-16")
msg["TRIAL_INDEX"] = pd.to_numeric(msg["TRIAL_INDEX"], errors="coerce").astype("Int64")

# filter and sort
msg = msg.dropna(subset=["TRIAL_INDEX"])
msg = msg.sort_values(["RECORDING_SESSION_LABEL", "TRIAL_INDEX", "CURRENT_MSG_TIME"])

# first occurrence of 'second_display_on'
firsts = (
    msg[msg["CURRENT_MSG_TEXT"] == "second_display_on"]
    .groupby(["RECORDING_SESSION_LABEL", "TRIAL_INDEX"])
    .first()[["CURRENT_MSG_TIME"]]
    .rename(columns={"CURRENT_MSG_TIME": "first_display_on_time"})
)

# last *valid numeric* fixation index before trial changes
lasts = (
    msg[
        (msg["CURRENT_MSG_TEXT"] == "second_display_on") &
        (msg["CURRENT_MSG_FIX_INDEX"].apply(lambda x: str(x).isdigit()))
    ]
    .groupby(["RECORDING_SESSION_LABEL", "TRIAL_INDEX"])
    .last()[["CURRENT_MSG_FIX_INDEX"]]
    .rename(columns={"CURRENT_MSG_FIX_INDEX": "last_fix_index"})
)

# merge both
timing = pd.concat([firsts, lasts], axis=1).reset_index()

# build lookup
lookup = (
    timing.set_index(["RECORDING_SESSION_LABEL", "TRIAL_INDEX"])
    .to_dict(orient="index")
)

# load JSON
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# update JSON
for rec in data:
    sid = rec.get("subject_id") or rec.get("RECORDING_SESSION_LABEL")
    tid = rec.get("trial_index")
    key = (sid, tid)

    if key in lookup:
        rec["test_image_on_ms"] = int(lookup[key]["first_display_on_time"])
        try:
            rec["test_image_fixation_idx"] = int(lookup[key]["last_fix_index"])
        except (ValueError, TypeError):
            rec["test_image_fixation_idx"] = None
    else:
        rec["test_image_on_ms"] = None
        rec["test_image_fixation_idx"] = None

# save
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

print(f"✅ Saved corrected JSON: {out_path}")


✅ Saved corrected JSON: /Users/daisybuathatseephol/Documents/three_json_output/Testing/testing_fixed_displaytiming.json
