In [None]:
import pandas as pd
import patato as pat
from pathlib import Path
from tqdm import tqdm
import json

In [3]:
def get_region_repetition(name):
    """Infer the imaging site and repetition number from a scan name.

    The site is found by case-insensitive substring matching, tested in
    this order: forearm, leg, bicep, neck. A forearm match (the
    misspelling "forarm" is accepted too) only counts when the name
    contains no "-"; a forearm name that does contain "-" falls through
    to the remaining checks.

    The repetition number is taken from the second-to-last character of
    the name (presumably names end like "... (2)" -- TODO confirm against
    the scan naming scheme). Anything other than a digit 1-9 in that
    position, including "0", yields 1.

    Parameters
    ----------
    name : str
        Scan name as stored with the scan.

    Returns
    -------
    tuple of (str, int)
        ``(region, scan_number)``. ``region`` is one of "Forearm", "Leg",
        "Bicep", "Neck", or "" when nothing matched.
    """
    lowered = name.lower()

    if ("forearm" in lowered or "forarm" in lowered) and "-" not in lowered:
        region = "Forearm"
    elif "leg" in lowered:
        region = "Leg"
    elif "bicep" in lowered:
        region = "Bicep"
    elif "neck" in lowered:
        region = "Neck"
    else:
        region = ""

    # Names shorter than two characters have no second-to-last character;
    # treat them as the first repetition instead of raising IndexError.
    digit = lowered[-2] if len(lowered) >= 2 else ""
    if digit and digit in "123456789":
        scan_number = int(digit)
    else:
        scan_number = 1
    return region, scan_number

In [None]:
# Folder holding one pulse-oximetry CSV per participant; the CSV stem is
# also used as the name of the participant's scan folder under pa_data.
pulse_ox_data = Path("../Cleaned Pulse Ox")

# Read the path settings inside a context manager so the file handle is
# closed as soon as the JSON has been parsed.
with open("../data_paths.json", "r") as settings_file:
    settings = json.load(settings_file)
pa_data = (Path.cwd() / "../" / settings["main_data_path"]).resolve()

dfs = []
# Sort the listing: glob order depends on the filesystem, and sorting keeps
# the row order of the combined table the same from run to run.
for pulseox_file in tqdm(sorted(pulse_ox_data.glob("*.csv"))):
    # Files whose path contains "SKIN13" are excluded.
    if "SKIN13" in str(pulseox_file):
        continue
    df = pd.read_csv(pulseox_file, index_col=0)
    # The CSV index labels are used to build the scan file names.
    for i in df.index:
        scan_id = f"Scan_{i}.hdf5"
        scan_path = pa_data / pulseox_file.stem / scan_id
        # Rows without a matching scan file keep NaN in the added columns.
        if not scan_path.exists():
            continue
        scan_name = pat.PAData.from_hdf5(scan_path).get_scan_name()
        region, number = get_region_repetition(scan_name)
        df.loc[i, "ScanName"] = scan_name
        df.loc[i, "Site"] = region
        df.loc[i, "RepNumber"] = number
    df["SkinID"] = pulseox_file.stem
    dfs.append(df)

100%|██████████| 40/40 [00:09<00:00,  4.32it/s]


In [5]:
# Stack the per-file frames, turn the index into an ordinary column, and
# keep only the identifier, labelling and pulse-oximetry columns.
df_full = pd.concat(dfs).reset_index().loc[
    :, ["SkinID", "ScanName", "Site", "RepNumber", "Pulse Ox"]
]

In [6]:
# Map the ITA table's region labels onto the site names used for the scans;
# labels missing from the mapping are passed through unchanged.
regions = {"Radial Artery": "Forearm", "Carotid Artery": "Neck"}

df_ita = (
    pd.read_parquet("../01_Prepare Data/ITA_raw.parquet")
    .assign(
        Site=lambda frame: frame["Region"].apply(
            lambda label: regions.get(label, label)
        )
    )[["SkinID", "Site", "ITA"]]
    .reset_index()
)

In [7]:
# Inner join on participant and site: rows without a match on both keys
# in the other table are dropped.
df_so2_all = pd.merge(
    df_full,
    df_ita.loc[:, ["SkinID", "Site", "ITA"]],
    how="inner",
    on=["SkinID", "Site"],
)

In [8]:
# Write the merged table to a parquet file; the relative path is resolved
# against the current working directory.
df_so2_all.to_parquet("so2_ita_pulseox_all.parquet")