In [29]:
import re
from pathlib import Path
import pandas as pd
import h5py
from pathlib import Path

In [30]:
import json

# Read the JSON settings file that holds the data locations. Path.read_text
# opens and closes the file itself, so no file handle is left dangling
# (the previous json.load(open(...)) never closed it).
settings = json.loads(Path("../data_paths.json").read_text())
# Main data folder, resolved against the parent of the current working directory.
data_path = (Path.cwd() / "../" / settings["main_data_path"]).resolve()

In [31]:
# The example data does not have spectral unmixing applied yet, so do that
# here: run ../Scripts/unmix.py with the main data folder as its argument.

%run ../Scripts/unmix.py "{data_path}"

D:\Papers\PAISKINTONE\Data


In [32]:
# Resolve the vitiligo data folder named in the settings file against the
# parent of the current working directory.
vitiligo_data_path = (Path.cwd() / "../" / settings["vitiligo_data_path"]).resolve()

# Run ../Scripts/unmix.py again, this time passing the vitiligo data folder.
%run ../Scripts/unmix.py "{vitiligo_data_path}"

D:\Papers\PAISKINTONE\Vitiligo\Data


In [33]:
# Body-site keywords that a scan name may contain.
allowable_regions = [
    "bicep",
    "forearm",
    "leg",
    "neck",
    "brachialartery",
    "upperforearm",
    "ulnar",
]

# Optional qualifiers that may follow the body site in a scan name.
position_descriptors = [
    "parallel",
    "pigment",
    "nopigment",
    "paralleltwopigment",
    "parallelalt",
]

# Each keyword is wrapped in a non-capturing group and the groups are OR-ed.
pattern_regions = "|".join(map("(?:{})".format, allowable_regions))
pattern_position = "|".join(map("(?:{})".format, position_descriptors))

# Full scan-name pattern. Capture groups, in order:
#   1. optional side ("left" / "right"), followed by "-"
#   2. body site (one of allowable_regions)
#   3. optional "-<position descriptor>"
#   4. a single run digit (a "-" may stand in its place, leaving this group empty)
#   5. a single lowercase run letter
allowable_scan_name_pattern = (
    r"^(?:((?:left)|(?:right))-)?"
    rf"({pattern_regions})"
    rf"(?:-({pattern_position}))?"
    r"(?:([0-9])|-)([a-z])$"
)

In [34]:
# Compile the scan-name pattern once; the compiled object is reused for every
# file name checked later in the notebook.
c = re.compile(allowable_scan_name_pattern)

In [42]:
# Collect one record per HDF5 scan whose stored name matches the allowable
# scan-name pattern; everything else is skipped.

# Search root: the parent of the main data folder, so the recursive glob below
# covers everything beneath that parent rather than the main data folder only.
# NOTE(review): this statement also rebinds `data_path` (until now the main
# data folder itself) to that parent directory. Kept as-is for compatibility,
# but re-running an earlier cell that uses `data_path` afterwards would see the
# new value -- confirm this is intended.
p = data_path = (Path.cwd() / "../" / settings["main_data_path"] / "../").resolve()

scans = []

# NOTE(review): `name_map` is not read anywhere in this cell, and its label for
# "upperforearm" ("Brachial Artery") differs from the one `region_map` returns
# ("Upper Forearm"). Left unchanged in case something else relies on it.
name_map = {
    "leg": "Leg",
    "forearm": "Radial Artery",
    "upperforearm": "Brachial Artery",
    "bicep": "Bicep",
    "neck": "Carotid Artery",
    "ulnar": "Ulnar Artery",
    "brachialartery": "Brachial Artery",
}

for f in p.glob("**/*.hdf5"):
    # Only the "name" attribute is needed, so the file is closed again before
    # any further processing.
    with h5py.File(f, "r") as hdf:
        scan_name = hdf.attrs.get("name")

    # A file without a stored name cannot be classified; skip it like any
    # other file whose name is not recognised.
    if scan_name is None:
        continue
    # h5py can hand back fixed-length string attributes as bytes.
    if isinstance(scan_name, bytes):
        scan_name = scan_name.decode("utf-8")

    # The pattern is anchored with ^...$, so there is at most one match per
    # name; a single match() replaces findall() plus a length check.
    name_match = c.match(scan_name)
    if name_match is None:
        continue

    # Groups that did not take part in the match come back as "" (the same
    # value findall() would have produced).
    leftright, location, parallel_pigment, run_number, run_letter = name_match.groups(
        default=""
    )
    skin_id = f.parent.stem

    details = {
        "File": str(f),
        # The file stem is expected to end in "_<number>", e.g. "Scan_4".
        "ScanNumber": int(f.stem.split("_")[-1]),
        "SkinID": skin_id,
        "Location": location,
        "RunNumber": run_number,
        "RunLetter": run_letter,
        # The single optional descriptor is split over three columns.
        "VitiligoPigment": (
            parallel_pigment if parallel_pigment in ["pigment", "nopigment"] else ""
        ),
        "Parallel": "parallel" if parallel_pigment == "parallel" else "",
        "OtherDetail": (
            parallel_pigment
            if parallel_pigment not in ["pigment", "nopigment", "parallel"]
            else ""
        ),
        "VitiligoLeftRight": leftright,
    }

    # Small hard-coded corrections for individual SkinIDs; only the first
    # matching rule is applied.
    if skin_id == "SKIN28" and location == "upperforearm":
        details["VitiligoPigment"] = "pigment"
        details["Location"] = "forearm"
    elif skin_id == "SKIN28" and location == "forearm":
        details["VitiligoPigment"] = "nopigment"
    elif skin_id == "SKIN33":
        details["VitiligoLeftRight"] = "left"
    elif skin_id == "SKIN34" and location == "upperforearm":
        details["OtherDetail"] = "ignore"
    elif skin_id == "SKIN30" and location == "bicep" and leftright == "right":
        details["VitiligoPigment"] = "pigment"

    scans.append(details)

In [43]:
# Turn the collected scan records into a table ordered by SkinID and then
# ScanNumber, with a fresh 0..n-1 index.
df = (
    pd.DataFrame(scans)
    .sort_values(by=["SkinID", "ScanNumber"])
    .reset_index(drop=True)
)

In [44]:
def region_map(x):
    """Return the display label for a scan location key.

    Parameters
    ----------
    x : str
        Location key, e.g. ``"forearm"``.

    Returns
    -------
    str
        The human-readable label for that location.

    Raises
    ------
    KeyError
        If ``x`` is not one of the known location keys.
    """
    labels = dict(
        bicep="Bicep",
        neck="Carotid Artery",
        leg="Leg",
        forearm="Radial Artery",
        brachialartery="Brachial Artery",
        ulnar="Ulnar Artery",
        upperforearm="Upper Forearm",
    )
    return labels[x]


# Display labels for the raw pigment flag; an empty flag stays empty.
pigment_maps = {
    "pigment": "Pigmented",
    "nopigment": "Not Pigmented",
    "": "",
}

# Human-readable companion columns derived from the raw ones.
df["Region"] = df["Location"].apply(lambda location: region_map(location))
df["Pigment"] = df["VitiligoPigment"].apply(lambda flag: pigment_maps[flag])
df["Side"] = df["VitiligoLeftRight"].str.capitalize()

In [45]:
# Show the assembled scan table as this cell's output for a visual check.
df

Unnamed: 0,File,ScanNumber,SkinID,Location,RunNumber,RunLetter,VitiligoPigment,Parallel,OtherDetail,VitiligoLeftRight,Region,Pigment,Side
0,D:\Papers\PAISKINTONE\Data\SKIN01\Scan_4.hdf5,4,SKIN01,leg,1,a,,,,,Leg,,
1,D:\Papers\PAISKINTONE\Data\SKIN01\Scan_5.hdf5,5,SKIN01,leg,1,b,,,,,Leg,,
2,D:\Papers\PAISKINTONE\Data\SKIN01\Scan_6.hdf5,6,SKIN01,leg,1,c,,,,,Leg,,
3,D:\Papers\PAISKINTONE\Data\SKIN01\Scan_7.hdf5,7,SKIN01,forearm,1,a,,,,,Radial Artery,,
4,D:\Papers\PAISKINTONE\Data\SKIN01\Scan_8.hdf5,8,SKIN01,forearm,1,b,,,,,Radial Artery,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1505,D:\Papers\PAISKINTONE\Data\SKIN43\Scan_36.hdf5,36,SKIN43,bicep,3,b,,,,,Bicep,,
1506,D:\Papers\PAISKINTONE\Data\SKIN43\Scan_37.hdf5,37,SKIN43,bicep,3,c,,,,,Bicep,,
1507,D:\Papers\PAISKINTONE\Data\SKIN43\Scan_38.hdf5,38,SKIN43,neck,3,a,,,,,Carotid Artery,,
1508,D:\Papers\PAISKINTONE\Data\SKIN43\Scan_39.hdf5,39,SKIN43,neck,3,b,,,,,Carotid Artery,,


In [46]:
# Load ITA_raw.parquet (read relative to the current working directory).
df_ita = pd.read_parquet("ITA_raw.parquet")

# Join the scan table onto it. No `on=` is given, so pandas joins on the
# columns the two tables share (inner join by default); validate="one_to_many"
# makes pandas raise if those key values are not unique in df_ita.
df_full = df_ita.merge(df, validate="one_to_many")

In [47]:
# Write the merged table to scan_table.parquet (path is relative to the
# current working directory).
df_full.to_parquet("scan_table.parquet")