In [1]:
import re
from pathlib import Path
import pandas as pd
import h5py
import numpy as np

In [2]:
# The example data has not had spectral unmixing applied, so run the
# unmix.py script over the ../Data directory here.
%run ../Scripts/unmix.py ../Data

../Data
Processing ../Data\SKIN01\Scan_1.hdf5
Processing ../Data\SKIN02\Scan_1.hdf5
Processing ../Data\SKIN03\Scan_1.hdf5
Processing ../Data\SKIN04\Scan_1.hdf5
Processing ../Data\SKIN05\Scan_1.hdf5
Processing ../Data\SKIN06\Scan_1.hdf5
Processing ../Data\SKIN07\Scan_1.hdf5
Processing ../Data\SKIN08\Scan_1.hdf5
Processing ../Data\SKIN09\Scan_1.hdf5
Processing ../Data\SKIN10\Scan_1.hdf5
Processing ../Data\SKIN11\Scan_1.hdf5
Processing ../Data\SKIN12\Scan_1.hdf5
Processing ../Data\SKIN13\Scan_1.hdf5
Processing ../Data\SKIN14\Scan_1.hdf5
Processing ../Data\SKIN15\Scan_1.hdf5
Processing ../Data\SKIN16\Scan_1.hdf5
Processing ../Data\SKIN17\Scan_1.hdf5
Processing ../Data\SKIN18\Scan_1.hdf5
Processing ../Data\SKIN19\Scan_1.hdf5
Processing ../Data\SKIN20\Scan_1.hdf5
Processing ../Data\SKIN21\Scan_1.hdf5
Processing ../Data\SKIN22\Scan_1.hdf5
Processing ../Data\SKIN23\Scan_1.hdf5
Processing ../Data\SKIN24\Scan_1.hdf5
Processing ../Data\SKIN25\Scan_1.hdf5
Processing ../Data\SKIN26\Scan_1.hdf5
Proc

In [3]:
# Run the same unmix.py script over the ../Vitiligo/Data directory.
%run ../Scripts/unmix.py ../Vitiligo/Data

../Vitiligo/Data
Processing ../Vitiligo/Data\SKIN28\Scan_1.hdf5
Processing ../Vitiligo/Data\SKIN30\Scan_1.hdf5
Processing ../Vitiligo/Data\SKIN33\Scan_1.hdf5
Processing ../Vitiligo/Data\SKIN34\Scan_1.hdf5
Processing ../Vitiligo/Data\SKIN36\Scan_1.hdf5
Processing ../Vitiligo/Data\SKIN37\Scan_1.hdf5
Processing ../Vitiligo/Data\SKIN28\Scan_2.hdf5
Processing ../Vitiligo/Data\SKIN30\Scan_2.hdf5
Processing ../Vitiligo/Data\SKIN34\Scan_2.hdf5
Processing ../Vitiligo/Data\SKIN36\Scan_2.hdf5
Processing ../Vitiligo/Data\SKIN37\Scan_2.hdf5
Processing ../Vitiligo/Data\SKIN33\Scan_3.hdf5
Processing ../Vitiligo/Data\SKIN28\Scan_5.hdf5
Processing ../Vitiligo/Data\SKIN30\Scan_5.hdf5
Processing ../Vitiligo/Data\SKIN34\Scan_5.hdf5
Processing ../Vitiligo/Data\SKIN36\Scan_5.hdf5
Processing ../Vitiligo/Data\SKIN37\Scan_5.hdf5
Processing ../Vitiligo/Data\SKIN33\Scan_6.hdf5
Processing ../Vitiligo/Data\SKIN30\Scan_8.hdf5
Processing ../Vitiligo/Data\SKIN36\Scan_8.hdf5
Processing ../Vitiligo/Data\SKIN37\Scan_8.h

In [4]:
def _alternation(options):
    """Join ``options`` into a regex alternation of non-capturing groups."""
    return "|".join("(?:" + option + ")" for option in options)


# Region keywords that are accepted in a scan name.
allowable_regions = [
    "bicep",
    "forearm",
    "leg",
    "neck",
    "brachialartery",
    "upperforearm",
    "ulnar",
]
pattern_regions = _alternation(allowable_regions)

# Optional descriptors that may follow the region, separated by a hyphen.
position_descriptors = [
    "parallel",
    "pigment",
    "nopigment",
    "paralleltwopigment",
    "parallelalt",
]
pattern_position = _alternation(position_descriptors)

# Full scan-name pattern. Capture groups, in order:
#   1. optional "left"/"right" prefix
#   2. region keyword
#   3. optional position descriptor
#   4. single-digit run number (empty when a hyphen is used instead)
#   5. single lowercase run letter
allowable_scan_name_pattern = "".join(
    [
        r"^(?:((?:left)|(?:right))-)?(",
        pattern_regions,
        ")(?:-(",
        pattern_position,
        "))?(?:([0-9])|-)([a-z])$",
    ]
)

In [5]:
# Compile the scan-name pattern once; the compiled object is reused for every
# HDF5 file examined in the scan loop.
c = re.compile(allowable_scan_name_pattern)

In [6]:
# Root directory that is searched recursively for HDF5 scan files.
p = Path("../")

# One metadata dict per scan whose name matches the allowed pattern.
scans = []

# NOTE(review): name_map is not referenced by any visible cell, and it maps
# "upperforearm" to "Brachial Artery" whereas region_map() uses
# "Upper Forearm". It is kept unchanged in case other code relies on it.
name_map = {
    "leg": "Leg",
    "forearm": "Radial Artery",
    "upperforearm": "Brachial Artery",
    "bicep": "Bicep",
    "neck": "Carotid Artery",
    "ulnar": "Ulnar Artery",
    "brachialartery": "Brachial Artery",
}


def _build_scan_details(f, re_groups):
    """Build the metadata record for a single scan file.

    Parameters
    ----------
    f : pathlib.Path
        Path of the scan file. The scan number is the integer after the last
        underscore of the file stem; the skin ID is the parent folder's stem.
    re_groups : tuple of str
        The five captured groups of the scan-name pattern, in order: side
        ("left"/"right"), location, position descriptor, run number and run
        letter. Optional groups that did not match are empty strings.

    Returns
    -------
    dict
        Record with the keys File, ScanNumber, SkinID, Location, RunNumber,
        RunLetter, VitiligoPigment, Parallel, OtherDetail and
        VitiligoLeftRight (in that insertion order).

    Raises
    ------
    ValueError
        If the text after the last underscore of the file stem is not an
        integer.
    """
    leftright, location, parallel_pigment, run_number, run_letter = re_groups
    skin_id = f.parent.stem

    is_pigment_flag = parallel_pigment in ["pigment", "nopigment"]
    is_parallel = parallel_pigment == "parallel"

    details = {
        "File": str(f),
        "ScanNumber": int(f.stem.split("_")[-1]),
        "SkinID": skin_id,
        "Location": location,
        "RunNumber": run_number,
        "RunLetter": run_letter,
        "VitiligoPigment": parallel_pigment if is_pigment_flag else "",
        "Parallel": "parallel" if is_parallel else "",
        # Any descriptor that is neither a pigment flag nor "parallel".
        "OtherDetail": "" if (is_pigment_flag or is_parallel) else parallel_pigment,
        "VitiligoLeftRight": leftright,
    }

    # Small per-subject corrections. Only the first matching rule is applied,
    # and the rules test the values parsed from the name, not the corrected
    # ones.
    if skin_id == "SKIN28" and location == "upperforearm":
        details["VitiligoPigment"] = "pigment"
        details["Location"] = "forearm"
    elif skin_id == "SKIN28" and location == "forearm":
        details["VitiligoPigment"] = "nopigment"
    elif skin_id == "SKIN33":
        details["VitiligoLeftRight"] = "left"
    elif skin_id == "SKIN34" and location == "upperforearm":
        details["OtherDetail"] = "ignore"
    elif skin_id == "SKIN30" and location == "bicep" and leftright == "right":
        details["VitiligoPigment"] = "pigment"

    return details


for f in p.glob("**/*.hdf5"):
    # Read the scan name and release the file handle straight away.
    with h5py.File(f, "r") as hdf:
        scan_name = hdf.attrs.get("name")

    # The search is recursive, so unrelated HDF5 files may be found: skip any
    # file without a "name" attribute rather than aborting the whole scan.
    if scan_name is None:
        continue
    # Some HDF5 string attributes are returned as bytes; the compiled pattern
    # is a str pattern, so decode before matching.
    if isinstance(scan_name, bytes):
        scan_name = scan_name.decode("utf-8")

    # Keep only files whose name matches the allowed pattern exactly once.
    re_match = c.findall(scan_name)
    if len(re_match) != 1:
        continue

    scans.append(_build_scan_details(f, re_match[0]))

In [7]:
# One row per parsed scan, ordered by SkinID and then ScanNumber, with the
# index renumbered from 0 after sorting.
df = pd.DataFrame(scans).sort_values(["SkinID", "ScanNumber"], ignore_index=True)

In [8]:
def region_map(x):
    """Return the display name for the location keyword ``x``.

    Raises ``KeyError`` when ``x`` is not one of the known location keywords.
    """
    display_names = dict(
        bicep="Bicep",
        neck="Carotid Artery",
        leg="Leg",
        forearm="Radial Artery",
        brachialartery="Brachial Artery",
        ulnar="Ulnar Artery",
        upperforearm="Upper Forearm",
    )
    return display_names[x]


# Display labels for the VitiligoPigment codes; the empty string (no pigment
# flag recorded for the scan) maps to itself.
pigment_maps = {"pigment": "Pigmented", "nopigment": "Not Pigmented", "": ""}

# Add display columns derived from the parsed fields.
df["Region"] = df["Location"].apply(region_map)
# Direct dict indexing: a VitiligoPigment value that is not a key of
# pigment_maps raises KeyError.
df["Pigment"] = df["VitiligoPigment"].apply(lambda x: pigment_maps[x])
# Capitalised copy of the left/right side field.
df["Side"] = df["VitiligoLeftRight"].str.capitalize()

In [9]:
# Display the assembled scan table.
df

Unnamed: 0,File,ScanNumber,SkinID,Location,RunNumber,RunLetter,VitiligoPigment,Parallel,OtherDetail,VitiligoLeftRight,Region,Pigment,Side
0,..\Data\SKIN01\Scan_4.hdf5,4,SKIN01,leg,1,a,,,,,Leg,,
1,..\Data\SKIN01\Scan_7.hdf5,7,SKIN01,forearm,1,a,,,,,Radial Artery,,
2,..\Data\SKIN01\Scan_10.hdf5,10,SKIN01,bicep,1,a,,,,,Bicep,,
3,..\Data\SKIN01\Scan_13.hdf5,13,SKIN01,neck,1,a,,,,,Carotid Artery,,
4,..\Data\SKIN02\Scan_2.hdf5,2,SKIN02,leg,1,a,,,,,Leg,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,..\Data\SKIN43\Scan_2.hdf5,2,SKIN43,leg,1,a,,,,,Leg,,
206,..\Data\SKIN43\Scan_5.hdf5,5,SKIN43,forearm,1,a,,,,,Radial Artery,,
207,..\Data\SKIN43\Scan_8.hdf5,8,SKIN43,bicep,1,a,,,,,Bicep,,
208,..\Data\SKIN43\Scan_11.hdf5,11,SKIN43,neck,1,a,,,,,Carotid Artery,,


In [10]:
# Load the ITA table from the working directory.
df_ita = pd.read_parquet("ITA_raw.parquet")
# No `on`/`how` is given, so pandas joins on every column name the two frames
# share and performs an inner join (rows without a partner are dropped).
# validate="one_to_many" makes pandas raise MergeError unless the join keys
# are unique in df_ita (the left frame).
# NOTE(review): the shared columns are not visible here - confirm which keys
# are actually used for the join.
df_full = pd.merge(df_ita, df, validate="one_to_many")

In [11]:
# Write the merged table to scan_table.parquet in the working directory.
df_full.to_parquet("scan_table.parquet")