In [1]:
import pandas as pd
from pathlib import Path
import re

In [2]:
# Folder that is searched recursively for colourimeter ".cmf" files.
# Alternative location (currently disabled): Path("ColourimeterData")
data_folder = Path("../")
# NOTE(review): `regions` is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
regions = ["phantom", "leg", "forearm", "neck", "bicep", "radial"]
# Maps lower-case keywords found in scan labels (including misspellings such
# as "forarm") to the canonical scan-location name.
# get_scan_location joins these keys, in insertion order, into a regex
# alternation, and the regex engine takes the first alternative that matches.
# When one key is a prefix of another (e.g. "brachial" / "brachial artery"),
# the key listed earlier in this dict is therefore the one that gets matched.
proper_name = {
    "phantom": "Phantom",
    "leg": "Leg",
    "forearm": "Radial Artery",
    "forarm": "Radial Artery",
    "foreearm": "Radial Artery",
    "foreaerm": "Radial Artery",
    "radial": "Radial Artery",
    "ulnar": "Ulnar Artery",
    "brachialartery": "Brachial Artery",
    "neck": "Carotid Artery",
    "bicep": "Bicep",
    "calf": "Leg",
    "brachial artery": "Brachial Artery",
    "radius": "Radial Artery",
    "brachial": "Brachial Artery",
}


def get_scan_location(x, names=None):
    """Return the canonical scan location mentioned in a label, or None.

    Every keyword from the lookup table that occurs in the lower-cased label
    is collected and translated to its canonical name.  The label is accepted
    when all hits agree on one location, so "forearm radial" resolves to
    "Radial Artery" instead of being rejected for containing two keywords.

    Args:
        x: Raw label text.  Non-string values (e.g. NaN from an empty cell)
            yield None.
        names: Optional mapping of lower-case keyword -> canonical name.
            Defaults to the notebook-level ``proper_name`` table.

    Returns:
        The canonical location name, or None when the label mentions no known
        location or mentions more than one distinct location.
    """
    if names is None:
        names = proper_name
    if not isinstance(x, str):
        return None
    # Keys are tried in the mapping's insertion order; escaping keeps any
    # regex metacharacter in a future key from changing the pattern.
    pattern = "|".join(re.escape(key) for key in names)
    if not pattern:
        # An empty pattern would match everywhere and yield "" as a keyword.
        return None
    # Compare distinct locations, not raw hits: synonyms for the same site
    # (or a repeated keyword) must not make the label look ambiguous.
    locations = {names[hit] for hit in re.findall(pattern, x.lower())}
    return locations.pop() if len(locations) == 1 else None


# Markers for a non-pigmented site: "vitiligo", a negated form of "pigment"
# ("not pigmented", "no pigment", "non-pigmented", "unpigmented",
# "depigmented"), or a stand-alone "np" token.
_NOT_PIGMENTED_RE = re.compile(
    r"vitiligo"
    r"|(?<![a-z0-9])(?:not?|non|un|de)[\s_-]*pigment"
    r"|(?<![a-z0-9])np(?![a-z0-9])"
)
# Markers for a pigmented site: any remaining "pigment..." word or a
# stand-alone "p" token.
_PIGMENTED_RE = re.compile(r"pigment|(?<![a-z0-9])p(?![a-z0-9])")


def get_pigmented(x):
    """Classify a scan label as "Pigmented", "Not Pigmented" or "" (unknown).

    Negative markers are checked first so that negated spellings which contain
    the word "pigmented" (e.g. "non-pigmented", "unpigmented") are not
    mistaken for pigmented sites.  The short "P" / "NP" tokens are recognised
    anywhere in the label, including at its start or end, as long as they are
    not part of a longer word or number.

    NOTE(review): the original author flagged this heuristic as wrong without
    recording why.  "vitiligo" still forces "Not Pigmented" as before; confirm
    that this matches the labelling convention used when scanning.

    Args:
        x: Raw label text.  Non-string values (e.g. NaN) yield "".

    Returns:
        "Not Pigmented", "Pigmented", or "" when the label carries no marker.
    """
    if not isinstance(x, str):
        return ""
    label = x.lower()
    if _NOT_PIGMENTED_RE.search(label):
        return "Not Pigmented"
    if _PIGMENTED_RE.search(label):
        return "Pigmented"
    return ""

# 1. Load every colourimeter (.cmf) file in the data folder and extract each scan's region, pigmentation, side and skin ID.

In [3]:
def _get_side(label):
    """Return "Left" or "Right" if the label mentions that side, else ""."""
    text = label.lower()
    if "left" in text:
        return "Left"
    if "right" in text:
        return "Right"
    return ""


# One DataFrame per colourimeter file, each tagged with the derived
# Region / Pigment / Side / SkinID columns.
details = []

# sorted() fixes the load order so that details[0] (inspected in the next
# cell) is the same file on every run; glob itself yields paths in whatever
# order the filesystem returns them.
for file in sorted(data_folder.glob("**/*.cmf")):
    # Skip hidden files (names starting with ".").
    if file.name.startswith("."):
        continue
    # The skin ID is the text after the last " - " in the file name.
    skin_id = file.stem.split(" - ")[-1]
    df_col = pd.read_table(file, skiprows=2)
    df_col.columns = df_col.columns.str.strip()
    # Parse a string view of the labels so that an empty (NaN) label yields
    # "no region / pigment / side" instead of crashing on .lower().
    labels = df_col["Label"].fillna("").astype(str)
    df_col["Region"] = labels.apply(get_scan_location)
    df_col["Pigment"] = labels.apply(get_pigmented)
    df_col["Side"] = labels.apply(_get_side)
    df_col["SkinID"] = skin_id
    details.append(df_col)

# Fail here with a clear message rather than later with an opaque
# IndexError / "No objects to concatenate".
if not details:
    raise FileNotFoundError(f"No .cmf files found under {data_folder.resolve()}")

In [4]:
# Sanity check: list the columns of the first loaded frame, including the
# derived Region, Pigment, Side and SkinID columns added by the loading loop.
details[0].columns

Index(['Label', 'Date', 'Eryth.', 'Melanin', 'CIE-L*', 'CIE-a*', 'CIE-b*', 'X',
       'Y', 'Z', 'Chroma', 'Hue', 'ITA', 'Gloss', 'sR', 'sG', 'sB', 'CF1',
       'CF2', 'CF3', 'CF4', 'CF5', 'CF6', 'CF7', 'CF8', 'CNIR', 'CCLEAR',
       'GF1', 'GF2', 'GF3', 'GF4', 'GF5', 'GF6', 'GF7', 'GF8', 'GNIR',
       'GCLEAR', 'Calib.', 'Status', 'Region', 'Pigment', 'Side', 'SkinID'],
      dtype='object')

In [5]:
# Keys that identify one group of readings, followed by the colour
# measurements that are averaged within each group.
group_keys = ["SkinID", "Region", "Pigment", "Side"]
measurements = [
    "ITA",
    "CIE-L*",
    "CIE-a*",
    "CIE-b*",
    "Melanin",
    "Eryth.",
    "sR",
    "sG",
    "sB",
]
columns = group_keys + measurements

# Stack every file's readings, then take the per-group mean of each measurement.
# NOTE: groupby drops rows with a missing key by default, so readings whose
# Region could not be determined (None) do not appear in the result.
all_readings = pd.concat(details).sort_values("SkinID")
df = all_readings[columns].groupby(group_keys).mean().reset_index()

In [6]:
# Manual correction for SKIN36: every carotid-artery row that is not already
# labelled "Not Pigmented" is marked as "Pigmented".
mask = (
    (df["SkinID"] == "SKIN36")
    & (df["Region"] == "Carotid Artery")
    & (df["Pigment"] != "Not Pigmented")
)
df.loc[mask, "Pigment"] = "Pigmented"

In [7]:
# Order rows by their identifying keys so the saved table has a stable layout.
sort_keys = ["SkinID", "Region", "Pigment", "Side"]
df = df.sort_values(by=sort_keys)

In [8]:
# Write the aggregated table to a Parquet file in the current working directory.
output_path = "ITA_raw.parquet"
df.to_parquet(output_path)