In [6]:
# imports
import os, glob
import random
import xml.etree.ElementTree as ET
from dataclasses import dataclass

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

In [7]:
# Dataset locations, relative to the notebook's working directory:
# IMG_DIR is the image folder, LABEL_DIR the folder of XML annotation files.
IMG_DIR, LABEL_DIR = "img_data", "label_data"

In [10]:
# Pick the alphabetically first annotation file and look at its top-level layout.
label_files = glob.glob(os.path.join(LABEL_DIR, "*.xml"))
label_files.sort()
xml_path = label_files[0]

file_name = os.path.basename(xml_path)
stem = os.path.splitext(file_name)[0]  # file name without the ".xml" extension
print(f"XML: {xml_path}")
print(f"STEM: {stem}")

tree = ET.parse(xml_path)
root = tree.getroot()

# Map each direct child tag of the root to its stripped text ("" when it has none).
# Children sharing a tag overwrite each other, so only the last one survives.
labels = {}
for child in root:
    labels[child.tag] = (child.text or "").strip()
labels

XML: label_data\ann_a0.xml
STEM: ann_a0


{'version': '1.1', 'meta': '', 'image': ''}

In [11]:
# Load one specific annotation file and collect its <image> elements;
# the cells below index into `images`.
# The path is built from LABEL_DIR so it stays in sync with the config cell.
xml_path = os.path.join(LABEL_DIR, "ann_a23.xml")

tree = ET.parse(xml_path)
root = tree.getroot()
images = root.findall("image")

# Show the first few direct children of the root element.
# (Previously this cell evaluated `annotations`, a name that is never defined
# in the notebook, so it failed with NameError on a fresh kernel.)
list(root)[:5]


[<Element 'version' at 0x00000176601F7060>,
 <Element 'meta' at 0x00000176601F70B0>,
 <Element 'image' at 0x0000017660219990>,
 <Element 'image' at 0x0000017660219F80>,
 <Element 'image' at 0x000001766021A570>]

In [13]:
# Inspect the first <image> element: its name/id attributes and any
# <attribute> elements that are direct children of it.
img = images[0]
img_meta = img.attrib

print(f"Image name: {img_meta.get('name')}")
print(f"Image id: {img_meta.get('id')}")

for attr in img.iterfind("attribute"):
    print(f"{attr.attrib['name']} : {attr.text}")

Image name: left-side.jpg
Image id: 0


In [15]:
# Tag names of (at most) the first 20 direct children of the first image.
first_children = list(images[0])[:20]
[child.tag for child in first_children]

['tag', 'tag', 'tag', 'tag', 'tag', 'tag', 'tag', 'tag', 'tag']

In [16]:
# Print every <tag> of the first image, followed by the name/value of each
# <attribute> nested inside it (missing text is printed as an empty string).
for t in images[0].iterfind("tag"):
    print(f"TAG: {t.attrib.get('label')}")
    for a in t.iterfind("attribute"):
        value = (a.text or "").strip()
        print(f"   {a.attrib.get('name')} = {value}")

TAG: Dryness
   Severity = None
TAG: DarkCircles
   Severity = None
TAG: Wrinkles
   Severity = Mild
TAG: Texture
   Severity = Mild
TAG: Scarring
   Severity = Mild
TAG: Pose
   view = Three_quarters_L
TAG: Acne
   Severity = None
TAG: Redness
   Severity = None
TAG: Pigmentation
   Severity = Mild


In [17]:
# Flatten the first image's tags into a single {label: value} record,
# keyed additionally by the image file name.
record = {"filename": images[0].attrib["name"]}

for t in images[0].findall("tag"):
    label = t.attrib["label"]      # e.g., "Acne"
    attr = t.find("attribute")     # first <attribute> child, or None if absent
    # A tag without an <attribute> child is stored as None (missing) instead of
    # raising IndexError as `findall(...)[0]` would.
    record[label] = (attr.text or "").strip() if attr is not None else None

record

{'filename': 'left-side.jpg',
 'Dryness': 'None',
 'DarkCircles': 'None',
 'Wrinkles': 'Mild',
 'Texture': 'Mild',
 'Scarring': 'Mild',
 'Pose': 'Three_quarters_L',
 'Acne': 'None',
 'Redness': 'None',
 'Pigmentation': 'Mild'}

In [18]:
def image_to_record(img):
    """Flatten one <image> element into a flat dict.

    The result holds the image's ``name`` attribute under ``"filename"`` plus
    one entry per <tag> child, keyed by the tag's ``label`` attribute. The
    value is the stripped text of the tag's first <attribute> child ("" when
    that child has no text), or None when the tag has no <attribute> child.

    Raises:
        KeyError: if the image has no ``name`` attribute or a tag has no
            ``label`` attribute.
    """
    rec = {"filename": img.attrib["name"]}
    for t in img.findall("tag"):
        attr = t.find("attribute")
        # Tolerate tags without an <attribute> child rather than failing with
        # IndexError; pandas shows the resulting None as a missing value.
        rec[t.attrib["label"]] = (attr.text or "").strip() if attr is not None else None
    return rec


# One row per annotated image; build the frame once from the list of records.
rows = [image_to_record(img) for img in images]

df = pd.DataFrame(rows)
df.head()

Unnamed: 0,filename,Dryness,DarkCircles,Wrinkles,Texture,Scarring,Pose,Acne,Redness,Pigmentation
0,left-side.jpg,,,Mild,Mild,Mild,Three_quarters_L,,,Mild
1,left-side1.jpg,,,,,,Three_quarters_L,,,
2,left-side10.jpg,,Mild,,Moderate,Mild,Three_quarters_L,Mild,,Moderate
3,left-side11.jpg,,Mild,Mild,Moderate,Mild,Three_quarters_L,Mild,,Mild
4,left-side12.jpg,,Moderate,,Mild,Mild,Three_quarters_L,Mild,,Moderate
