In [64]:
# imports
import os, glob
import random
import xml.etree.ElementTree as ET
from dataclasses import dataclass

import numpy as np
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split

In [120]:
IMG_DIR = "img_data"
LABEL_DIR = "label_data"
images = root.findall("image")

In [11]:
xml_path = "label_data/ann_a23.xml"

tree = ET.parse(xml_path)
root = tree.getroot()
images = root.findall("image")
# print(root.tag)
# list(root)[:5]
# len(images)

annotations


[<Element 'version' at 0x00000176601F7060>,
 <Element 'meta' at 0x00000176601F70B0>,
 <Element 'image' at 0x0000017660219990>,
 <Element 'image' at 0x0000017660219F80>,
 <Element 'image' at 0x000001766021A570>]

In [13]:
img = images[0]

print("Image name:", img.attrib.get("name"))
print("Image id:", img.attrib.get("id"))

for attr in img.findall("attribute"):
    print(attr.attrib["name"], ":", attr.text)

Image name: left-side.jpg
Image id: 0


In [15]:
[c.tag for c in list(images[0])[:20]]

['tag', 'tag', 'tag', 'tag', 'tag', 'tag', 'tag', 'tag', 'tag']

In [16]:
for t in images[0].findall("tag"):
    print("TAG:", t.attrib.get("label"))
    for a in t.findall("attribute"):
        print("  ", a.attrib.get("name"), "=", (a.text or "").strip())

TAG: Dryness
   Severity = None
TAG: DarkCircles
   Severity = None
TAG: Wrinkles
   Severity = Mild
TAG: Texture
   Severity = Mild
TAG: Scarring
   Severity = Mild
TAG: Pose
   view = Three_quarters_L
TAG: Acne
   Severity = None
TAG: Redness
   Severity = None
TAG: Pigmentation
   Severity = Mild


In [17]:
record = {"filename": images[0].attrib["name"]}

for t in images[0].findall("tag"):
    label = t.attrib["label"]          # e.g., "Acne"
    attr = t.findall("attribute")[0]   # first (and only) attribute
    record[label] = (attr.text or "").strip()

record

{'filename': 'left-side.jpg',
 'Dryness': 'None',
 'DarkCircles': 'None',
 'Wrinkles': 'Mild',
 'Texture': 'Mild',
 'Scarring': 'Mild',
 'Pose': 'Three_quarters_L',
 'Acne': 'None',
 'Redness': 'None',
 'Pigmentation': 'Mild'}

## GENERATING DATAFRAME + PREREQS FOR MODEL

In [96]:
# develop a dataframe taken from xml file
'''
<annotations>            ← ROOT (grandparent)
   <image>               ← child
      <tag>              ← grandchild
         <attribute>     ← great-grandchild

generate family tree for all xml_files
-> then find all images associated with each tree, then develop df
input: xml_files - list of all paths to xml files 
output: dataframe
'''
def create_df(xml_files):
    rows = []
    for xml_path in xml_files:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        images = root.findall("image")
    
        for img in images:
            row = {"filename": img.attrib["name"]}
            for t in img.findall("tag"):
                label = t.attrib["label"]
                attr = t.findall("attribute")[0]
                row[label] = (attr.text).strip()
            rows.append(row)
    
    df = pd.DataFrame(rows).replace({np.nan: 'None'})
    return df

In [97]:
# filename matching, gather all xml files, generate df
xml_files = glob.glob("label_data/*.xml")
df = create_df(xml_files)
df.head()

Unnamed: 0,filename,Acne,Redness,Pigmentation,Dryness,DarkCircles,Wrinkles,Texture,Scarring,Pose
0,levle0_1.jpg,Mild,Mild,Mild,,Mild,,Mild,Mild,Three_quarters_R
1,levle0_100.jpg,Mild,Mild,Mild,Severe,Moderate,,Mild,Mild,Three_quarters_L
2,levle0_105.jpg,Mild,Mild,Mild,Severe,Moderate,,Mild,,Three_quarters_L
3,levle0_107.jpg,Mild,Mild,Mild,Mild,,,Mild,,Three_quarters_R
4,levle0_114.jpg,Moderate,Moderate,Moderate,,,,Severe,Severe,Three_quarters_R


In [115]:
# I think the best approaching a model first is by making each attribute binary: JUST FOR NOW.
# None = 0, every other severity = 1
LABEL_COLS = df.columns
df_bin = df.copy()

for c in df.columns:
    df_bin[c] = (df_bin[c] != "None").astype(int)
df_bin["filename"] = df["filename"]
df_bin[LABEL_COLS].head()

Unnamed: 0,filename,Acne,Redness,Pigmentation,Dryness,DarkCircles,Wrinkles,Texture,Scarring,Pose
0,levle0_1.jpg,1,1,1,0,1,0,1,1,1
1,levle0_100.jpg,1,1,1,1,1,0,1,1,1
2,levle0_105.jpg,1,1,1,1,1,0,1,0,1
3,levle0_107.jpg,1,1,1,1,0,0,1,0,1
4,levle0_114.jpg,1,1,1,0,0,0,1,1,1


In [139]:
LABEL_COLS = df.columns[:-1]

In [104]:
# checking label balance
# pos_counts = df_bin[LABEL_COLS].sum().sort_values(ascending=False)
# pos_rate = (pos_counts / len(df_bin)).round(3)

# pos_counts, pos_rate

In [140]:
# weights that we are going to use for BCE Loss for our model
# we are using this because our labels are quite imbalanced
pos = df_bin[LABEL_COLS[1:]].sum()
neg = len(df_bin) - pos

pos_weight = neg / pos

pos_weight

Acne            0.141509
Redness         0.216080
Pigmentation    0.130841
Dryness         0.890625
DarkCircles     0.440476
Wrinkles        1.987654
Texture         0.146919
Scarring        0.174757
dtype: float64

## BEGIN TRAINING MODEL

In [141]:
train_df, val_df = train_test_split(df_bin, test_size=0.2, random_state=42, shuffle=True)
len(train_df), len(val_df)

(193, 49)

In [142]:
train_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
])

val_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

class SkinDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(IMG_DIR, row["filename"])
        img = Image.open(img_path).convert("RGB")
        img = self.transform(img)
        y = torch.tensor(row[LABEL_COLS[1:]].to_numpy(dtype="float32"), dtype=torch.float32)
        return img, y

In [143]:
train_loader = DataLoader(SkinDataset(train_df, train_tf), batch_size=8, shuffle=True)
x, y = next(iter(train_loader))

x.shape, y.shape, y[0]

(torch.Size([8, 3, 224, 224]),
 torch.Size([8, 8]),
 tensor([1., 1., 1., 1., 1., 0., 1., 1.]))

In [144]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, len(LABEL_COLS)-1)  # 8 outputs
model = model.to(device)

model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [145]:
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(pos_weight, device=device))
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

  criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(pos_weight, device=device))


In [146]:
model.train()
x, y = next(iter(train_loader))
x, y = x.to(device), y.to(device)

logits = model(x)                 # (batch, 8)
loss = criterion(logits, y)

optimizer.zero_grad()
loss.backward()
optimizer.step()

logits.shape, loss.item()

(torch.Size([8, 8]), 0.3742481470108032)

In [147]:
for name, p in model.named_parameters():
    p.requires_grad = name.startswith("fc.")

optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)

for epoch in range(3):
    model.train()
    total_loss = 0.0
    
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        loss = criterion(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * x.size(0)

    avg_loss = total_loss / len(train_loader.dataset)
    print(f"epoch {epoch+1}: train_loss={avg_loss:.4f}")

epoch 1: train_loss=0.3914
epoch 2: train_loss=0.3608
epoch 3: train_loss=0.3431
