In [None]:
!pip install tensorflow --upgrade


Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.20.0 (from tensorflow)
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow)
  Downloading wheel-0.45.1-py3-none-any.whl.metadata (2.3 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)
  Downloading tensorboard_data_server-0.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
import os
import glob
from google.colab import drive

# --- Mount Drive and Set Paths ---
print("Connecting to Google Drive...")
drive.mount('/content/drive', force_remount=True)

DATA_ROOT_DIR = '/content/drive/Shareddrives/Skinterest-2b/data/Resize_Data/'
CSV_PATH = '/content/drive/Shareddrives/Skinterest-2b/data/SCIN_CLEAN.csv'

# --- Load Metadata ---
print("Loading metadata...")
df = pd.read_csv(CSV_PATH)

# --- Restructure Data ---
# Every CSV row holds up to three (path, shot type) pairs in numbered columns;
# turn each pair into its own two-column frame and stack them into one long table.
print("Restructuring data...")
df1, df2, df3 = (
    df[[f'image_{slot}_path', f'image_{slot}_shot_type']].rename(
        columns={
            f'image_{slot}_path': 'partial_path',
            f'image_{slot}_shot_type': 'label_text',
        }
    )
    for slot in (1, 2, 3)
)

combined_df = pd.concat([df1, df2, df3], ignore_index=True)
# Keep only slots that have both a path and a shot type.
combined_df = combined_df.dropna(subset=['partial_path', 'label_text'])

# The CSV stores relative paths; only the file name is used for matching.
combined_df['filename'] = combined_df['partial_path'].map(os.path.basename)

# --- Find All Actual RGB Images in Drive ---
print("Searching for all RGB images in Drive...")
rgb_pattern = os.path.join(DATA_ROOT_DIR, 'resize_224_split_images_*', 'rgb', '*.png')
image_paths = glob.glob(rgb_pattern)
print(f"Found {len(image_paths)} actual image files.")

# Lookup table: file name -> full path on Drive.
path_map = dict(zip(map(os.path.basename, image_paths), image_paths))

# Resolve each CSV entry to a real file and discard entries with no file.
combined_df['image_path'] = combined_df['filename'].map(path_map)
combined_df = combined_df.dropna(subset=['image_path'])
print(f"Successfully matched {len(combined_df)} images with their full paths.")

# --- Create Binary Label Column ---
# 1 when the shot-type text contains "HARSH" (case-insensitive), else 0.
combined_df['label'] = (
    combined_df['label_text']
    .astype(str)
    .str.upper()
    .str.contains('HARSH', regex=False)
    .astype(int)
)

# --- Verify ---
print("\nSample of matched data:")
print(combined_df[['filename', 'label_text', 'label', 'image_path']].head())

print("\nUnique shot types found in CSV:")
print(combined_df['label_text'].unique())

# --- Train/Val/Test Split ---
# 70% train, then the remaining 30% is halved into validation and test,
# stratifying on the binary label at both steps.
final_df = combined_df
train_df, test_val_df = train_test_split(
    final_df,
    test_size=0.3,
    random_state=42,
    stratify=final_df['label'],
)
val_df, test_df = train_test_split(
    test_val_df,
    test_size=0.5,
    random_state=42,
    stratify=test_val_df['label'],
)

for split_name, split_frame in (
    ("\nTraining", train_df),
    ("Validation", val_df),
    ("Testing", test_df),
):
    print(f"{split_name} samples: {len(split_frame)}")


Connecting to Google Drive...
Mounted at /content/drive
Loading metadata...
Restructuring data...
Searching for all RGB images in Drive...
Found 770 actual image files.
Successfully matched 770 images with their full paths.

Sample of matched data:
                   filename   label_text  label  \
0  -5949315841433628424.png  AT_DISTANCE      0   
2  -6837240536182868524.png  AT_DISTANCE      0   
5  -1824718034048585128.png  AT_AN_ANGLE      0   
6  -2805714129362314533.png  AT_AN_ANGLE      0   
9  -4238387650372285498.png     CLOSE_UP      0   

                                          image_path  
0  /content/drive/Shareddrives/Skinterest-2b/data...  
2  /content/drive/Shareddrives/Skinterest-2b/data...  
5  /content/drive/Shareddrives/Skinterest-2b/data...  
6  /content/drive/Shareddrives/Skinterest-2b/data...  
9  /content/drive/Shareddrives/Skinterest-2b/data...  

Unique shot types found in CSV:
['AT_DISTANCE' 'AT_AN_ANGLE' 'CLOSE_UP']

Training samples: 539
Validation sample

In [None]:
# Mount Google Drive at the standard Colab location (no forced remount here).
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

# Same shared-drive path spelling ("Shareddrives") as every other cell in this
# notebook, so this inspection cell does not rebind the global CSV_PATH to a
# different location than the one the data-prep cells read from.
CSV_PATH = '/content/drive/Shareddrives/Skinterest-2b/data/SCIN_CLEAN.csv'
df_info = pd.read_csv(CSV_PATH)

# This will print all the column names
print("All available column names:")
print(df_info.columns)

All available column names:
Index(['case_id', 'source', 'release', 'year', 'age_group', 'sex_at_birth',
       'fitzpatrick_skin_type', 'dermatologist_fitzpatrick_skin_type_label_1',
       'dermatologist_fitzpatrick_skin_type_label_2',
       'dermatologist_fitzpatrick_skin_type_label_3',
       'monk_skin_tone_label_india', 'monk_skin_tone_label_us',
       'dermatologist_skin_condition_on_label_name',
       'dermatologist_skin_condition_confidence',
       'race_ethnicity_american_indian_or_alaska_native',
       'race_ethnicity_asian', 'race_ethnicity_black_or_african_american',
       'race_ethnicity_hispanic_latino_or_spanish_origin',
       'race_ethnicity_middle_eastern_or_north_african',
       'race_ethnicity_native_hawaiian_or_pacific_islander',
       'race_ethnicity_white', 'race_ethnicity_other_race',
       'race_ethnicity_prefer_not_to_answer', 'textures_raised_or_bumpy',
       'textures_flat', 'textures_rough_or_flaky', 'textures_fluid_filled',
       'body_parts_hea

In [None]:
import os

DATA_ROOT_DIR = '/content/drive/Shareddrives/Skinterest-2b/data/Resize_Data/'
CSV_PATH = '/content/drive/Shareddrives/Skinterest-2b/data/SCIN_CLEAN.csv'

# Option A: Use DATA_ROOT_DIR directly
print("DATA_ROOT_DIR:", DATA_ROOT_DIR)

# Walk one level at a time: data root -> first split folder -> its rgb folder.
split_path = os.path.join(DATA_ROOT_DIR, "resize_224_split_images_1")
rgb_path = os.path.join(split_path, "rgb")

for heading, directory, max_entries in (
    ("\nContents of DATA_ROOT_DIR:", DATA_ROOT_DIR, None),
    ("\nContents of resize_224_split_images_1:", split_path, None),
    ("\nContents of rgb folder (first 10 files):", rgb_path, 10),
):
    print(heading)
    # A limit of None keeps the whole listing.
    print(os.listdir(directory)[:max_entries])


DATA_ROOT_DIR: /content/drive/Shareddrives/Skinterest-2b/data/Resize_Data/

Contents of DATA_ROOT_DIR:
['.DS_Store', 'resize_224_split_images_6', 'resize_224_split_images_7', 'resize_224_split_images_2', 'resize_224_split_images_3', 'resize_224_split_images_1', 'resize_224_split_images_4', 'resize_224_split_images_5', 'Resize_160', 'resize_224_split_images_8']

Contents of resize_224_split_images_1:
['.DS_Store', 'rgb', 'hsv', 'lab']

Contents of rgb folder (first 10 files):
['-1628413175243738425.png', '-1642667040160094612.png', '-1793399078708447490.png', '-2099414475733487459.png', '-1067701776636141108.png', '-1029834132011629215.png', '-2101790873047080311.png', '-1532307918011967845.png', '-1297003684632367094.png', '-144864341463911154.png']


In [None]:
!pip install tensorflow==2.15


[31mERROR: Could not find a version that satisfies the requirement tensorflow==2.15 (from versions: 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0, 2.17.1, 2.18.0rc0, 2.18.0rc1, 2.18.0rc2, 2.18.0, 2.18.1, 2.19.0rc0, 2.19.0, 2.19.1, 2.20.0rc0, 2.20.0)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow==2.15[0m[31m
[0m

In [None]:
import glob

# Collect all rgb images across all split folders.
# DATA_ROOT_DIR is assigned by the earlier path cells; the original reference
# to `image_root` only resolves if a cell further down has already been run,
# which breaks (or silently uses stale state) on a top-to-bottom execution.
all_image_paths = glob.glob(os.path.join(DATA_ROOT_DIR, "resize_224_split_images_*", "rgb", "*.png"))
path_map = {os.path.basename(p): p for p in all_image_paths}

# Match on file name only: the CSV stores relative paths.
df['filename'] = df['image_1_path'].apply(lambda x: os.path.basename(str(x)))
df['image_path'] = df['filename'].map(path_map)

# Surface the match rate so an all-NaN mapping is obvious immediately.
print(f"Found {len(all_image_paths)} files; matched {df['image_path'].notna().sum()} of {len(df)} image_1 entries.")
print(df[['image_1_path','filename','image_path']].head())



                              image_1_path                  filename  \
0  dataset/images/-5949315841433628424.png  -5949315841433628424.png   
1    dataset/images/325464533153467313.png    325464533153467313.png   
2  dataset/images/-6837240536182868524.png  -6837240536182868524.png   
3   dataset/images/2983323875335943836.png   2983323875335943836.png   
4   dataset/images/3104801012387799539.png   3104801012387799539.png   

   image_path  
0         NaN  
1         NaN  
2         NaN  
3         NaN  
4         NaN  


In [None]:
import pandas as pd
import os, glob

# Paths
csv_path = '/content/drive/Shareddrives/Skinterest-2b/data/SCIN_CLEAN.csv'
image_root = '/content/drive/Shareddrives/Skinterest-2b/data/Resize_Data/'

# Load CSV
df = pd.read_csv(csv_path)

# Flatten all image paths.
# Each case row has up to three (path, shot type) column pairs. Every other
# column (skin-tone labels, Fitzpatrick labels, ...) describes the case, so it
# is repeated on each flattened image row. Without this, the training cell
# cannot find its undertone / skin-type columns and falls back to constant
# placeholder targets.
IMAGE_SLOTS = (1, 2, 3)
slot_renames = {
    slot: {
        f'image_{slot}_path': 'partial_path',
        f'image_{slot}_shot_type': 'label_text',
    }
    for slot in IMAGE_SLOTS
}
per_slot_columns = {name for renames in slot_renames.values() for name in renames}
case_columns = [col for col in df.columns if col not in per_slot_columns]

df1, df2, df3 = (
    df[case_columns + list(slot_renames[slot])].rename(columns=slot_renames[slot])
    for slot in IMAGE_SLOTS
)

combined_df = pd.concat([df1, df2, df3], ignore_index=True)
# Keep only slots that have both a path and a shot type.
combined_df = combined_df.dropna(subset=['partial_path', 'label_text'])

# Extract basename
combined_df['filename'] = combined_df['partial_path'].apply(lambda x: os.path.basename(str(x)))

# Collect all actual images in Drive
all_image_paths = glob.glob(os.path.join(image_root, "resize_224_split_images_*", "rgb", "*.png"))

# Map basename -> full path
path_map = {os.path.basename(p): p for p in all_image_paths}

# Attach
combined_df['image_path'] = combined_df['filename'].map(path_map)

# Drop missing
combined_df = combined_df.dropna(subset=['image_path'])

print("CSV rows:", len(df))
print("Expanded combined_df rows:", len(combined_df))
print("Unique filenames matched:", combined_df['filename'].nunique())
print(combined_df.head())

print("Non-null counts:")
print("image_1_path:", df['image_1_path'].notna().sum())
print("image_2_path:", df['image_2_path'].notna().sum())
print("image_3_path:", df['image_3_path'].notna().sum())



CSV rows: 770
Expanded combined_df rows: 770
Unique filenames matched: 770
                              partial_path   label_text  \
0  dataset/images/-5949315841433628424.png  AT_DISTANCE   
2  dataset/images/-6837240536182868524.png  AT_DISTANCE   
5  dataset/images/-1824718034048585128.png  AT_AN_ANGLE   
6  dataset/images/-2805714129362314533.png  AT_AN_ANGLE   
9  dataset/images/-4238387650372285498.png     CLOSE_UP   

                   filename                                         image_path  
0  -5949315841433628424.png  /content/drive/Shareddrives/Skinterest-2b/data...  
2  -6837240536182868524.png  /content/drive/Shareddrives/Skinterest-2b/data...  
5  -1824718034048585128.png  /content/drive/Shareddrives/Skinterest-2b/data...  
6  -2805714129362314533.png  /content/drive/Shareddrives/Skinterest-2b/data...  
9  -4238387650372285498.png  /content/drive/Shareddrives/Skinterest-2b/data...  
Non-null counts:
image_1_path: 770
image_2_path: 522
image_3_path: 398


In [None]:
# Full multi-head ResNet152V2 fine-tuning pipeline
# Run this after you've already produced combined_df that contains:
#   'filename', 'image_path', and CSV columns from SCIN_CLEAN.
# Adjust column names in the CONFIG section if your CSV uses different names.

import os
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, losses, optimizers, callbacks
from tensorflow.keras.applications import ResNet152V2
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet_preprocess
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight

# ---------- CONFIG ----------
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
AUTO = tf.data.AUTOTUNE
EPOCHS_HEADS = 8        # train heads first
EPOCHS_UNFREEZE = 12    # then unfreeze top and train more
LEARNING_RATE_HEADS = 1e-3
LEARNING_RATE_FINETUNE = 5e-5
MODEL_INPUT_SHAPE = IMAGE_SIZE + (3,)

# Column names in your dataframe (change if different)
COL_IMAGE_PATH = 'image_path'   # full path we already produced
# For lighting quality (binary): set to the column that indicates harsh lighting.
# The flattening cell renames every image_N_shot_type column to 'label_text', so
# the per-image shot type lives there; 'image_1_shot_type' no longer exists in
# combined_df and always triggered the all-zero placeholder.
# NOTE(review): shot type is a stand-in, not a lighting annotation; replace
# with a real lighting column if one becomes available.
COL_LIGHTING_SOURCE = 'label_text'

# Undertone column (multi-class). Example candidates: 'monk_skin_tone_label_us' or 'monk_skin_tone_label_india'
COL_UNDERTONE = 'monk_skin_tone_label_us'   # change as required

# Skin type (Fitzpatrick) column (multi-class)
COL_SKIN_TYPE = 'dermatologist_fitzpatrick_skin_type_label_1'  # change as required

# Grouping column for evaluation (sensitivity/specificity per group),
# typically fitzpatrick or monk skin tone. Set to one of the above or None.
EVAL_GROUP_COL = COL_SKIN_TYPE

# Loss weights for multi-task loss (you can tune these).
# Keys must match the output layer names created in build_model.
LOSS_WEIGHTS = {
    'lighting_out': 1.0,
    'undertone_out': 1.0,
    'skin_type_out': 1.0
}

# ---------- Helpers & Sanity checks ----------
print("TensorFlow version:", tf.__version__)
tf.keras.mixed_precision.set_global_policy('mixed_float16')  # speedup on GPU

# You must have `combined_df` already in the workspace. If not, load it first:
try:
    combined_df
except NameError:
    raise RuntimeError("combined_df not found in workspace. Run your preprocessing cell that builds combined_df first.") from None

print("Initial combined_df rows:", len(combined_df))

# Ensure image paths exist — filter out missing files
exists_mask = combined_df[COL_IMAGE_PATH].apply(os.path.exists)
if not exists_mask.all():
    missing = (~exists_mask).sum()
    print(f"Warning: {missing} entries have missing image file paths; they will be dropped.")
    combined_df = combined_df[exists_mask].copy()


def _first_usable_column(frame, candidates):
    """Return the first name in `candidates` that is a column of `frame` holding
    at least one non-null value, or None when no candidate qualifies."""
    for name in candidates:
        if name in frame.columns and frame[name].notna().any():
            return name
    return None


def _as_class_strings(series):
    """Convert labels to strings, mapping missing values to 'unknown'.

    The substitution happens BEFORE the string cast: casting first turns NaN
    into the literal text 'nan', which a later fillna('unknown') can no longer
    detect, so missing labels would become their own 'nan' class.
    """
    return series.astype(object).where(series.notna(), 'unknown').astype(str)


# ---------- Create target columns ----------
# LIGHTING (binary): try to derive from known shot_type columns, but user should replace with real lighting annotation if available.
if COL_LIGHTING_SOURCE in combined_df.columns:
    print(f"Using {COL_LIGHTING_SOURCE} as lighting source to derive 'lighting_label' (HARSH vs NON-HARSH).")
    combined_df['lighting_label'] = combined_df[COL_LIGHTING_SOURCE].astype(str).apply(
        lambda x: 1 if 'HARSH' in x.upper() else 0
    )
else:
    print(f"Note: {COL_LIGHTING_SOURCE} not found. Creating placeholder 'lighting_label' = 0 for all (you should replace with a real column).")
    combined_df['lighting_label'] = 0

# UNDERTONE: multi-class — configured column first, then the fallbacks, then a placeholder.
undertone_col = _first_usable_column(combined_df, [COL_UNDERTONE])
if undertone_col is None:
    undertone_col = _first_usable_column(
        combined_df, ['monk_skin_tone_label_india', 'monk_skin_tone_label_us']
    )
    if undertone_col is not None:
        print(f"Using fallback undertone column {undertone_col}")
if undertone_col is not None:
    combined_df['undertone_raw'] = _as_class_strings(combined_df[undertone_col])
else:
    print("No undertone column found. Creating placeholder 'undertone_raw' with single class 'unknown'.")
    combined_df['undertone_raw'] = 'unknown'

# SKIN TYPE (Fitzpatrick multi-class): configured column first, then label_2/label_3, then a placeholder.
skin_col = _first_usable_column(combined_df, [COL_SKIN_TYPE])
if skin_col is None:
    skin_col = _first_usable_column(
        combined_df,
        ['dermatologist_fitzpatrick_skin_type_label_2', 'dermatologist_fitzpatrick_skin_type_label_3'],
    )
    if skin_col is not None:
        print(f"Using fallback skin type column {skin_col}")
if skin_col is not None:
    combined_df['skin_type_raw'] = _as_class_strings(combined_df[skin_col])
else:
    print("No Fitzpatrick column found. Creating placeholder 'skin_type_raw' with single class 'unknown'.")
    combined_df['skin_type_raw'] = 'unknown'

# Encode categorical targets as consecutive integer ids (one encoder per head,
# kept around so reports can map ids back to class names).
le_undertone = LabelEncoder()
combined_df['undertone_id'] = le_undertone.fit_transform(combined_df['undertone_raw'].fillna('unknown'))

le_skin = LabelEncoder()
combined_df['skin_type_id'] = le_skin.fit_transform(combined_df['skin_type_raw'].fillna('unknown'))

print("Undertone classes:", le_undertone.classes_)
print("Skin-type classes:", le_skin.classes_)
print("Lighting positive count (HARSH=1):", combined_df['lighting_label'].sum(), "/", len(combined_df))


print(combined_df['lighting_label'].value_counts())
print(combined_df['undertone_raw'].value_counts())
print(combined_df['skin_type_raw'].value_counts())


# ---------- Train/Val/Test split ----------
# 70/15/15 split, stratified on the lighting label at both steps.
from sklearn.model_selection import train_test_split
final_df = combined_df.copy()
train_df, test_val_df = train_test_split(final_df, test_size=0.3, random_state=42, stratify=final_df['lighting_label'])
val_df, test_df = train_test_split(test_val_df, test_size=0.5, random_state=42, stratify=test_val_df['lighting_label'])

print("Split sizes:", len(train_df), len(val_df), len(test_df))

# ---------- Build tf.data pipelines ----------
def decode_and_preprocess(path, label_lighting, label_undertone, label_skin):
    """Load one image file and turn it into a model-ready tensor.

    Args:
        path: scalar string tensor with the image file path.
        label_lighting, label_undertone, label_skin: the three targets for
            this image; passed through unchanged.

    Returns:
        (image, labels): `image` is float32, resized to IMAGE_SIZE and run
        through the ResNetV2 preprocessing function; `labels` is a dict keyed
        by the model's output layer names.
    """
    raw_bytes = tf.io.read_file(path)
    # expand_animations=False makes decode_image always return a 3-D
    # (height, width, channels) tensor; with the default, an animated GIF
    # decodes to 4-D and the set_shape call below would fail.
    image = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    image.set_shape([None, None, 3])
    # decode_image yields uint8 by default, so a plain cast already gives the
    # [0, 255] float range the ResNet preprocessing expects (no need to scale
    # down to [0, 1] and back up again).
    image = tf.cast(image, tf.float32)
    image = tf.image.resize(image, IMAGE_SIZE)
    # preprocessing for ResNetV2: scale to [-1,1]
    image = resnet_preprocess(image)
    return image, {'lighting_out': label_lighting, 'undertone_out': label_undertone, 'skin_type_out': label_skin}

# Data augmentation (applied only to training)
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.08),
    layers.RandomZoom(0.08),
    # Add more if needed
], name="data_augmentation")

def preprocess_for_train(path, lighting, undertone, skin):
    """Decode one training example and apply random augmentation to the image.

    Takes the same arguments as decode_and_preprocess and returns the same
    (image, labels) pair, with the image randomly flipped/rotated/zoomed.
    """
    image, labels = decode_and_preprocess(path, lighting, undertone, skin)
    # The augmentation layers run here inside tf.data, outside model.fit, so
    # request training mode explicitly instead of relying on whatever default
    # call mode the installed Keras version uses for random layers.
    image = data_augmentation(image, training=True)
    return image, labels

def preprocess_for_eval(path, lighting, undertone, skin):
    """Decode one validation/test example; no augmentation is applied."""
    return decode_and_preprocess(path, lighting, undertone, skin)

def make_dataset(df, shuffle=False, batch_size=BATCH_SIZE, training=False):
    """Build a batched, prefetched tf.data pipeline from a split dataframe.

    Args:
        df: frame providing the image-path column plus 'lighting_label',
            'undertone_id' and 'skin_type_id'.
        shuffle: shuffle examples with a buffer covering the whole split.
        batch_size: number of examples per batch.
        training: use the augmenting preprocessing when True, the plain one
            otherwise.

    Returns:
        A dataset yielding (image_batch, labels_dict) pairs.
    """
    # One array per component, in the argument order the map functions expect.
    components = (
        df[COL_IMAGE_PATH].values,
        df['lighting_label'].astype(np.int32).values,
        df['undertone_id'].astype(np.int32).values,
        df['skin_type_id'].astype(np.int32).values,
    )
    dataset = tf.data.Dataset.from_tensor_slices(components)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(components[0]))

    per_example_fn = preprocess_for_eval
    if training:
        per_example_fn = preprocess_for_train

    return (
        dataset
        .map(per_example_fn, num_parallel_calls=AUTO)
        .batch(batch_size)
        .prefetch(AUTO)
    )

# Only the training split is shuffled and augmented; validation and test use
# make_dataset's defaults (no shuffle, no augmentation).
train_ds = make_dataset(train_df, shuffle=True, training=True)
val_ds = make_dataset(val_df)
test_ds = make_dataset(test_df)

# ---------- Compute class weights for lighting (binary) to help imbalance ----------
classes = np.unique(train_df['lighting_label'])
if len(classes) <= 1:
    # Balanced weights are meaningless with a single class.
    print("Only one class present for lighting in train set; class weighting skipped.")
    class_weights = None
else:
    weights = compute_class_weight(
        class_weight='balanced',
        classes=classes,
        y=train_df['lighting_label'].values,
    )
    # Plain Python ints/floats as keys/values.
    class_weights = dict(zip(map(int, classes), map(float, weights)))
    print("Lighting class weights:", class_weights)

# ---------- Build the multi-head model ----------
def build_model(num_undertone_classes, num_skin_classes, input_shape=MODEL_INPUT_SHAPE):
    """Create the three-headed classifier on a frozen ResNet152V2 backbone.

    Args:
        num_undertone_classes: width of the 'undertone_out' softmax.
        num_skin_classes: width of the 'skin_type_out' softmax.
        input_shape: shape of one input image, channels last.

    Returns:
        A Keras model with outputs, in order, 'lighting_out' (1 sigmoid unit),
        'undertone_out' and 'skin_type_out'. The output layers are created
        with dtype float32.
    """
    def _attach_head(features, units, activation, name):
        # Every head has the same shape: Dense(128, relu) -> Dropout(0.2) -> named output.
        hidden = layers.Dense(128, activation='relu')(features)
        hidden = layers.Dropout(0.2)(hidden)
        return layers.Dense(units, activation=activation, dtype='float32', name=name)(hidden)

    # ImageNet backbone without its classifier; pooling='avg' makes it emit one feature vector per image.
    backbone = ResNet152V2(include_top=False, input_shape=input_shape, weights='imagenet', pooling='avg')
    backbone.trainable = False  # frozen for the head-training stage

    inputs = layers.Input(shape=input_shape)
    # Inputs arrive already preprocessed by the data pipeline; the backbone is
    # called with training=False.
    features = backbone(inputs, training=False)
    features = layers.Dropout(0.3)(features)

    # Heads are attached in a fixed order: lighting, undertone, skin type.
    head_outputs = [
        _attach_head(features, 1, 'sigmoid', 'lighting_out'),
        _attach_head(features, num_undertone_classes, 'softmax', 'undertone_out'),
        _attach_head(features, num_skin_classes, 'softmax', 'skin_type_out'),
    ]

    return models.Model(inputs=inputs, outputs=head_outputs)

# Head widths come from the label encoders fitted on the target columns.
num_undertone_classes = len(le_undertone.classes_)
num_skin_classes = len(le_skin.classes_)
model = build_model(num_undertone_classes, num_skin_classes)
model.summary()

# ---------- Compile for head training ----------
# One loss and one metric list per output, keyed by output layer name.
losses_dict = dict(
    lighting_out=losses.BinaryCrossentropy(),
    undertone_out=losses.SparseCategoricalCrossentropy(),
    skin_type_out=losses.SparseCategoricalCrossentropy(),
)

metrics_dict = dict(
    lighting_out=[
        tf.keras.metrics.BinaryAccuracy(name='acc'),
        tf.keras.metrics.AUC(name='auc'),
    ],
    undertone_out=[tf.keras.metrics.SparseCategoricalAccuracy(name='acc')],
    skin_type_out=[tf.keras.metrics.SparseCategoricalAccuracy(name='acc')],
)

opt = optimizers.Adam(learning_rate=LEARNING_RATE_HEADS)
model.compile(
    optimizer=opt,
    loss=losses_dict,
    loss_weights=LOSS_WEIGHTS,
    metrics=metrics_dict,
)

# ---------- Callbacks ----------
ckpt_path = "/content/drive/MyDrive/skinterest_resnet_multitask_ckpt.weights.h5"

# All three callbacks watch the same quantity: validation accuracy of the
# lighting head, where higher is better.
monitor_spec = {'monitor': 'val_lighting_out_acc', 'mode': 'max'}

cb = [
    callbacks.ModelCheckpoint(
        ckpt_path,
        save_best_only=True,
        save_weights_only=True,
        **monitor_spec,
    ),
    callbacks.EarlyStopping(
        patience=5,
        restore_best_weights=True,
        **monitor_spec,
    ),
    callbacks.ReduceLROnPlateau(
        factor=0.5,
        patience=3,
        **monitor_spec,
    ),
]

# ---------- Train heads ----------
# No class_weight is passed here: the class weights computed for the lighting
# label are not applied to this multi-output fit.
history_heads = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS_HEADS,
    callbacks=cb,
)

# ---------- Unfreeze top of backbone for fine-tuning ----------
# get_layer raises ValueError for an unknown name (it never returns None), so
# the lookup has to be guarded with try/except for the "skip fine-tuning"
# path to be reachable.
try:
    base = model.get_layer("resnet152v2")  # <- directly fetch backbone by name
except ValueError:
    base = None

# Defined up front so later cells can test whether fine-tuning actually ran.
history_ft = None

if base is None:
    print("Warning: could not find backbone to unfreeze; skipping fine-tuning.")
else:
    base.trainable = True
    # Keep the first 60% of backbone layers frozen, train the remaining 40%.
    fine_tune_at = int(len(base.layers) * 0.6)
    for i, layer in enumerate(base.layers):
        layer.trainable = i >= fine_tune_at

    # Recompile so the new trainable flags take effect, with a lower LR.
    opt_finetune = optimizers.Adam(learning_rate=LEARNING_RATE_FINETUNE)
    model.compile(optimizer=opt_finetune, loss=losses_dict, loss_weights=LOSS_WEIGHTS, metrics=metrics_dict)

    history_ft = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS_UNFREEZE,
        callbacks=cb
    )

# ---------- Evaluation on test set ----------
print("Evaluating on test set...")
test_pred = model.predict(test_ds, verbose=1)

# test_pred is list: [lighting_probs, undertone_probs, skin_probs]
# reshape(-1) always yields a 1-D vector; squeeze() would collapse a
# single-sample test set to a 0-d scalar and break the metric calls below.
lighting_probs = np.asarray(test_pred[0]).reshape(-1)
undertone_probs = np.asarray(test_pred[1])
skin_probs = np.asarray(test_pred[2])

lighting_preds = (lighting_probs >= 0.5).astype(int)
# convert undertone and skin preds to labels
undertone_preds = np.argmax(undertone_probs, axis=1)
skin_preds = np.argmax(skin_probs, axis=1)

# Ground truth arrays (test_ds is built without shuffling, so row order matches the predictions)
y_lighting = test_df['lighting_label'].astype(int).values
y_undertone = test_df['undertone_id'].astype(int).values
y_skin = test_df['skin_type_id'].astype(int).values

# Overall metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fixing labels=[0, 1] guarantees a 2x2 matrix even when only one class occurs,
# so it is computed once and specificity is read from the negative-class row.
lighting_cm = confusion_matrix(y_lighting, lighting_preds, labels=[0, 1])
true_neg, false_pos = lighting_cm[0]
specificity = true_neg / (true_neg + false_pos) if (true_neg + false_pos) > 0 else None

print("Lighting metrics (binary):")
print("Accuracy:", accuracy_score(y_lighting, lighting_preds))
print("Precision:", precision_score(y_lighting, lighting_preds, zero_division=0))
print("Recall (sensitivity):", recall_score(y_lighting, lighting_preds, zero_division=0))
print("Specificity:", specificity)
print("F1:", f1_score(y_lighting, lighting_preds, zero_division=0))

# Passing the full id range as `labels` keeps target_names aligned with the
# encoder classes even when the test split is missing some classes; without
# it classification_report raises on the length mismatch.
print("\nUndertone classification report:")
print(classification_report(
    y_undertone, undertone_preds,
    labels=np.arange(len(le_undertone.classes_)),
    target_names=le_undertone.classes_,
    zero_division=0,
))

print("\nSkin-type classification report:")
print(classification_report(
    y_skin, skin_preds,
    labels=np.arange(len(le_skin.classes_)),
    target_names=le_skin.classes_,
    zero_division=0,
))

# ---------- Sensitivity/Specificity by group ----------
def sens_spec_by_group(y_true_binary, y_pred_binary, group_values, group_name):
    """Print and return binary sensitivity/specificity for each group.

    Args:
        y_true_binary: 0/1 ground-truth labels, one per sample.
        y_pred_binary: 0/1 predicted labels, aligned with `y_true_binary`.
        group_values: group identifier per sample, aligned with the labels.
        group_name: label used in the printed heading.

    Returns:
        A list of tuples (group, n, sensitivity, specificity, tp, fn, tn, fp),
        sorted by group size in descending order. Sensitivity is None when a
        group has no positives; specificity is None when it has no negatives.
    """
    df_eval = pd.DataFrame({'y_true': y_true_binary, 'y_pred': y_pred_binary, 'group': group_values})
    rows = []
    # sort=False keeps groups in order of first appearance, so equally sized
    # groups stay in that order after the stable sort below.
    for g, sub in df_eval.groupby('group', sort=False):
        true_pos, true_neg = sub['y_true'] == 1, sub['y_true'] == 0
        pred_pos, pred_neg = sub['y_pred'] == 1, sub['y_pred'] == 0
        # Count the four cells directly: this cannot produce a degenerate
        # shape, so no special-casing of one-class groups is needed.
        tp = int((true_pos & pred_pos).sum())
        fn = int((true_pos & pred_neg).sum())
        tn = int((true_neg & pred_neg).sum())
        fp = int((true_neg & pred_pos).sum())
        sens = tp / (tp + fn) if (tp + fn) > 0 else None
        spec = tn / (tn + fp) if (tn + fp) > 0 else None
        rows.append((g, len(sub), sens, spec, tp, fn, tn, fp))
    rows = sorted(rows, key=lambda r: r[1], reverse=True)
    print(f"\nSensitivity/Specificity by {group_name}:")
    print("group | n | sensitivity | specificity | tp | fn | tn | fp")
    for r in rows:
        print(r)
    return rows

# If EVAL_GROUP_COL exists in test_df, compute sensitivity/specificity for lighting across groups
if EVAL_GROUP_COL in test_df.columns:
    group_vals = test_df[EVAL_GROUP_COL].astype(str).values
    sens_spec_by_group(y_lighting, lighting_preds, group_vals, EVAL_GROUP_COL)
else:
    print(f"Eval group column {EVAL_GROUP_COL} not present in test_df; skipping per-group sensitivity/specificity.")

# Save final model weights (optional).
# Keras 3 (bundled with the TensorFlow 2.20 this notebook runs on) only
# accepts weight files whose name ends in ".weights.h5"; a bare ".h5" name
# makes save_weights raise after all training has finished.
final_weights_path = "/content/drive/MyDrive/skinterest_multitask_final.weights.h5"
model.save_weights(final_weights_path)
print("Saved model weights to:", final_weights_path)

# ---------- End ----------


TensorFlow version: 2.20.0
Initial combined_df rows: 770
Note: image_1_shot_type not found. Creating placeholder 'lighting_label' = 0 for all (you should replace with a real column).
No undertone column found. Creating placeholder 'undertone_raw' with single class 'unknown'.
No Fitzpatrick column found. Creating placeholder 'skin_type_raw' with single class 'unknown'.
Undertone classes: ['unknown']
Skin-type classes: ['unknown']
Lighting positive count (HARSH=1): 0 / 770
lighting_label
0    770
Name: count, dtype: int64
undertone_raw
unknown    770
Name: count, dtype: int64
skin_type_raw
unknown    770
Name: count, dtype: int64
Split sizes: 539 115 116
Only one class present for lighting in train set; class weighting skipped.


Epoch 1/8




[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step - lighting_out_acc: 0.9028 - lighting_out_auc: 0.0000e+00 - lighting_out_loss: 0.1557 - loss: 0.1558 - skin_type_out_acc: 1.0000 - skin_type_out_loss: 0.0000e+00 - undertone_out_acc: 1.0000 - undertone_out_loss: 0.0000e+00 - val_lighting_out_acc: 1.0000 - val_lighting_out_auc: 0.0000e+00 - val_lighting_out_loss: 7.7138e-07 - val_loss: 8.4831e-07 - val_skin_type_out_acc: 1.0000 - val_skin_type_out_loss: 0.0000e+00 - val_undertone_out_acc: 1.0000 - val_undertone_out_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 2/8
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 2s/step - lighting_out_acc: 1.0000 - lighting_out_auc: 0.0000e+00 - lighting_out_loss: 1.1205e-06 - loss: 1.1214e-06 - skin_type_out_acc: 1.0000 - skin_type_out_loss: 0.0000e+00 - undertone_out_acc: 1.0000 - undertone_out_loss: 0.0000e+00 - val_lighting_out_acc: 1.0000 - val_lighting_out_auc: 0.0000e+00 - val_lighting_out_loss: 1.4356e-07 -

KeyboardInterrupt: 