In [None]:
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import keras

# Base locations for input arrays, saved models and figures. They are joined to file
# names by plain string concatenation, so a non-empty value must end with a path
# separator. All three start out empty (i.e. the current working directory).
data_dir = model_dir = figure_dir = ""

def load_body_data():
    """Read the body-image train and test arrays from ``data_dir``.

    Returns:
        tuple: ``(train, test)``. Each is a dict with the keys ``"img"``, ``"sexs"``,
        ``"ages"`` and ``"labels"``, holding whatever ``np.load`` returns for the
        corresponding ``.npy`` file.
    """
    def _read(file_name):
        # data_dir is prepended verbatim (plain string concatenation, no separator added).
        return np.load(data_dir+file_name)

    train : dict[str, np.ndarray] = {
        "img": _read("body_train_X1.npy"),
        "sexs": _read("body_sex_train.npy"),
        "ages": _read("body_age_train.npy"),
        "labels": _read("body_train_y.npy"),
    }
    test : dict[str, np.ndarray] = {
        "img": _read("body_test_X1.npy"),
        "sexs": _read("body_sex_test.npy"),
        "ages": _read("body_age_test.npy"),
        "labels": _read("body_test_y.npy"),
    }

    return train, test

def split_face_data(train, test, key="img", stratify_by_age=False):
    """Shuffle the training data and carve a stratified 80/20 train/validation split.

    Despite the name, the function is data-agnostic: it only needs dicts with a
    ``key`` entry (inputs) and a ``"labels"`` entry (and ``"ages"`` when
    ``stratify_by_age`` is set).

    Args:
        train: dict holding the training arrays; ``train[key]`` and
            ``train["labels"]`` must have the same number of samples.
        test: dict holding the test arrays, passed through unchanged apart from
            flattening/casting the labels.
        key: name of the input array inside ``train`` / ``test``.
        stratify_by_age: when False (default) the split is stratified on the label
            only, exactly as before. When True the split is stratified on the pair
            (label, age band), where the age band is ``min(age // 20, 4)``.
            Every (label, band) combination then needs at least two samples, a
            requirement imposed by ``StratifiedShuffleSplit``.

    Returns:
        tuple: ``(train_img, train_labels, valid_img, valid_labels, test_X, test_y)``.
        Labels are flat ``int32`` arrays.

    Side effects:
        Prints the shapes and the positive/negative ratios of both splits.
    """
    X = train[key]
    y = train["labels"].reshape(-1).astype(np.int32)

    test_X = test[key]
    test_y = test["labels"].reshape(-1).astype(np.int32)

    # A private RandomState(123) yields the same permutation as seeding the global
    # generator with 123, but does not reset the notebook-wide NumPy random state.
    indices = np.arange(len(X))
    np.random.RandomState(123).shuffle(indices)
    X = X[indices]
    y = y[indices]

    strata = y
    if stratify_by_age:
        age = np.minimum(train["ages"].reshape(-1)[indices] // 20, 4)
        # Collapse each distinct (label, age band) pair into a single class id.
        _, strata = np.unique(np.column_stack((y, age)), axis=0, return_inverse=True)
        strata = strata.reshape(-1)

    stratified_splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=123)

    # Only the number of samples of the first argument matters to the splitter, so a
    # lightweight placeholder is passed instead of the image array.
    train_index, val_index = next(stratified_splitter.split(np.zeros(len(y)), strata))
    train_img, valid_img = X[train_index], X[val_index]
    train_labels, valid_labels = y[train_index], y[val_index]

    t_total = len(train_labels)
    t_P = train_labels.sum()
    t_N = t_total - t_P

    v_total = len(valid_labels)
    v_P = valid_labels.sum()
    v_N = v_total - v_P

    print("X_train shape:", train_img.shape)
    print("y_train shape:", train_labels.shape)
    print("X_val shape:", valid_img.shape)
    print("y_val shape:", valid_labels.shape)
    print("train P:{:.4f}, N:{:.4f}, total:{}".format(t_P/t_total, t_N/t_total, t_total))
    print("valid P:{:.4f}, N:{:.4f}, total:{}".format(v_P/v_total, v_N/v_total, v_total))

    return train_img, train_labels, valid_img, valid_labels, test_X, test_y

In [None]:
# @title evaluate
from sklearn.metrics import *
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import sklearn


class evaluate:
  """Binary-classification metrics at a decision threshold.

  Holds the labels/scores of an evaluation set and of a validation set. The
  validation set is used by ``set_threshold`` to choose the decision threshold;
  ``result`` then reports metrics at that threshold.

  Attributes:
    labels, predict: evaluation labels and predicted scores (squeezed).
    val_labels, val_predict: validation labels and predicted scores (squeezed).
    threshold: decision threshold, 0.5 until ``set_threshold`` is called.
    sex, age: stored unchanged for use by callers.
    figsize: default figure size, stored for use by callers.
  """

  def __init__(self, labels, predict, val_labels, val_predict, sex, age):
    self.labels = np.asarray(labels).squeeze()
    self.predict = np.asarray(predict).squeeze()
    self.val_labels = np.asarray(val_labels).squeeze()
    self.val_predict = np.asarray(val_predict).squeeze()
    self.figsize = (10, 10)
    self.threshold = 0.5
    self.sex = sex
    self.age = age

  def set_threshold(self):
    """Pick the threshold that maximises sensitivity + specificity on the validation set.

    Maximising sensitivity + specificity is equivalent to maximising Youden's
    index (which only subtracts the constant 1). Candidate thresholds are the
    sorted validation scores without the smallest and the largest one; on ties
    the lowest candidate wins.

    Returns:
      The selected threshold (also stored in ``self.threshold``).

    Raises:
      ValueError: if labels and scores differ in shape, if fewer than three
        validation scores are available, or if the validation labels do not
        contain both classes.
    """
    val_label = np.asarray(self.val_labels).reshape(-1)
    val_predict = np.asarray(self.val_predict).reshape(-1)
    if val_label.shape != val_predict.shape:
      raise ValueError("validation labels and predictions must have the same length")

    # Class membership comes from the labels alone. Comparing labels against the
    # score threshold (as was done before) breaks as soon as a candidate threshold
    # is <= 0, because every sample then counts as positive.
    is_pos = val_label >= 0.5
    is_neg = np.logical_not(is_pos)
    n_pos = int(is_pos.sum())
    n_neg = int(is_neg.sum())
    if n_pos == 0 or n_neg == 0:
      raise ValueError("set_threshold needs both positive and negative validation labels")

    thresholds = np.sort(val_predict)[1:-1]
    if thresholds.size == 0:
      raise ValueError("set_threshold needs at least three validation predictions")

    def calculate_Y_index(threshold):
      flagged = val_predict >= threshold
      Sensitivity = np.logical_and(is_pos, flagged).sum() / n_pos
      Specificity = np.logical_and(is_neg, np.logical_not(flagged)).sum() / n_neg
      return Sensitivity + Specificity

    y = np.array([calculate_Y_index(t) for t in thresholds])
    self.threshold = thresholds[np.argmax(y)]
    return self.threshold


  def result(self, labels=None, predict=None):
    """Compute metrics at ``self.threshold``.

    Args:
      labels: binary ground-truth labels. When omitted or empty, the labels
        given to the constructor are used.
      predict: predicted scores. When omitted or empty, the scores given to the
        constructor are used.

    Returns:
      tuple: ``(Sensitivity, Specificity, PPV, NPV, acc, auc)``, each rounded to
      four decimals. ``PPV`` and ``NPV`` are both ``None`` when either of them is
      undefined (no predicted positives or no predicted negatives).

    Side effects:
      Prints the array shapes and the confusion-matrix counts.
    """
    # Fall back to the stored evaluation set when nothing usable was passed.
    # (The previous check referenced an undefined name and raised NameError.)
    if labels is None or predict is None or len(labels) == 0 or len(predict) == 0:
      labels = self.labels
      predict = self.predict

    labels = np.asarray(labels).squeeze()
    predict = np.asarray(predict).squeeze()
    assert len(labels) == len(predict)

    predicted_positive = predict >= self.threshold

    acc = round(accuracy_score(labels, predicted_positive), 4)
    auc = round(roc_auc_score(labels, predict), 4)

    print(labels.shape, predict.shape)
    cm = confusion_matrix(labels, predicted_positive)
    # Rows are true classes, columns are predicted classes.
    TP = cm[1][1]
    TN = cm[0][0]
    FP = cm[0][1]
    FN = cm[1][0]

    print(f"{TP=}, {TN=}, {FP=}, {FN=}")

    assert TP + FN == labels.sum()
    assert TN + FP == labels.shape[0] - labels.sum()

    Sensitivity = round(TP / (TP + FN), 4)
    Specificity = round(TN / (FP + TN), 4)
    if TP + FP == 0 or TN + FN == 0:
      PPV = None
      NPV = None
    else:
      PPV = round(TP / (TP + FP), 4)
      NPV = round(TN / (TN + FN), 4)

    return (Sensitivity, Specificity, PPV, NPV, acc, auc)
def group_age_sex(arr, age, sex):
    """Slice ``arr`` into age-band and sex subgroups with boolean masks.

    Args:
        arr: array to slice; indexed along its first axis.
        age: array of ages aligned with ``arr``.
        sex: array of sex labels aligned with ``arr``; '남' (male) and '여' (female)
            are the two values that are selected.

    Returns:
        tuple: seven slices of ``arr`` in this order: age < 20, 20-39, 40-59,
        age >= 60, age >= 20, male, female.
    """
    selectors = (
        age < 20,
        (age >= 20) & (age < 40),
        (age >= 40) & (age < 60),
        age >= 60,
        age >= 20,
        sex == '남',
        sex == '여',
    )
    return tuple(arr[mask] for mask in selectors)


In [None]:
# @title load data
import os
import numpy as np

# Load the raw arrays, then split the training portion into train/validation sets.
train, test = load_body_data()
(train_img, train_labels,
 valid_img, valid_labels,
 test_X, test_y) = split_face_data(train, test)

# Per-sample test metadata, kept for subgroup analysis.
test_ages, test_sexs = (test[field] for field in ('ages', 'sexs'))


X_train shape: (1121, 608, 224, 3)
y_train shape: (1121,)
X_val shape: (281, 608, 224, 3)
y_val shape: (281,)
train P:0.7556, N:0.2444, total:1121
valid P:0.7544, N:0.2456, total:281


In [None]:
import tensorflow as tf
# Input geometry: height x width x 3 channels.
img_height, img_width = 608, 224
img_size = (img_height, img_width, 3)

# One (inputs, labels) tf.data pipeline per split, in the order train / validation / test.
train_ds, val_ds, test_ds = (
    tf.data.Dataset.from_tensor_slices(pair)
    for pair in (
        (train_img, train_labels),
        (valid_img, valid_labels),
        (test_X, test_y),
    )
)

In [None]:
BATCH_SIZE = 32
# Number of elements the shuffle buffer holds; shuffling is only approximate when
# this is smaller than the number of training samples.
SHUFFLE_BUFFER_SIZE = 100

# The pipelines are rebuilt from the arrays rather than re-assigned from themselves
# (train_ds = train_ds.shuffle(...).batch(...)), so re-running this cell never
# batches an already-batched dataset. On a fresh top-to-bottom run the result is
# the same as before.
train_ds = (
    tf.data.Dataset.from_tensor_slices((train_img, train_labels))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
val_ds = tf.data.Dataset.from_tensor_slices((valid_img, valid_labels)).batch(BATCH_SIZE)
test_ds = tf.data.Dataset.from_tensor_slices((test_X, test_y)).batch(BATCH_SIZE)


In [None]:
from tensorflow import keras
from tensorflow.keras import regularizers

# Metrics attached to the model at compile time. The `name` values are the keys Keras
# reports them under; the training cell's early stopping monitors 'val_auc', i.e. the
# 'auc' metric on the validation data.
METRICS = [
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.AUC(num_thresholds=1000, name='auc'),
]




In [None]:
# Class counts are derived from the training labels instead of being typed in by
# hand: the previous constants (69 / 212) matched the 281-sample validation split,
# while the weights are applied to the training data.
pos = int(np.count_nonzero(train_labels))
neg = int(np.size(train_labels)) - pos
total = neg+pos
if pos == 0 or neg == 0:
    raise ValueError("class weights need at least one sample of each class")

# Scale each class by total / 2 so that both classes contribute the same overall
# weight to the loss and the sum of all sample weights stays equal to `total`.
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)

class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

Weight for class 0: 2.04
Weight for class 1: 0.66


In [None]:
# Fraction of positive samples, using the counts defined in the class-weight cell.
pos/total


0.7544483985765125

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Sequential, regularizers
from tensorflow.keras.initializers import Constant, glorot_normal
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau


def get_CNN(learning_rate, dropout_rate, num_layers, num_filters, seed=0, output_bias=None,
            input_shape=None, metrics=None):
    """Build and compile a small CNN with a single sigmoid output.

    Architecture: input rescaling, then three stages of
    ``MaxPooling2D -> num_layers x (Conv2D 3x3 -> BatchNormalization -> ReLU)`` with
    the filter count doubling per stage, followed by global max pooling, dropout and
    one L2-regularised sigmoid unit.

    Args:
        learning_rate: learning rate for the Adam optimizer.
        dropout_rate: dropout rate applied before the output layer.
        num_layers: number of convolution blocks per stage (cast to int).
        num_filters: filter count of the first stage (cast to int).
        seed: seed for every kernel initializer and for dropout.
        output_bias: optional initial value for the output bias; when None the
            bias starts at zero.
        input_shape: input shape without the batch axis; defaults to the
            notebook-level ``img_size``.
        metrics: metrics passed to ``compile``; defaults to the notebook-level
            ``METRICS``.

    Returns:
        The compiled ``Sequential`` model (binary cross-entropy loss).
    """
    # Dense documents "zeros" as its default bias initializer; use it explicitly
    # rather than handing None to the layer when no starting bias was requested.
    bias_initializer = "zeros" if output_bias is None else Constant(output_bias)
    if input_shape is None:
        input_shape = img_size
    if metrics is None:
        metrics = METRICS

    num_layers = int(num_layers)
    num_filters = int(num_filters)

    model = Sequential()
    # Maps x -> 1 - x / 127.5, i.e. 0 -> 1 and 255 -> -1.
    # NOTE(review): the sign is flipped relative to the more common
    # Rescaling(1/127.5, offset=-1); left as is so existing weights stay valid — confirm intent.
    model.add(layers.Rescaling(-1/127.5, offset=1.0, input_shape=input_shape))

    for stage in range(3):
        # Each stage first halves the spatial resolution, including the very first one.
        model.add(layers.MaxPooling2D((2, 2)))

        stage_filters = num_filters * 2 ** stage
        for _ in range(num_layers):
            model.add(layers.Conv2D(stage_filters, (3, 3), padding='same', activation=None,
                                    kernel_initializer=glorot_normal(seed=seed)))
            model.add(layers.BatchNormalization())
            model.add(layers.ReLU())

    model.add(layers.GlobalMaxPooling2D())
    model.add(layers.Dropout(dropout_rate, seed=seed))

    # The output layer now honours `seed` as well (it used a fixed seed of 0 before,
    # so the `seed` argument did not fully control initialisation).
    model.add(layers.Dense(1, activation='sigmoid',
                           kernel_regularizer=regularizers.l2(0.001),
                           bias_initializer=bias_initializer,
                           kernel_initializer=glorot_normal(seed=seed)))

    optimizer = Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=metrics)

    return model


In [None]:

import os
from datetime import datetime

# datetime object containing current date and time

# Hyper-parameters handed to get_CNN: learning rate, dropout rate and RNG seed.
lr = 1e-3
dr = 0.3
seed = 1234
# Log-odds of the positive class: with this output bias the sigmoid initially
# predicts pos / (pos + neg).
initial_bias = np.log([pos/neg])
# Positional arguments 2 and 48 are num_layers and num_filters.
model = get_CNN(lr, dr, 2, 48, seed= seed, output_bias=initial_bias)


# Checkpoints go into a directory named after the current day_month_hour_minute
# (the format contains no year or seconds).
now = datetime.now()
dt_string = now.strftime("%d_%m_%H_%M")
print(dt_string)
# "{epoch:04d}" is left as a placeholder for ModelCheckpoint to fill in.
checkpoint_path = dt_string+"/cp-{epoch:04d}.ckpt"
# NOTE(review): checkpoint_dir is computed but not used anywhere in this cell.
checkpoint_dir = os.path.dirname(checkpoint_path)

# Rebuild the pipelines from the arrays; this replaces the datasets created earlier.
train_ds = tf.data.Dataset.from_tensor_slices((train_img, train_labels))
val_ds = tf.data.Dataset.from_tensor_slices((valid_img, valid_labels))
test_ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))

# Overrides the earlier BATCH_SIZE; only the training set uses it, validation and
# test are batched by 32.
BATCH_SIZE = 16

# NOTE(review): unlike the earlier batching cell, the training set is not shuffled
# here, so every epoch sees the samples in the same order — confirm this is intended.
train_ds = train_ds.batch(BATCH_SIZE)
val_ds = val_ds.batch(32)
test_ds = test_ds.batch(32)

# Light augmentation: small random zoom, multiplicative Gaussian noise and a small
# brightness shift on a 0-255 value range.
data_augmentation = keras.Sequential([
    layers.RandomZoom(0.05, fill_mode="nearest"),
    # layers.RandomCrop(224, 224),
    layers.GaussianDropout(0.005),
    layers.RandomBrightness((-0.01, 0.01), value_range=(0, 255)),
])
# Augmentation is applied to training batches only (training=True keeps the random
# layers active); train_ds itself stays un-augmented.
train_ds_augmented = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y)).prefetch(
        tf.data.AUTOTUNE)

# Save the weights after every epoch, and stop once val_auc has not improved by at
# least 0.002 for 4 epochs, restoring the best weights seen.
callbacks=[ModelCheckpoint(filepath=checkpoint_path,
                           save_weights_only=True,
                           verbose=0,
                           save_freq="epoch"),
           EarlyStopping(monitor='val_auc',
                         verbose=1,
                         patience=4,
                         mode='max',
                         min_delta = 0.002,
                         restore_best_weights=True),
          ]
# NOTE(review): this scores the still-untrained model on the test set; `results` is
# not read again in the cells shown here.
results = model.evaluate(test_ds, verbose=1)
# Train on the augmented batches; class_weight comes from the class-weight cell.
history = model.fit(train_ds_augmented, epochs=20,
                    validation_data=val_ds,
                    callbacks=callbacks,
                    class_weight=class_weight)

02_07_11_29
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 10: early stopping


In [None]:
# Score the model that is currently in memory on the un-augmented training batches
# and on the test batches. No checkpoint is reloaded here.
train_results, test_results = model.evaluate(train_ds), model.evaluate(test_ds)

# One row per compiled metric: name, train value, test value.
print("         train | test")
for name, train_value, test_value in zip(model.metrics_names, train_results, test_results):
    print("{:8s} {:.4f}, {:.4f}".format(name, train_value, test_value))
print()

         train | test
loss     0.6783, 0.6783
accuracy 0.6851, 0.6862
auc      0.5731, 0.5797



In [None]:
# Persist the trained model under model_dir; the name is appended verbatim, so
# model_dir needs its own trailing path separator when it is non-empty.
# NOTE(review): the target has no file extension — whether model.save accepts that
# depends on the installed Keras version; confirm before upgrading.
model.save(model_dir+"small_body")