<a href="https://colab.research.google.com/github/Zarathustrai/Zarathustrai.github.io/blob/main/%5BNN%5D_PALMS_DRAFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# PALMS
### Prerequisites

In [None]:
import tensorflow as tf
import pandas as pd
import os
import shutil
import numpy as np
import plotly.graph_objects as go

from PIL import Image
from keras.models import Model
from keras.layers import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Lambda
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from matplotlib import pyplot as plt
from tensorflow import keras
from sklearn.metrics import classification_report
from sklearn.metrics import r2_score

In [None]:
# Mount Google Drive inside the Colab VM; force_remount=True re-mounts even
# when a mount from an earlier session is still present.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [None]:
# Ask TensorFlow for the name of the GPU it will use and print it;
# the expected value is "/device:GPU:0".
device_name = tf.test.gpu_device_name()
print(device_name)

/device:GPU:0


In [None]:
# Notebook-wide configuration: dataset locations, split folder names and
# training hyper-parameters.

class config:
    """Single place for every path and hyper-parameter used by the notebook."""

    # --- Dataset locations (! ADJUST AS NEEDED for your Drive layout) ---
    DATASET_PATH = 'drive/MyDrive/palm_data/Hands/Hands'
    CSV_PATH = 'drive/MyDrive/palm_data/HandInfo.csv'
    ROOT = os.getcwd()

    # --- Output location and the folder name of each data split ---
    OUTPUT = "output"
    TRAIN = "train"
    TEST = "test"
    VAL = "val"

    # Side length, in pixels, of the square images fed to the network.
    INPUT_CROPSIZE = 224

    # Batch sizes (drop both to 8 for a quick smoke test).
    BATCH_SIZE = 32
    TEST_BATCH_SIZE = 64

    # Fraction of the data kept on the training side of each split.
    TRAIN_SPLIT = 0.9

    # Optimisation settings (use EPOCHS = 2 for a quick smoke test).
    EPOCHS = 20
    LEARNING_RATE = 1e-3

### Loading Data

In [None]:
def copy_images(imagePaths, folder):
    """Copy each image named in ``imagePaths`` from the dataset directory into ``folder``.

    Args:
        imagePaths: iterable of file names relative to ``config.DATASET_PATH``.
        folder: destination directory; created (with parents) when missing.
    """
    folder_missing = not os.path.exists(folder)
    if folder_missing:
        os.makedirs(folder)

    for image_name in imagePaths:
        source = config.DATASET_PATH + '/' + image_name
        shutil.copy(source, folder)

In [None]:
def plot_distribution(pd_series):
    """Show a donut chart of how often each distinct value occurs in ``pd_series``.

    Args:
        pd_series: pandas Series; its ``name`` is used in the figure title.
    """
    frequency = pd_series.value_counts()

    donut = go.Pie(
        labels=frequency.index.tolist(),
        values=frequency.values.tolist(),
        hole=.3,
    )
    figure = go.Figure(data=[donut])
    figure.update_layout(title_text='Distribution for %s' % pd_series.name)
    figure.show()

# Load the metadata table here: without it this cell raises NameError on a
# fresh kernel because `df` is only created much further down the notebook.
df = pd.read_csv(config.CSV_PATH)

# One donut chart per categorical attribute.
for column in ('gender', 'accessories', 'nailPolish', 'aspectOfHand', 'skinColor'):
    plot_distribution(df[column])

# Age is continuous, so bucket it into ranges before counting.
bins = [10, 20, 30, 40, 60, 80]
names = ['10-20', '20-30', '30-40', '40-60', '60-80']
age_binned = pd.cut(df['age'], bins, labels=names)
plot_distribution(age_binned)

NameError: ignored

In [None]:
# CREATE IMAGE PATHS
def create_paths(image_names, train_split=None):
    """Randomly split image names into train, validation and test sets.

    The names are first split into (train + validation) / test using
    ``train_split``; the first part is then split again with the same ratio
    into train / validation.

    The input is never modified: a shuffled *copy* is produced. Shuffling a
    DataFrame column in place would reorder the file names relative to the
    label columns of the same DataFrame.

    Args:
        image_names: sequence of image file names (list, array or Series).
        train_split: fraction kept on the training side of each split;
            defaults to ``config.TRAIN_SPLIT``.

    Returns:
        ``(trainPaths, valPaths, testPaths)`` as NumPy arrays.
    """
    if train_split is None:
        train_split = config.TRAIN_SPLIT

    # permutation() returns a shuffled copy and leaves its argument untouched.
    shuffled = np.random.permutation(np.asarray(image_names))

    n_train_val = int(len(shuffled) * train_split)
    train_val, testPaths = shuffled[:n_train_val], shuffled[n_train_val:]

    n_train = int(n_train_val * train_split)
    trainPaths, valPaths = train_val[:n_train], train_val[n_train:]
    return trainPaths, valPaths, testPaths

In [None]:
# Convert aliases into ID
def alias_to_ID(dataset):
    """Add integer id columns for the textual gender, skin colour and hand-aspect labels.

    The columns ``gender_id``, ``skin_id`` and ``palm_id`` are added to
    ``dataset`` in place, derived from ``gender``, ``skinColor`` and
    ``aspectOfHand`` respectively.

    Args:
        dataset: DataFrame with ``gender``, ``skinColor``, ``aspectOfHand``
            and ``age`` columns.

    Returns:
        ``(dataset, max_age, dataset_dict)`` where ``dataset`` is the same
        object that was passed in, ``max_age`` is the largest value of the
        ``age`` column and ``dataset_dict`` holds the id -> alias tables
        (``*_id``) together with their inverses (``*_alias``).

    Raises:
        KeyError: if a label is not present in the lookup tables.
    """
    dataset_dict = {
        'skin_id': {0: 'very fair', 1: 'fair', 2: 'medium', 3: 'dark'},
        'gender_id': {0: 'male', 1: 'female'},
        'palm_id': {0: 'palmar right', 1: 'dorsal right', 2: 'palmar left', 3: 'dorsal left'},
    }

    # Inverse tables: alias -> id.
    for prefix in ('gender', 'skin', 'palm'):
        dataset_dict[prefix + '_alias'] = {
            alias: idx for idx, alias in dataset_dict[prefix + '_id'].items()
        }

    # (source column, table prefix) pairs. Mapping through __getitem__ instead
    # of passing the dict keeps the KeyError on unknown labels; a dict passed
    # to Series.map would silently yield NaN.
    for source_column, prefix in (('gender', 'gender'),
                                  ('skinColor', 'skin'),
                                  ('aspectOfHand', 'palm')):
        alias_lookup = dataset_dict[prefix + '_alias']
        dataset[prefix + '_id'] = dataset[source_column].map(alias_lookup.__getitem__)

    max_age = dataset['age'].max()
    # Return the argument itself, not a global that merely happens to share its name.
    return dataset, max_age, dataset_dict


In [None]:
# Data augmentation
def preprocess_image(img_path, augment=True):
    """Load an image, resize it to the network input size and optionally augment it.

    Args:
        img_path: path of the image file to read.
        augment: when True (default) apply a random horizontal flip, a random
            vertical flip and a random rotation of up to 15 degrees.

    Returns:
        NumPy array of shape ``(INPUT_CROPSIZE, INPUT_CROPSIZE, 3)`` with
        values scaled to ``[0, 1]``.
    """
    side = config.INPUT_CROPSIZE
    # The context manager closes the file handle; convert("RGB") guarantees
    # three channels even for grayscale or RGBA files.
    with Image.open(img_path) as raw:
        resized = raw.convert("RGB").resize((side, side))
        im = np.asarray(resized, dtype=np.float32) / 255.0

    if not augment:
        return im

    im = tf.image.random_flip_left_right(im)
    im = tf.image.random_flip_up_down(im)
    # random_rotation works on NumPy arrays and defaults to channels-first
    # axes (row_axis=1, col_axis=2, channel_axis=0); the image here is
    # height x width x channels, so the axes must be given explicitly.
    im = tf.keras.preprocessing.image.random_rotation(
        im.numpy(), 15, row_axis=0, col_axis=1, channel_axis=2)
    return im

In [None]:
# GENERATE BATCH BASED ON IMAGENAME
def generate_images(img_paths, batch_size, is_training):
    """Yield ``(images, targets)`` batches for the multi-output model.

    ``targets`` is a list ordered like the outputs of
    ``MultiModel.assemble_full_model``: age, gender, race (skin), palm,
    accessories, nail. Age is divided by the global ``max_age`` and has shape
    ``(batch, 1)``; every other target is one-hot encoded, which is what the
    cross-entropy losses and the ``argmax`` calls in ``evaluate`` expect.

    Label rows are read from the global DataFrame ``df`` (it must already
    contain the ``*_id`` columns added by ``alias_to_ID``).

    Args:
        img_paths: image file names relative to ``config.DATASET_PATH``.
        batch_size: number of samples per yielded batch.
        is_training: when True the generator loops over ``img_paths`` forever;
            otherwise it stops after one pass. Samples that do not fill a
            complete batch at the end of a single pass are not yielded.
    """
    one_hot_2 = np.eye(2, dtype=np.float32)
    one_hot_4 = np.eye(4, dtype=np.float32)
    # (label column, one-hot table) in model-output order, after age.
    # The class counts must agree with the defaults of the MultiModel branches.
    # NOTE(review): assumes `accessories` and `nailPolish` hold 0/1 values -
    # confirm against HandInfo.csv.
    class_heads = (
        ('gender_id', one_hot_2),
        ('skin_id', one_hot_4),
        ('palm_id', one_hot_4),
        ('accessories', one_hot_2),
        ('nailPolish', one_hot_2),
    )

    # Index the label table once instead of scanning the whole frame per image.
    rows_by_name = df.drop_duplicates('imageName').set_index('imageName')

    images, ages = [], []
    class_labels = [[] for _ in class_heads]
    while True:
        for path in img_paths:
            person = rows_by_name.loc[path]
            images.append(preprocess_image(config.DATASET_PATH + '/' + path))  # full path
            ages.append(person['age'] / max_age)
            for collected, (column, one_hot) in zip(class_labels, class_heads):
                collected.append(one_hot[int(person[column])])

            # yielding condition
            if len(images) >= batch_size:
                targets = [np.array(ages, dtype=np.float32).reshape(-1, 1)]
                targets.extend(np.array(collected) for collected in class_labels)
                yield np.array(images), targets
                images, ages = [], []
                class_labels = [[] for _ in class_heads]
        if not is_training:
            break

In [None]:
# TESTING RESULTS
def evaluate(testPaths, model, max_age):
    """Run the model on the test images and collect predictions and ground truth.

    Two passes are made over ``generate_images(testPaths, is_training=False)``:
    one feeds ``model.predict``, the other gathers the true labels. Only
    complete batches of ``config.TEST_BATCH_SIZE`` are evaluated.

    Args:
        testPaths: image file names of the test split.
        model: model whose outputs are ordered age, gender, race, palm,
            accessories, nail.
        max_age: factor used to scale the normalised ages back.

    Returns:
        ``(age_true, race_true, gender_true, palm_true, accessories_true,
        nail_true, age_pred, gender_pred, race_pred, palm_pred,
        accessories_pred, nail_pred)``. Class entries are index arrays
        obtained with ``argmax``; ages are multiplied by ``max_age``.
    """
    steps = len(testPaths) // config.TEST_BATCH_SIZE

    # Model.predict accepts generators directly, like the model.fit call used
    # for training; predict_generator is the deprecated spelling.
    test_generator = generate_images(testPaths, is_training=False, batch_size=config.TEST_BATCH_SIZE)
    age_pred, gender_pred, race_pred, palm_pred, accessories_pred, nail_pred = model.predict(
        test_generator, steps=steps)

    # Second pass: collect only the labels. The images themselves are not
    # needed, so they are not kept in memory.
    per_head = [[] for _ in range(6)]
    truth_generator = generate_images(testPaths, is_training=False, batch_size=config.TEST_BATCH_SIZE)
    for _, labels in truth_generator:
        for collected, batch_labels in zip(per_head, labels):
            collected.extend(batch_labels)

    age_true, gender_true, race_true, palm_true, accessories_true, nail_true = (
        np.array(collected) for collected in per_head)

    # One-hot / probability vectors -> class indices.
    gender_true = gender_true.argmax(axis=-1)
    race_true = race_true.argmax(axis=-1)
    palm_true = palm_true.argmax(axis=-1)
    accessories_true = accessories_true.argmax(axis=-1)
    nail_true = nail_true.argmax(axis=-1)

    gender_pred = gender_pred.argmax(axis=-1)
    race_pred = race_pred.argmax(axis=-1)
    palm_pred = palm_pred.argmax(axis=-1)
    accessories_pred = accessories_pred.argmax(axis=-1)
    nail_pred = nail_pred.argmax(axis=-1)

    # Undo the normalisation of the age values.
    age_true = age_true * max_age
    age_pred = age_pred * max_age
    return age_true, race_true, gender_true, palm_true, accessories_true, nail_true, age_pred, gender_pred, race_pred, palm_pred, accessories_pred, nail_pred

In [None]:
# INFER STATS FROM TESTING + EXPORT TO TXT
def statistics(dataset_dict, age_true, race_true, gender_true, palm_true, accessories_true, nail_true, age_pred, gender_pred, race_pred, palm_pred, accessories_pred, nail_pred):
    """Write per-attribute test statistics to text files in ``statistics/``.

    A scikit-learn classification report is written for gender, skin colour,
    palm, accessories and nail; the R^2 score is written for age.

    Args:
        dataset_dict: accepted for interface compatibility; not used.
        *_true / *_pred: ground-truth and predicted values per attribute, as
            returned by ``evaluate``.
    """
    os.makedirs("statistics", exist_ok=True)

    # Output file stem -> text to write.
    reports = {
        "genderStat": classification_report(gender_true, gender_pred),
        "skinStat": classification_report(race_true, race_pred),
        "palmStat": classification_report(palm_true, palm_pred),
        "accessoriesStat": classification_report(accessories_true, accessories_pred),
        "nailStat": classification_report(nail_true, nail_pred),
        "ageR2": str(r2_score(age_true, age_pred)),
    }

    for stem, text in reports.items():
        # `with` closes the handle even if the write fails.
        with open("statistics/" + stem + ".txt", "w") as handle:
            handle.write(text)

In [None]:
# TRAIN / VALIDATION RESULTS TO PNG GRAPHS
def _plot_train_valid(history, metric_key, title, yaxis_title, out_path, trace_cls):
    """Show one train-vs-validation curve for ``metric_key`` and save it to ``out_path``."""
    fig = go.Figure()
    fig.add_trace(trace_cls(
        y=history.history[metric_key],
        name='Train'))
    fig.add_trace(trace_cls(
        y=history.history['val_' + metric_key],
        name='Valid'))
    fig.update_layout(height=500,
                      width=700,
                      title=title,
                      xaxis_title='Epoch',
                      yaxis_title=yaxis_title)
    fig.show()
    fig.write_image(out_path)


def plot_results(history):
    """Plot the training and validation curves and save them as PNGs in ``images/``.

    One figure is produced for the overall loss, one for the accuracy of each
    classification output (race, gender, accessories, palm, nail) and one for
    the mean absolute error of the age output.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists, as returned by ``model.fit``.
    """
    os.makedirs("images", exist_ok=True)

    _plot_train_valid(history, 'loss', 'Overall loss', 'Loss',
                      "images/trainValLoss.png", go.Scattergl)

    # (output name, label used in the title, image file) per classification head.
    accuracy_plots = (
        ('race', 'race', "images/VTRaceOutputAccuracy.png"),
        ('gender', 'gender', "images/VTGenderOutputAccuracy.png"),
        ('accessories', 'accessories', "images/VTAccessoriesOutputAccuracy.png"),
        ('palm', 'palm', "images/VTPalmOutputAccuracy.png"),
        # The nail head is exported by train_val_results as well, so it is
        # plotted here for completeness.
        ('nail', 'nail', "images/VTNailOutputAccuracy.png"),
    )
    for output_name, label, out_path in accuracy_plots:
        _plot_train_valid(history,
                          output_name + '_output_accuracy',
                          'Accuracy for %s feature' % label,
                          'Accuracy',
                          out_path,
                          go.Scatter)

    # NOTE(review): the file name says "Accessories" although the figure shows
    # the age MAE; it is kept unchanged so existing consumers of the file do
    # not break - consider renaming.
    _plot_train_valid(history, 'age_output_mae',
                      'Mean Absolute Error for age feature',
                      'Mean Absolute Error',
                      "images/VTAccessoriesOutputMAE.png", go.Scattergl)

In [None]:
# TRAIN / VALIDATION RESULTS TO CSV
def train_val_results(history):
    """Export the per-epoch training history to CSV files in ``trainValRes/``.

    Three files are written: ``output.csv`` (training metrics),
    ``val_output.csv`` (validation metrics) and ``loss_output.csv`` (overall
    training and validation loss).

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists.
    """
    if not os.path.exists("trainValRes"):
        os.mkdir("trainValRes")

    # (key in history.history, CSV column name), in output column order.
    metric_columns = [
        ('age_output_mae', 'age_mae'),
        ('race_output_accuracy', 'race_acc'),
        ('gender_output_accuracy', 'gender_acc'),
        ('nail_output_accuracy', 'nail_acc'),
        ('accessories_output_accuracy', 'accessories_acc'),
        ('palm_output_accuracy', 'palm_acc'),
    ]
    log = history.history

    train_table = {column: np.array(log[key]) for key, column in metric_columns}
    loss_table = {"loss": np.array(log['loss']), "val_loss": np.array(log['val_loss'])}
    pd.DataFrame(train_table).to_csv("trainValRes/output.csv", index=False)

    val_table = {"val_" + column: np.array(log["val_" + key]) for key, column in metric_columns}
    pd.DataFrame(val_table).to_csv("trainValRes/val_output.csv", index=False)

    pd.DataFrame(loss_table).to_csv("trainValRes/loss_output.csv", index=False)

In [None]:
# BUILD MODEL
class MultiModel():
    """Builder for a multi-output CNN with one independent convolutional branch per attribute.

    Every branch receives the same input tensor, runs its own copy of the
    hidden layers from ``make_default_hidden_layers`` and ends in a named
    output layer (``age_output``, ``race_output``, ``gender_output``,
    ``palm_output``, ``accessories_output``, ``nail_output``).
    """

    def make_default_hidden_layers(self, inputs):
        """
        Used to generate a default set of hidden layers. The structure used in this network is
        three repetitions of:
        Conv2D -> ReLU -> BatchNormalization -> MaxPooling -> Dropout
        with 16, 32 and 32 filters and pool sizes 3, 2 and 2.
        """
        x = inputs
        for filters, pool in ((16, 3), (32, 2), (32, 2)):
            x = Conv2D(filters, (3, 3), padding="same")(x)
            x = Activation("relu")(x)
            x = BatchNormalization(axis=-1)(x)
            x = MaxPooling2D(pool_size=(pool, pool))(x)
            x = Dropout(0.25)(x)
        return x

    def _build_branch(self, inputs, units, activation, name):
        """
        Shared recipe of every branch: the default hidden layers, then
        Flatten -> Dense(128) -> ReLU -> BatchNormalization -> Dropout(0.5)
        -> Dense(units) -> Activation(activation) named ``name``.
        """
        x = self.make_default_hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(units)(x)
        x = Activation(activation, name=name)(x)
        return x

    def build_accessories_branch(self, inputs, num_accessories =2):
        """
        Used to build the accessories branch of our palm recognition network.
        Ends in a sigmoid layer with ``num_accessories`` units named "accessories_output".
        """
        return self._build_branch(inputs, num_accessories, "sigmoid", "accessories_output")

    def build_palm_branch(self, inputs, num_palm = 4):
        """
        Used to build the palm branch of our palm recognition network.
        Ends in a softmax layer with ``num_palm`` units named "palm_output".
        """
        return self._build_branch(inputs, num_palm, "softmax", "palm_output")

    def build_race_branch(self, inputs, num_skinColor = 4):
        """
        Used to build the skin color branch of our palm recognition network.
        Ends in a softmax layer with ``num_skinColor`` units named "race_output".
        """
        return self._build_branch(inputs, num_skinColor, "softmax", "race_output")

    def build_gender_branch(self, inputs, num_genders=2):
        """
        Used to build the gender branch of our palm recognition network.
        Ends in a sigmoid layer with ``num_genders`` units named "gender_output".

        NOTE(review): an earlier version created an rgb_to_grayscale Lambda
        here but discarded its output and fed the RGB ``inputs`` to the hidden
        layers. The dead layer is removed; the branch still sees RGB, so the
        resulting model is unchanged. Confirm whether grayscale was intended.
        """
        return self._build_branch(inputs, num_genders, "sigmoid", "gender_output")

    def build_nail_branch(self, inputs, num_nails = 2):
        """
        Used to build the nail branch of our palm recognition network.
        Ends in a sigmoid layer with ``num_nails`` units named "nail_output".

        NOTE(review): as in the gender branch, an unused rgb_to_grayscale
        Lambda was removed; the branch operates on the RGB ``inputs`` exactly
        as before.
        """
        return self._build_branch(inputs, num_nails, "sigmoid", "nail_output")

    def build_age_branch(self, inputs):
        """
        Used to build the age branch of our palm recognition network.
        Ends in a single linear unit named "age_output".
        """
        return self._build_branch(inputs, 1, "linear", "age_output")

    def assemble_full_model(self, width, height):
        """
        Used to assemble our multi-output model CNN.

        The input has shape (height, width, 3). The outputs are ordered
        age, gender, race, palm, accessories, nail; anything that passes
        targets or reads predictions positionally must use this order.
        """
        input_shape = (height, width, 3)
        inputs = Input(shape=input_shape)
        age_branch = self.build_age_branch(inputs)
        race_branch = self.build_race_branch(inputs)
        gender_branch = self.build_gender_branch(inputs)
        palm_branch = self.build_palm_branch(inputs)
        accessories_branch = self.build_accessories_branch(inputs)
        nail_branch = self.build_nail_branch(inputs)

        model = Model(inputs=inputs,
                     outputs = [age_branch, gender_branch, race_branch, palm_branch, accessories_branch, nail_branch],
                     name="palm_net")
        return model
    
# Instantiate the multi-output network for square inputs of the configured size.
model = MultiModel().assemble_full_model(
    width=config.INPUT_CROPSIZE,
    height=config.INPUT_CROPSIZE,
)


In [None]:
# COMPILE + TRAIN/TEST MODEL
# Fix the NumPy and TensorFlow seeds so the shuffle-based split is repeatable
# and TensorFlow's random ops start from a known state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

df = pd.read_csv(config.CSV_PATH)
# Pass an independent copy of the file names: create_paths shuffles what it is
# given, and shuffling the live DataFrame column would detach the file names
# from the label columns of `df`.
image_names = df.imageName.to_numpy(copy=True)
trainPaths, valPaths, testPaths = create_paths(image_names)
copy_images(trainPaths, config.TRAIN)
copy_images(valPaths, config.VAL)
copy_images(testPaths, config.TEST)

df, max_age, dataset_dict = alias_to_ID(df)

model = MultiModel().assemble_full_model(width=config.INPUT_CROPSIZE, height=config.INPUT_CROPSIZE)
opt = tf.keras.optimizers.Adam(learning_rate=config.LEARNING_RATE, decay=config.LEARNING_RATE / config.EPOCHS)
# Losses, loss weights and metrics are keyed by output-layer name, so they do
# not depend on the positional order of the model outputs.
model.compile(optimizer=opt,
              loss={
                  'age_output': 'mse',
                  'race_output': 'categorical_crossentropy',
                  'gender_output': 'binary_crossentropy',
                  'nail_output': 'binary_crossentropy',
                  'accessories_output': 'binary_crossentropy',
                  'palm_output': 'categorical_crossentropy'},
              loss_weights={
                  'age_output': 4.,
                  'race_output': 1.2,
                  'gender_output': 0.1,
                  'nail_output': 0.1,
                  'accessories_output': 0.1,
                  'palm_output': 1.2},
              metrics={
                  'age_output': 'mae',
                  'race_output': 'accuracy',
                  'gender_output': 'accuracy',
                  'nail_output': 'accuracy',
                  'accessories_output': 'accuracy',
                  'palm_output': 'accuracy'})

train_gen = generate_images(trainPaths, batch_size=config.BATCH_SIZE, is_training = True)
# The validation generator also loops forever (is_training=True) because
# `validation_steps` batches are drawn from it in every epoch.
valid_gen = generate_images(valPaths, batch_size=config.BATCH_SIZE, is_training= True)

callbacks = [
    keras.callbacks.ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]

history = model.fit(train_gen,
                    steps_per_epoch=len(trainPaths) // config.BATCH_SIZE,
                    epochs=config.EPOCHS,
                    callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=len(valPaths) // config.BATCH_SIZE
                    )

# Export the curves, then score the held-out test split.
train_val_results(history)
plot_results(history)
age_true, race_true, gender_true, palm_true, accessories_true, nail_true, age_pred, gender_pred, race_pred, palm_pred, accessories_pred, nail_pred = evaluate(testPaths, model, max_age)
statistics(dataset_dict, age_true, race_true, gender_true, palm_true, accessories_true, nail_true, age_pred, gender_pred, race_pred, palm_pred, accessories_pred, nail_pred)

  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
 57/280 [=====>........................] - ETA: 16:50 - loss: -83.8105 - age_output_loss: 0.0270 - gender_output_loss: -722.9651 - race_output_loss: 6.1230 - palm_output_loss: 1.2436 - accessories_output_loss: -204.8823 - nail_output_loss: 0.2638 - age_output_mae: 0.1002 - gender_output_accuracy: 0.2933 - race_output_accuracy: 0.2588 - palm_output_accuracy: 0.2834 - accessories_output_accuracy: 0.4578 - nail_output_accuracy: 0.4962

In [None]:
# Per-epoch training accuracy of the race, gender, palm and accessories outputs.
accuracy_keys = (
    'race_output_accuracy',
    'gender_output_accuracy',
    'palm_output_accuracy',
    'accessories_output_accuracy',
)
print(*(history.history[key] for key in accuracy_keys))

[0.3068181872367859, 0.25, 0.25, 0.20454545319080353, 0.21590909361839294] [0.5340909361839294, 0.46590909361839294, 0.5568181872367859, 0.5568181872367859, 0.39772728085517883] [0.27272728085517883, 0.27272728085517883, 0.28409090638160706, 0.17045454680919647, 0.20454545319080353] [0.5113636255264282, 0.4886363744735718, 0.5227272510528564, 0.40909090638160706, 0.5113636255264282]
