In [None]:
# Mount Google Drive at /content/drive; the data, checkpoint and model paths
# used in the cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from tensorflow import keras
%pip install git+https://github.com/keras-team/keras-preprocessing.git
from keras_preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
import gc

In [None]:
import pandas as pd

# Folder on the mounted Drive that holds the CelebA images.
img_dir = '/content/drive/My Drive/CelebA/Img/img_align_celeba_32500'

# Define list of class names (subset of the classes listed in `list_attr_celeba.txt`)
class_names = ["High_Cheekbones", "Mouth_Slightly_Open", "Smiling"]

# Load labels.csv. header=0 treats the first CSV row as a header and `names`
# replaces it with the column names used throughout this notebook.
labels_df = pd.read_csv(
    '/content/drive/My Drive/CelebA/Anno/labels_even_clean.csv',
    header=0,
    names=['Filename'] + class_names,
)

In [None]:
# Split the labelled rows into train / validation / test partitions.
# A fixed random_state keeps the partitions identical across kernel restarts;
# without it rows move between train, validation and test on every run, so
# saved checkpoints end up being evaluated on data they were trained on.
SPLIT_SEED = 31415
train, valid = train_test_split(labels_df, test_size=0.3, random_state=SPLIT_SEED)
# The 30% hold-out is divided again: 20% of it becomes the test set and the
# remaining 80% is kept for validation.
valid, test = train_test_split(valid, test_size=0.2, random_state=SPLIT_SEED)
train_features = train[["High_Cheekbones", "Mouth_Slightly_Open","Smiling"]]
# print length of train, valid and test
print("train: ", len(train))
print("valid: ", len(valid))
print("test: ", len(test))
#train,valid,test

In [None]:
# remove a random row from train
# random_state pins which row is picked so the training set is the same on
# every run. Presumably the row is dropped so the row count divides evenly by
# the batch size - TODO confirm.
# NOTE: this cell is not idempotent; every re-run removes one more row.
train = train.drop(train.sample(random_state=31415).index)

In [None]:
#train

In [None]:
# Disabled helper: everything below is wrapped in a bare string literal, so
# none of it executes. If enabled, it would list the rows of labels_df whose
# 'Filename' does not match the six-digit "000001.jpg" pattern.
'''
import pandas as pd
import re

df = labels_df

# Function to check if a filename is valid
def is_valid_filename(filename):
    pattern = r'^\d{6}\.jpg$'  # Regular expression for "000001.jpg" format
    return re.match(pattern, filename) is not None

# Check for invalid filenames
invalid_filenames = df[~df['Filename'].apply(is_valid_filename)]

# Print rows with invalid filenames
for index, row in invalid_filenames.iterrows():
    print(f"Invalid filename '{row['Filename']}' in row {index + 1}")

# If you want to remove rows with invalid filenames from the DataFrame, you can use the following line
# df = df[df['Filename'].apply(is_valid_filename)]
'''

In [None]:


# Reproducibility
def set_seed(seed=31415):
    """Seed the random number generators used by this notebook.

    Args:
        seed: Integer applied to Python's `random`, NumPy and TensorFlow, and
            exported as PYTHONHASHSEED.
    """
    import random  # stdlib; imported here because the import cell does not list it

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # Hash randomisation is fixed when an interpreter starts, so this value is
    # only picked up by processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)


set_seed()

def custom_augmentation(np_tensor):
  """Apply a random contrast change to a single image.

  Intended to be passed as `preprocessing_function` to an ImageDataGenerator.

  Args:
    np_tensor: Image array supplied by the generator.

  Returns:
    NumPy array with the result of `tf.image.random_contrast` called with
    lower bound 0.5 and upper bound 2.
  """
  contrasted = tf.image.random_contrast(np_tensor, 0.5, 2)
  return np.array(contrasted)

# Image generators for the training, validation and test sets.
# Only the training generator augments (rotation, flips, random contrast).
datagen=ImageDataGenerator(rescale=1./255.,
                          rotation_range=45,
                          horizontal_flip=True,
                          vertical_flip=True,
                          preprocessing_function=custom_augmentation)
# Validation images are only rescaled, exactly like the test images: random
# augmentation here would make validation loss/accuracy noisy, incomparable
# between epochs, and unrepresentative of the un-augmented test data.
valid_datagen=ImageDataGenerator(rescale=1./255.)
test_datagen=ImageDataGenerator(rescale=1./255.)

# Options common to all three generators: images are looked up by the
# 'Filename' column inside img_dir and loaded at 218x178.
_common_flow_args = dict(
    directory=img_dir,
    x_col='Filename',
    target_size=(218, 178),
)

# Extra options shared by the two labelled (train / validation) generators:
# the class_names columns are returned as raw label values, 30 images per batch.
_labelled_flow_args = dict(
    _common_flow_args,
    y_col=class_names,
    save_format='jpg',
    batch_size=30,
    shuffle=True,
    class_mode="raw",
)

train_generator = datagen.flow_from_dataframe(
    dataframe=train, seed=27, **_labelled_flow_args
)

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid, seed=9, **_labelled_flow_args
)

# The test generator yields images only (class_mode=None), in dataframe order,
# 15 per batch.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test,
    batch_size=15,
    seed=3,
    shuffle=False,
    class_mode=None,
    **_common_flow_args
)

# Data Pipeline
def convert_to_float(image, label):
    """Return the pair with the image converted to float32.

    Args:
        image: Image tensor (or batch of images) to convert.
        label: Passed through unchanged.

    Returns:
        Tuple of (`tf.image.convert_image_dtype(image, tf.float32)`, label).
    """
    as_float32 = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return as_float32, label

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Wrap the Keras generators as tf.data pipelines.
# * No .cache(): the generators never signal end-of-data, so an in-memory cache
#   could never be finalised and would just keep accumulating batches; it would
#   also replay the same augmented images instead of drawing fresh ones.
# * The batch dimension is left as None so a shorter final batch (row count not
#   divisible by the batch size) is accepted rather than rejected.
ds_train = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 218, 178, 3], [None, len(class_names)])
).map(convert_to_float).prefetch(buffer_size=AUTOTUNE)

ds_valid = tf.data.Dataset.from_generator(
    lambda: valid_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 218, 178, 3], [None, len(class_names)])
).map(convert_to_float).prefetch(buffer_size=AUTOTUNE)


In [None]:
# Reset Keras' global state before the model is built in the next cell.
import tensorflow.keras.backend as K
K.clear_session()

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import Precision, Recall, AUC

# ---- Backbone: ImageNet-pretrained VGG16 without its top (classifier) layers
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(218, 178, 3))
#base_model.trainable = False
base_model.trainable = True
base_model.summary()

num_classes = len(class_names)

# ---- Head: backbone -> global average pooling -> one sigmoid unit per attribute
# Input layer (adjust the shape based on your input images)
input_tensor = Input(shape=(218, 178, 3), name='input_01')

# The backbone is called in inference mode (training=False)
x = base_model(input_tensor, training=False)

# Collapse the spatial dimensions to one feature vector per image
x = GlobalAveragePooling2D()(x)

# One independent binary output per attribute; the layer names double as the
# keys of the loss dictionary passed to compile() below.
head_names = ('High_Cheekbones', 'Mouth_Slightly_Open', 'Smiling')
output_1, output_2, output_3 = (
    Dense(1, activation='sigmoid', name=head_name)(x) for head_name in head_names
)

model = Model(
    inputs=input_tensor,
    outputs=[output_1, output_2, output_3],
    name='VGG16_transfer',
)

base_learning_rate = 0.001

# ---- Fine-tuning: layers before index `fine_tune_at` are frozen, the rest stay trainable
fine_tune_at = 8
for frozen_layer in base_model.layers[:fine_tune_at]:
  frozen_layer.trainable = False

# ---- Compile with a tenth of the base learning rate and binary cross-entropy on every head
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate/10),
    loss=dict.fromkeys(head_names, 'binary_crossentropy'),
    metrics=['accuracy']
)

# Print model summary
model.summary()

In [None]:
# Report how many layers the VGG16 backbone has (useful when choosing fine_tune_at)
base_layer_count = len(base_model.layers)
print("Number of layers in the base model: ", base_layer_count)


In [None]:
# Disabled (wrapped in a string literal, so it does not run):
# save model summary to file
'''with open('/content/drive/My Drive/CelebA/Saved_models/VGG16_Transfer_3_Classes/metrics/fine_tuning_summary.txt','w') as fh:
    # Pass the file handle in as a lambda function to make it callable
    model.summary(print_fn=lambda x: fh.write(x + '\n'))'''

In [None]:
# Draw the layer graph of the full multi-output model, including tensor shapes.
tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
# Draw the layer graph of the VGG16 backbone on its own, including tensor shapes.
tf.keras.utils.plot_model(base_model, show_shapes=True)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping

check_save_dir = '/content/drive/My Drive/CelebA/VGG16_Checkpoints/Checkpoints_Frozen_Uncropped/'
gc.enable()

# Keep only the best full model (not just weights) seen so far.
# "Best" is judged on validation loss: training loss keeps falling while a
# model overfits, so monitoring it would checkpoint an overfitted model.
checkpoint = ModelCheckpoint(
    check_save_dir,
    save_weights_only=False,
    save_best_only=True,
    monitor='val_loss',
    mode='min',
    verbose=1
    )

# define early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss', # stop when the validation loss stops improving
    patience=3, # number of epochs to wait before stopping
    restore_best_weights=True, # restore the best model weights found during training
)

In [None]:
def generator_wrapper(dataset):
    """Adapt (images, label-matrix) batches for a model with one output per label.

    Args:
        dataset: Iterable yielding `(batch_x, batch_y)` pairs, where `batch_y`
            is a 2-D array/tensor with one column per output head.

    Yields:
        `(batch_x, [batch_y[:, 0], batch_y[:, 1], ...])` - the images unchanged
        and the label matrix split into one vector per column.
    """
    for batch_x, batch_y in dataset:
        # Derive the number of heads from the data instead of assuming three.
        n_outputs = batch_y.shape[1]
        yield (batch_x, [batch_y[:, i] for i in range(n_outputs)])

# Wrap both pipelines so each batch's labels arrive as one vector per output.
# These are one-shot Python generator objects; re-run this cell to get fresh ones.
ds_train_wrapped, ds_valid_wrapped = map(generator_wrapper, (ds_train, ds_valid))

In [None]:
%cd drive/My Drive/CelebA

In [None]:
#model.load_weights('./VGG16_Checkpoints/Checkpoints_Frozen_Uncropped/')

Train Data: 6,300 rows   
Train Batch Size: 30 images  
Train Step Size: $6300 / 30 = 210$  

-------
Valid Data: 2160 rows  
Valid Batch Size: 30 images  
Valid Step Size: $2160 / 30 = 72$  

In [None]:
# Train for up to 8 epochs (early stopping may end the run sooner).
# batch_size is deliberately not passed: the wrapped generators already yield
# ready-made batches, and Keras documents that batch_size should not be given
# for generator/dataset inputs. steps_per_epoch and validation_steps set how
# many batches make up one epoch.
history = model.fit(
                    ds_train_wrapped,
                    epochs=8,
                    validation_data=ds_valid_wrapped,
                    steps_per_epoch=210,
                    validation_steps=72,
                    verbose=1,
                    callbacks=[checkpoint, early_stopping])

# Persist the trained model in TensorFlow SavedModel format; the relative path
# resolves against the current working directory. overwrite=False means an
# existing save at this path is not replaced silently.
tf.keras.saving.save_model(
    model, './Saved_models/VGG16_Transfer_3_Classes/8E_fine_tuned_uncropped', overwrite=False, save_format='tf',
)
# convert history.history object to dataframe
history_frame = pd.DataFrame(history.history)

# One row per completed epoch, one column per logged metric.
history_frame.to_csv('/content/drive/My Drive/CelebA/Saved_models/VGG16_Transfer_3_Classes/metrics/8E_fine_tuned_uncropped.csv', index=False)

In [None]:
# Display the raw per-epoch metrics dictionary of the most recent fit() call.
history.history

In [None]:
# Force a garbage-collection pass; the cell output is gc.collect()'s return value.
gc.collect()

In [None]:
# Save `model` again under a second name in SavedModel format.
# NOTE(review): the directory is called "10E_..." while the fit() call above
# uses epochs=8 and already saved the same model as "8E_..." - confirm which
# name is intended.
tf.keras.saving.save_model(
    model, './Saved_models/VGG16_Transfer_3_Classes/10E_fine_tuned_uncropped', overwrite=False, save_format='tf',
)

In [None]:
# Load a previously saved model from the current working directory.
# NOTE(review): this same path is the save target of the second training run
# further down, so it only exists if that run has been executed before -
# confirm the intended source model.
reconstructed_model = keras.models.load_model("./Saved_models/VGG16_Transfer_3_Classes/8+4E_fine_tuned_uncropped")
#reconstructed_model.summary()

In [None]:
# Print the architecture of the reloaded model.
reconstructed_model.summary()

In [None]:
# Replace the reloaded model's weights with those stored at the checkpoint
# path (relative to the current working directory).
# NOTE(review): the checkpoint callback writes full models, not weights-only
# files - confirm load_weights accepts that directory.
reconstructed_model.load_weights('./VGG16_Checkpoints/Checkpoints_Frozen_Uncropped/')

In [None]:
# Continue training the reloaded model for up to 5 more epochs.
# batch_size is deliberately not passed: the wrapped generators already yield
# ready-made batches, and Keras documents that batch_size should not be given
# for generator/dataset inputs.
history = reconstructed_model.fit(
                    ds_train_wrapped,
                    epochs=5,
                    validation_data=ds_valid_wrapped,
                    steps_per_epoch=210,
                    validation_steps=72,
                    verbose=1,
                    callbacks=[checkpoint, early_stopping])

# Persist the continued model in SavedModel format. overwrite=False means an
# existing save at this path is not replaced silently.
tf.keras.saving.save_model(
    reconstructed_model, './Saved_models/VGG16_Transfer_3_Classes/8+4E_fine_tuned_uncropped', overwrite=False, save_format='tf'
)
# convert history.history object to dataframe
history_frame = pd.DataFrame(history.history)
#print(history_frame)
history_frame.to_csv('/content/drive/My Drive/CelebA/Saved_models/VGG16_Transfer_3_Classes/metrics/8+4E_fine_tuned_uncropped.csv', index=False)

In [None]:
# Save the reloaded/continued model under another name in SavedModel format.
# NOTE(review): the name says "head_only", but nothing visible in this notebook
# freezes the whole backbone for this model - confirm the label is accurate.
tf.keras.saving.save_model(
    reconstructed_model, './Saved_models/VGG16_Transfer_3_Classes/10E_head_only_uncropped', overwrite=False, save_format='tf'
)

In [None]:
# set Matplotlib defaults
_axes_defaults = dict(
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=16,
    titlepad=10,
)
plt.rc('figure', autolayout=True)
plt.rc('axes', **_axes_defaults)
plt.rc('image', cmap='magma')
# Suppress every warning category for the rest of the session.
warnings.filterwarnings("ignore")



In [None]:
# convert history.history object to dataframe
# (`history` is whatever the most recently executed fit() call returned)
history_frame = pd.DataFrame(history.history)

In [None]:
#print(history_frame)
# Write the current history table to the metrics folder.
# NOTE(review): this is the same `history` object already exported under a
# different file name after the preceding fit() call - confirm the file name.
history_frame.to_csv('/content/drive/My Drive/CelebA/Saved_models/VGG16_Transfer_3_Classes/metrics/10E_head_only_uncropped.csv', index=False)

In [None]:
# Rewind the test generator so the next batch drawn is the first one.
test_generator.reset()

In [None]:
# Pull the next batch of 15 test images and run the reloaded model on it.
# Every execution of this cell advances the generator by one batch.
batch = next(test_generator)
predictions = reconstructed_model.predict(batch)

In [None]:
# Dump the raw model outputs for the batch predicted in the previous cell.
print(predictions)

In [None]:
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np
import textwrap

class_names = ["High_Cheekbones", "Mouth_Slightly_Open", "Smiling"]

# Show the very images that were fed to the model (`batch`) next to their
# predictions. Looking files up through a fixed dataframe slice only matches
# `predictions` if the generator happened to be advanced a specific number of
# times; plotting `batch` keeps image and prediction aligned in every case.
n_images = len(batch)
n_cols = 3
n_rows = -(-n_images // n_cols)  # ceiling division

plt.figure(figsize=(18, 16))

for i in range(n_images):
    # `predictions` holds one array per output head; ravel() accepts both
    # (n,) and (n, 1) shaped outputs and yields a plain scalar per image.
    image_preds = [float(np.ravel(head_preds)[i]) for head_preds in predictions]
    class_percentages = ['%.2f' % (pred * 100) + '%' for pred in image_preds]
    xLabel = "; ".join(
        f"{name}, {pct}" for name, pct in zip(class_names, class_percentages)
    )

    wrapped_label = '\n'.join(textwrap.wrap(xLabel, width=30))
    plt.subplot(n_rows, n_cols, i + 1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # batch images are floats already rescaled by 1/255, which imshow accepts.
    plt.imshow(batch[i])
    plt.xlabel(wrapped_label, fontsize=8)

plt.tight_layout()
plt.show()


In [None]:

# Validation accuracy of each output head, one line per head.
per_head_val_accuracy = [
    'val_High_Cheekbones_accuracy',
    'val_Smiling_accuracy',
    'val_Mouth_Slightly_Open_accuracy',
]
ax = history_frame[per_head_val_accuracy].plot()
ax.set_xlabel('Epochs')
ax.set_ylabel('Validation Accuracy')
ax.legend(loc='center left')
plt.show()

In [None]:
# Collect every validation-accuracy column of the history table (one Series per head).
val_accuracies = [
    history_frame[name]
    for name in history_frame.columns
    if name.startswith('val_') and name.endswith('accuracy')
]


In [None]:
# Collect the per-head validation-loss columns of the history table.
# The aggregate 'val_loss' column also starts with 'val_' and ends with 'loss';
# it is the model's combined loss, so it is excluded to keep a later mean over
# this list an average of the individual heads only.
val_losses = [
    history_frame[col_name]
    for col_name in history_frame.columns
    if col_name.startswith('val_')
    and col_name.endswith('loss')
    and col_name != 'val_loss'
]

In [None]:
import numpy as np
# Average across the collected columns, leaving one value per epoch.
avg_val_losses = np.asanyarray(val_losses).mean(axis=0)
avg_val_accuracies = np.asanyarray(val_accuracies).mean(axis=0)

In [None]:
import matplotlib.pyplot as plt
# One figure per averaged validation metric: (values, title, y-axis label).
_avg_curves = (
    (avg_val_losses,
     'Average Validation Loss. vs. Epochs (epochs 1-8)',
     'Average Validation Loss'),
    (avg_val_accuracies,
     'Average Validation Acc. vs. Epochs (epochs 1-8)',
     'Average Validation Accuracy'),
)
for curve, curve_title, curve_ylabel in _avg_curves:
    plt.plot(curve)
    plt.title(curve_title)
    plt.xlabel('Epochs')
    plt.ylabel(curve_ylabel)
    plt.show()
