# Dharmi Gala
### Machine Learning & Predictive Analytics
### Final Project: Face2BMI

In [None]:
!pip install git+https://github.com/rcmalli/keras-vggface.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/rcmalli/keras-vggface.git
  Cloning https://github.com/rcmalli/keras-vggface.git to /tmp/pip-req-build-70gs824y
  Running command git clone --filter=blob:none --quiet https://github.com/rcmalli/keras-vggface.git /tmp/pip-req-build-70gs824y
  Resolved https://github.com/rcmalli/keras-vggface.git to commit bee35376e76e35d00aeec503f2f242611a97b38a
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
!pip install Keras-Applications

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
import os
from PIL import Image
import numpy as np
from tqdm.notebook import tqdm
import tensorflow as tf
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
import keras
from keras_vggface.vggface import VGGFace
from tensorflow.keras.applications import VGG16, resnet
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dropout, BatchNormalization

In [None]:
# Mount Google Drive at /content/drive so the dataset and the saved models
# under MyDrive can be read and written. This cell only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Dataset locations on the mounted Drive, all derived from one base directory.
data_dir = '/content/drive/MyDrive/Final Project/Data/'

# CSV file holding the per-image metadata
csv_file = data_dir + 'data.csv'

# Folder containing the image files (kept with its trailing slash because
# later cells build paths by plain string concatenation)
image_folder = data_dir + 'Images/'

In [None]:
# Load the per-image metadata table from the CSV file.
# Columns used later in the notebook: `name` (image file name), `bmi` (regression
# target) and `is_training` (1 = training split, 0 = test split).
metadata = pd.read_csv(csv_file)
metadata.head()

Unnamed: 0.1,Unnamed: 0,bmi,gender,is_training,name
0,0,34.207396,Male,1,img_0.bmp
1,1,26.45372,Male,1,img_1.bmp
2,2,34.967561,Female,1,img_2.bmp
3,3,22.044766,Female,1,img_3.bmp
4,4,37.758789,Female,1,img_4.bmp


In [None]:
# File names actually present in the image folder; used to drop metadata rows
# whose image is missing.
image_list = os.listdir(image_folder)

In [None]:
# Keep only the metadata rows whose image file exists in the image folder.
has_image_file = metadata['name'].isin(image_list)
metadata = metadata[has_image_file]

In [None]:
# Split on the `is_training` flag: 1 -> training rows, 0 -> held-out test rows.
is_training_flag = metadata['is_training']
train = metadata[is_training_flag.eq(1)]
test = metadata[is_training_flag.eq(0)]

print(train.shape, test.shape)

(3210, 5) (752, 5)


In [None]:
def preprocess_image(img_path):
    """Load one image file and turn it into a single-image model input batch.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, and passed through the `preprocess_input` function imported
    at the top of the notebook (the ResNet50 one).

    Args:
        img_path: Path of the image file to load.

    Returns:
        The preprocessed array with a leading batch axis of length 1.
    """
    loaded = image.load_img(img_path, target_size=(224, 224))
    single_batch = image.img_to_array(loaded)[np.newaxis, ...]
    return preprocess_input(single_batch)

In [None]:
# Load every training image together with the BMI stored on the same metadata row.
# Reading `bmi` straight from `train` avoids re-scanning the whole `metadata`
# frame once per image, which made this loop quadratic in the number of rows.
train_images = []
train_labels = []
for img_file_name, bmi in tqdm(zip(train['name'], train['bmi']), total=len(train)):
    train_images.append(preprocess_image(image_folder + img_file_name))
    train_labels.append(bmi)

  0%|          | 0/3210 [00:00<?, ?it/s]

In [None]:
# Load every test image together with the BMI stored on the same metadata row.
# Reading `bmi` straight from `test` avoids re-scanning the whole `metadata`
# frame once per image, which made this loop quadratic in the number of rows.
test_images = []
test_labels = []
for img_file_name, bmi in tqdm(zip(test['name'], test['bmi']), total=len(test)):
    test_images.append(preprocess_image(image_folder + img_file_name))
    test_labels.append(bmi)

  0%|          | 0/752 [00:00<?, ?it/s]

In [None]:
# Stack the per-image batches into (N, 224, 224, 3) arrays and divide by 255.
# NOTE(review): preprocess_image() already ran `preprocess_input` on every image,
# so this division is a second normalisation on top of it - confirm it is intended.
# NOTE(review): this cell overwrites its own inputs, so running it twice divides
# by 255 twice; re-run the loading cells first if it has to be repeated.
batch_shape = (-1, 224, 224, 3)
train_images = np.array(train_images).reshape(batch_shape) / 255.
test_images = np.array(test_images).reshape(batch_shape) / 255.

In [None]:
# Turn the train and test label lists into NumPy arrays.
train_labels, test_labels = (
    np.array(labels) for labels in (train_labels, test_labels)
)

In [None]:
# Build a randomly augmented copy of the training set.
# ImageDataGenerator is already imported in the notebook's top import cell.
data_generator = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Define the desired number of augmented images per original image
num_augmented_images = 1

# Collected augmented images and their (unchanged) BMI labels
augmented_images = []
augmented_labels = []

# The loop variable must not be called `image`: that name is the Keras
# `image` module used by preprocess_image(), and rebinding it here would break
# that function on any later call. `total` lets tqdm show real progress, which
# it cannot infer from a zip object.
for train_image, label in tqdm(zip(train_images, train_labels), total=len(train_images)):
    # The generator expects a batch, so give the single image a leading batch axis
    image_batch = np.expand_dims(train_image, axis=0)

    # Draw `num_augmented_images` random variants of this image
    augmented_image_generator = data_generator.flow(image_batch, batch_size=1)
    for _ in range(num_augmented_images):
        augmented_images.append(next(augmented_image_generator)[0])
        augmented_labels.append(label)

# Convert the augmented images and labels to numpy arrays
augmented_images = np.array(augmented_images)
augmented_labels = np.array(augmented_labels)

0it [00:00, ?it/s]

In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Baseline model: ImageNet-pretrained VGG16 convolutional base (no top classifier)
# followed by a small regression head that predicts BMI.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Alternative backbone:
# base_model = VGGFace(model='vgg16', include_top=False, input_shape=(224, 224, 3))

# Freeze the base model's layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Regression head on top of the frozen base
model_0 = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='linear'),  # Output layer with linear activation
])
# `learning_rate` is the supported name for the deprecated `lr` argument.
model_0.compile(optimizer=Adam(learning_rate=0.0001), loss='mean_absolute_error')

# Train on the (non-augmented) training images, pinned to the first GPU
with tf.device('/gpu:0'):
    model_0.fit(train_images, train_labels, epochs=15, batch_size=32)

# Evaluate on the held-out test split
predicted_labels = model_0.predict(test_images)

# Compute mean squared error (MSE)
mse = mean_squared_error(test_labels, predicted_labels)
print("Mean Squared Error (MSE):", mse)

# Compute mean absolute error (MAE)
mae = mean_absolute_error(test_labels, predicted_labels)
print("Mean Absolute Error (MAE):", mae)

# Correlation coefficient between predictions and true BMI values
correlation_matrix = np.corrcoef(predicted_labels.flatten(), test_labels.flatten())
correlation_coefficient = correlation_matrix[0, 1]
print("Correlation Coefficient:", correlation_coefficient)

In [None]:
# Persist the trained VGG16-based BMI regressor to Drive as an .h5 file.
model_0.save('/content/drive/MyDrive/Final Project/cv_model.h5')

In [None]:
from keras.applications import VGG16  # NOTE(review): VGG16 is not referenced in this cell

# Feature extractor: VGGFace ('vgg16' variant) without its top layers, followed by
# global average pooling so each input image maps to one flat feature vector.
# NOTE(review): the images fed to this model were prepared with the ResNet50
# `preprocess_input` and then divided by 255 - verify that this matches what the
# VGGFace weights expect.
base_model = VGGFace(model='vgg16', include_top=False, input_shape=(224, 224, 3))
pooled_features = GlobalAveragePooling2D()(base_model.output)
model = Model(inputs=base_model.input, outputs=pooled_features)

In [None]:
# Persist the VGGFace feature extractor to Drive as an .h5 file so it can be reloaded later.
model.save('/content/drive/MyDrive/Final Project/model.h5')



In [None]:
def extract_features(image_array):
    """Return the flattened feature vector of a single image.

    Args:
        image_array: One image as an array without a batch axis.

    Returns:
        1-D array: the output of the notebook-level `model` for this image,
        flattened.
    """
    # `model.predict` works on batches, so wrap the image in a batch of one.
    single_batch = np.asanyarray(image_array)[np.newaxis, ...]
    return model.predict(single_batch, verbose=0).flatten()

In [None]:
# Embed the augmented training images with the feature extractor `model`
# (one pooled feature vector per image); these are the SVR training inputs.
X_train_features = model.predict(augmented_images)



In [None]:
# Embed the test images with the same feature extractor `model`
# (one pooled feature vector per image).
X_test_features = model.predict(test_images)



In [None]:
from sklearn.svm import SVR

# Fit a support-vector regressor (default hyper-parameters) that maps the
# extracted feature vectors to BMI. `fit` returns the estimator itself.
svm = SVR().fit(X_train_features, augmented_labels)
svm

In [None]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Score the SVR on the held-out test features.
y_pred = svm.predict(X_test_features)

mae = mean_absolute_error(test_labels, y_pred)
mse = mean_squared_error(test_labels, y_pred)
corr, _ = pearsonr(test_labels, y_pred)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Pearson Correlation Coefficient:", corr)

Mean Absolute Error: 5.298338076066529
Mean Squared Error: 57.364768942644446
Pearson Correlation Coefficient: 0.6375330319206938


In [None]:
import pickle

# Serialise the fitted SVR to Drive.
# Only unpickle this file from a trusted location: loading a pickle runs arbitrary code.
svm_path = '/content/drive/MyDrive/Final Project/svm.pkl'
with open(svm_path, 'wb') as out_file:
    out_file.write(pickle.dumps(svm))