<a href="https://colab.research.google.com/github/imad267/Thesis-Work/blob/main/Gender_and_Age_Classification_Final_txt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Load Data and Import Libraries

In [None]:
# Download Data from Kaggle
# NOTE(review): the URL below is a signed link carrying
# X-Goog-Date=20230410T193922Z and X-Goog-Expires=259200, so it presumably
# stops working once that window has passed -- a fresh link is likely needed
# to re-run this cell (confirm).
# The archive is saved as data.zip in the current directory; the unzip step
# addresses it as /content/data.zip, so this assumes the cell starts in /content.
!wget 'https://storage.googleapis.com/kaggle-data-sets/5958/8831/bundle/'\
  'archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-'\
  'kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20230410%2Fauto%'\
  '2Fstorage%2Fgoog4_request&X-Goog-Date=20230410T193922Z&X-Goog-Expires='\
  '259200&X-Goog-SignedHeaders=host&X-Goog-Signature=60ee98d619ef8266a41c1'\
  '8cbb26a6a929c8cd07294f1473667789988edec800426c1bb309609e735d82feaa387de0'\
  '29f44569cf3d5b4ae962618182f7cb4a58716fcafebc2ca839d676b101e5bd6099d00a9'\
  '44d9f8f13b5c5302b200e8a7c1c65a805777dec3990c8fc0348c03979af73d3d4675c94'\
  '275949932ee5a28b6362413fb43c3d352651f37a35c13444bec693e122743afce46fdf2'\
  '268d0f0a3933e7609aa3e78e09a676972ed30249f72baab71511be9b81111c215df0744'\
  '6e5113dfa0c669fa8fc032f07e308470ea3452c7a5566d6b50cf802677a4c37edd4eace'\
  'd6ddf13cb4feefcab3434000105f813f72a1e58bc9f2b319028d87fe93c75509' -O data.zip
# Unzip it quietly (-q) into /content, only writing new or updated files (-u)
!unzip -q -u '/content/data.zip' -d '/content'
# Remove the zip file
!rm 'data.zip'
# Change working directory; later cells list label files and build image
# paths relative to this folder
%cd '/content/AdienceBenchmarkGenderAndAgeClassification/'
# Remove unnecessary folders (both paths are relative to the directory entered above)
!rm -r 'AdienceBenchmarkGenderAndAgeClassification'
!rm -r '__MACOSX'

--2023-04-10 20:06:48--  https://storage.googleapis.com/kaggle-data-sets/5958/8831/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20230410%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230410T193922Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=60ee98d619ef8266a41c18cbb26a6a929c8cd07294f1473667789988edec800426c1bb309609e735d82feaa387de029f44569cf3d5b4ae962618182f7cb4a58716fcafebc2ca839d676b101e5bd6099d00a944d9f8f13b5c5302b200e8a7c1c65a805777dec3990c8fc0348c03979af73d3d4675c94275949932ee5a28b6362413fb43c3d352651f37a35c13444bec693e122743afce46fdf2268d0f0a3933e7609aa3e78e09a676972ed30249f72baab71511be9b81111c215df07446e5113dfa0c669fa8fc032f07e308470ea3452c7a5566d6b50cf802677a4c37edd4eaced6ddf13cb4feefcab3434000105f813f72a1e58bc9f2b319028d87fe93c75509
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.2.128, 142.250.141.128, 2607:f8b0:4023:c0b::80, ...
Connecting to stora

In [None]:
# Import Libraries
import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
#import matplotlib.pyplot as plt
from tensorflow.keras import layers, Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

## Data Preprocessing

In [None]:
# Read and concatenate all files containing labels and path information.
# The directory listing is sorted because os.listdir() returns entries in
# arbitrary order; without it the row order -- and therefore the
# first-N-per-group subsample taken at the end of this cell -- could differ
# between runs.
label_files = sorted(name for name in os.listdir() if name.endswith('.txt'))
master = pd.concat(
    [pd.read_csv(name, sep="\t") for name in label_files],
    ignore_index=True,
)

# Generate image path: faces/<user_id>/coarse_tilt_aligned_face.<face_id>.<original_image>
# Row values are accessed by column label; positional access such as x[0] on a
# label-indexed Series is deprecated in pandas.
master['image_path'] = master[['user_id', 'face_id', 'original_image']].apply(
    lambda row: os.path.join(
        'faces',
        f"{row['user_id']}",
        f"coarse_tilt_aligned_face.{row['face_id']}.{row['original_image']}",
    ),
    axis=1,
)

# Clean data: drop rows with missing values, rows whose age is the literal
# string 'None', rows whose gender is 'u', and columns that are not used later.
# Written as a single chain so `master` is a fresh frame and the column
# assignments below do not operate on a filtered view.
master = (
    master
    .dropna()
    .loc[lambda df: (df['age'] != 'None') & (df['gender'] != 'u')]
    .drop(columns=['user_id', 'original_image', 'face_id', 'tilt_ang',
                   'fiducial_yaw_angle', 'fiducial_score'])
)

# Encode target columns
master['gender'] = master['gender'].map({'f': 0, 'm': 1})

# Step 1: fold irregular age labels (odd ranges and single ages) into the
# standard age-range labels. Labels not listed here are left unchanged.
age_label_fixes = {
    '(27, 32)': '(25, 32)', '(38, 42)': '(38, 43)', '(38, 48)': '(38, 43)',
    '(8, 23)': '(15, 20)', '2': '(0, 2)', '3': '(0, 2)', '13': '(8, 12)',
    '22': '(15, 20)', '23': '(25, 32)', '29': '(25, 32)', '34': '(25, 32)',
    '35': '(25, 32)', '36': '(38, 43)', '42': '(38, 43)', '45': '(38, 43)',
    '46': '(48, 53)', '55': '(48, 53)', '57': '(60, 100)', '58': '(60, 100)',
}
master['age'] = master['age'].apply(lambda x: age_label_fixes.get(x, x))

# Step 2: collapse the standard age ranges into four integer classes.
age_map = {'(0, 2)': 0, '(4, 6)': 0, '(8, 12)': 0, '(15, 20)': 1, '(25, 32)': 1,
           '(38, 43)': 2, '(48, 53)': 3, '(60, 100)': 3}
# Human-readable names for the encoded classes
inv_age = {0: '0-15', 1: '15-30', 2: '30-45', 3: '45+'}
inv_gen = {0: 'female', 1: 'male'}
master['age'] = master['age'].apply(lambda x: age_map.get(x, x))

# Select partial data: at most SAMPLES_PER_GROUP rows for every
# (age class, gender) combination, in the order age 0 female, age 0 male,
# age 1 female, ... The pieces are collected in a list and concatenated once
# instead of growing a DataFrame inside the loop.
SAMPLES_PER_GROUP = 800
selected_columns = ['age', 'gender', 'image_path']
group_frames = []
for age_class in range(4):
    for gender_class in (0, 1):
        in_group = (master['gender'] == gender_class) & (master['age'] == age_class)
        group_frames.append(master.loc[in_group, selected_columns].iloc[:SAMPLES_PER_GROUP])
data = pd.concat(group_frames, ignore_index=True)

In [None]:
# Load Images
# Split the selected rows into the image paths and the two label series.
X = data['image_path']
y_gender = data['gender']
y_age = data['age']

# Read every image, resize it to 224x224 and stack the results into one array.
X_images = []
for path in tqdm(X):
    # The context manager releases the file handle as soon as the pixels have
    # been copied out, instead of leaving thousands of handles to the GC.
    with Image.open(path) as img:
        # Force three channels so every array has the same (224, 224, 3)
        # shape; a single grayscale or RGBA file would otherwise make the
        # final np.array() call produce a ragged result.
        resized = img.convert('RGB').resize((224, 224))
        X_images.append(np.array(resized))

X_images = np.array(X_images)

100%|██████████| 6400/6400 [00:46<00:00, 138.70it/s]


# Gender Prediction

In [None]:
# Hold out 30% of the samples as a test split, stratified on the gender label
X_train, X_test, y_gender_train, y_gender_test = train_test_split(
    np.asarray(X_images).astype('float32'),
    np.asarray(y_gender.values).astype('float32'),
    test_size=0.3,
    stratify=y_gender,
    random_state=0,
)

# Callbacks: early stopping on validation accuracy, plus a checkpoint that
# keeps only the best-scoring model on disk
es = EarlyStopping(
    monitor='val_accuracy', min_delta=0.01, patience=5, verbose=1, mode='auto'
)
mc = ModelCheckpoint(
    filepath="best_gender_model_1.h5",
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='auto',
)
call_back = [es, mc]

# ImageNet-pretrained VGG16 without its classifier head
vgg16_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
# Freeze every layer of the pretrained base so only the new head is trained
for layer in vgg16_model.layers:
    layer.trainable = False

# Classifier: frozen VGG16 base -> pooled features -> two dense blocks with
# dropout -> 2-way softmax
model_gender = Sequential([
    vgg16_model,
    layers.GlobalAveragePooling2D(),
    layers.Flatten(),
    layers.Dense(256, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(2, activation="softmax"),
])
model_gender.summary()

# Integer class labels, hence the sparse categorical cross-entropy loss
model_gender.compile(
    optimizer='adam',
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Fit the model; the held-out split is used as validation data
history_gender = model_gender.fit(
    X_train,
    y_gender_train,
    batch_size=128,
    epochs=20,
    validation_data=(X_test, y_gender_test),
    validation_batch_size=128,
    callbacks=call_back,
)
# Save the model as it stands after the final epoch (the checkpoint callback
# writes the best one to its own file)
model_gender.save('gender_model_1.h5')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 flatten (Flatten)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                        

# Age Prediction

In [None]:
# Train Test Split: hold out 20% of the samples, stratified on the age class
X_train, X_test, y_age_train, y_age_test = train_test_split(
    np.asarray(X_images).astype('float32'),
    np.asarray(y_age.values).astype('float32'),
    test_size=0.2,
    stratify=y_age,
    random_state=0,
)
# Generate Callbacks: early stopping on validation accuracy, plus a checkpoint
# that keeps only the best-scoring model on disk
es = EarlyStopping(monitor='val_accuracy', min_delta=0.01, patience=5, verbose=1, mode='auto')
mc = ModelCheckpoint(filepath="best_age_model_1.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='auto')
call_back = [es, mc]
# Load Pretrained model (ImageNet VGG16 without its classifier head)
vgg16_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Freeze every layer of the pretrained base so only the new head is trained
for layer in vgg16_model.layers:
    layer.trainable = False

# Initialize a sequential model: frozen VGG16 base -> pooled features ->
# two dense blocks with dropout -> 4-way softmax (one unit per age class)
model_age = Sequential()
model_age.add(vgg16_model)
model_age.add(layers.GlobalAveragePooling2D())
model_age.add(layers.Flatten())
model_age.add(layers.Dense(256, activation="relu"))
model_age.add(layers.Dropout(0.5))
model_age.add(layers.Dense(128, activation="relu"))
model_age.add(layers.Dropout(0.5))
model_age.add(layers.Dense(4, activation="softmax"))
model_age.summary()

# Integer class labels, hence the sparse categorical cross-entropy loss
model_age.compile(loss=SparseCategoricalCrossentropy(), optimizer='adam', metrics=['accuracy'])

# Train the neural network/model.
# validation_batch_size (not validation_steps) is the argument that applies to
# in-memory NumPy validation data and matches the gender model's fit call.
# Asking for 128 validation steps requested more batches than the held-out
# split can supply at this batch size.
history_age = model_age.fit(
    X_train,
    y_age_train,
    batch_size=128,
    epochs=30,
    validation_batch_size=128,
    validation_data=(X_test, y_age_test),
    callbacks=call_back,
)
# Save the model as it stands after the final epoch (the checkpoint callback
# writes the best one to its own file)
model_age.save('age_model_1.h5')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 flatten (Flatten)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                 




Epoch 1: val_accuracy improved from -inf to 0.41719, saving model to best_age_model_1.h5
Epoch 2/30



Epoch 3/30



Epoch 4/30



Epoch 5/30



Epoch 6/30



Epoch 7/30



Epoch 8/30



Epoch 9/30



Epoch 10/30



Epoch 11/30



Epoch 12/30



Epoch 13/30



Epoch 14/30



Epoch 15/30



Epoch 16/30



Epoch 17/30



Epoch 18/30



Epoch 19/30



Epoch 20/30



Epoch 21/30



Epoch 22/30



Epoch 23/30



Epoch 24/30



Epoch 25/30



Epoch 26/30



Epoch 27/30



Epoch 28/30



Epoch 29/30



Epoch 30/30



