## Computing baseline of our model

#### Importing libs

In [1]:
import pandas as pd
import numpy as np
import os
import cv2
from PIL import Image
from collections import Counter
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from tensorflow import stack
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import Resizing

import warnings
warnings.filterwarnings('ignore')


#### Upload images and organize them in the Pandas dataframe

In [2]:
# Get the list of all files and folders in the specified directory
directory_path = "raw_data/faces"
img_folders = os.listdir(directory_path)
labels = [' '.join(folder.replace('Man.', '').replace('Real', '').split()[:-1]) for folder in img_folders]
len(img_folders)


178

In [3]:
img_label_dict = {'image': [], 'name': []}
shapes = []

for img_folder, label in zip(img_folders, labels):
    directory_path = f"raw_data/faces/{img_folder}"
    img_files = os.listdir(directory_path)

    for img_file in img_files:
        try:
            image_path = f"raw_data/faces/{img_folder}/{img_file}"
            # image_tf = load_img(image_path)
            # image_np = img_to_array(image_tf)

            image_pil = Image.open(image_path)
            image_np = np.array(image_pil)

            shapes.append(image_np.shape)

            img_label_dict['image'].append(image_np)
            img_label_dict['name'].append(label)
        except:
            pass


In [4]:
def most_common(lst):
    counter = Counter(lst)
    most_common_item = max(counter, key=counter.get)
    return most_common_item

common = most_common(shapes)
common


(408, 612, 3)

In [5]:
img_label_dict['image'][0].shape


(612, 408, 3)

In [6]:
np.asarray(img_label_dict['image'][0]).shape


(612, 408, 3)

#### Initializing the model

In [7]:
def initialize_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=common))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model


#### Preprocess the images

In [8]:
height = common[0]
width = common[1]

resize = layers.Resizing(height, width)


In [9]:
preprocessed_img = []

for img in img_label_dict['image']:
    try:
        preprocessed_img.append(resize(img))
    except:
        pass


In [10]:
preprocessed_img = np.array(preprocessed_img)
preprocessed_img.shape


(3900, 408, 612, 3)

In [11]:
print(shapes.index((408, 612)))
print(shapes.index((418, 612)))


3301
3589


In [12]:
labels = img_label_dict['name']
del labels[shapes.index((408, 612))]
del labels[shapes.index((418, 612))]


#### Splitting data

In [28]:
X = preprocessed_img[:100]
y = np.array(labels[:100])

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.5, random_state=42)


#### Fitting the model

In [30]:
X_test.shape


(50, 408, 612, 3)

In [31]:
es = EarlyStopping(monitor='accuracy', patience=3)

model = initialize_model()
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, callbacks=[es])


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


<keras.callbacks.History at 0x142f4a0e0>

#### Evaluate prediction

In [32]:
model.evaluate(X_test, y_test)




[0.0, 0.019999999552965164]