## Computing baseline of our model

#### Importing libs

In [1]:
import pandas as pd
import numpy as np
import os
import cv2
import gc
from google.colab import drive
from PIL import Image
from collections import Counter
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
import tensorflow as tf
from tensorflow import stack
from tensorflow import Tensor
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import Resizing

import warnings
warnings.filterwarnings('ignore')

#### Upload images and organize them in the Pandas dataframe

In [2]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Get the list of all files and folders in the specified directory
directory_path = '/content/drive/MyDrive/faces'
img_folders = os.listdir(directory_path)
len(img_folders)

127

In [4]:
shorten_img_folders = []

for folder in img_folders:
    if 'Real Madrid' in folder:
        shorten_img_folders.append(folder)

shorten_img_folders

['Ceballos Real Madrid',
 'Éder Militão Real Madrid',
 'Marcelo Real Madrid',
 'Asensio Real Madrid',
 'Vinícius Júnior Real Madrid',
 'Kroos Real Madrid',
 'Mariano Díaz Real Madrid',
 'Camavinga Real Madrid',
 'Mendy Real Madrid',
 'Rodrygo Real Madrid',
 'Casemiro Real Madrid',
 'Nacho Real Madrid',
 'Jović Real Madrid',
 'Vallejo Real Madrid',
 'Bale Real Madrid',
 'Benzema Real Madrid',
 'Courtois Real Madrid',
 'Alaba Real Madrid',
 'Lucas Vázquez Real Madrid',
 'Carvajal Real Madrid']

In [5]:
labels = [' '.join(folder.replace('Man.', '').replace('Real', '').split()[:-1]) for folder in shorten_img_folders]
len(labels)

20

In [6]:
from tqdm import tqdm

In [7]:
img_label_dict = {'image': [], 'name': []}
shapes = []

for img_folder, label in tqdm(zip(shorten_img_folders, labels)):
    directory_path = f"/content/drive/MyDrive/faces/{img_folder}"
    img_files = os.listdir(directory_path)

    for img_file in img_files:
        try:
            image_path = f"/content/drive/MyDrive/faces/{img_folder}/{img_file}"
            # image_tf = load_img(image_path)
            # image_np = img_to_array(image_tf)

            image_pil = Image.open(image_path)
            image_np = np.array(image_pil)

            shapes.append(image_np.shape)

            img_label_dict['image'].append(image_np)
            img_label_dict['name'].append(label)
        except:
            pass

20it [00:07,  2.69it/s]


In [8]:
first = [shape[0] for shape in shapes]
second = [shape[1] for shape in shapes]

np.mean(first), np.mean(second)

(440.4543889845095, 583.7521514629948)

In [9]:
len(img_label_dict['name'])

581

In [10]:
def most_common(lst):
    counter = Counter(lst)
    most_common_item = max(counter, key=counter.get)
    return most_common_item

common = most_common(shapes)
common

(408, 612, 3)

In [11]:
img_label_dict['image'][0].shape

(408, 612, 3)

In [12]:
np.asarray(img_label_dict['image'][0]).shape

(408, 612, 3)

In [13]:
len(labels)

20

In [14]:
print()




#### Initializing the model

In [15]:
def initialize_model():
    model = Sequential()
    # model.add(Resizing(common[0], common[1], input_shape=common))
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=common))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    # model.add(Conv2D(256, (2, 2), activation='relu'))
    # model.add(MaxPooling2D((2, 2)))
    # model.add(Conv2D(512, (3, 3), activation='relu'))
    # model.add(MaxPooling2D((4, 4)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(len(labels), activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

model = initialize_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 406, 610, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 203, 305, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 201, 303, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 100, 151, 64)      0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 98, 149, 128)      73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 49, 74, 128)       0

#### Preprocess the images

##### Shuffling data

In [16]:
data = pd.DataFrame(img_label_dict)

data = data.sample(frac = 1)
data.head()

Unnamed: 0,image,name
201,"[[[58, 60, 81], [58, 59, 80], [58, 59, 80], [5...",Mariano Díaz
33,"[[[96, 93, 102], [97, 94, 103], [97, 94, 103],...",Éder Militão
260,"[[[90, 109, 168], [90, 109, 168], [90, 109, 16...",Mendy
176,"[[[33, 30, 39], [33, 30, 39], [33, 30, 39], [3...",Kroos
493,"[[[97, 217, 179], [97, 217, 179], [97, 217, 17...",Alaba


##### Resizing images

In [17]:
height = common[0]
width = common[1]

resize = layers.Resizing(height, width)

In [18]:
# data['image'].to_numpy()[0].shape

In [19]:

# test1 = resize(data['image'].to_numpy()[2])

In [20]:
def remove_bw(images, names): # removing black&white images
  without_bw = []

  for ind, img in enumerate(images):
    if img.shape[-1] == 3:
      without_bw.append(img)
    else:
      del names[ind]

  return without_bw, names

valid_img, names = remove_bw(data['image'].to_numpy(), data['name'])

In [21]:
preprocessed_img = []

for img in valid_img:
  preprocessed_img.append(resize(img))

In [22]:
# preprocessed_img = np.array(preprocessed_img)
# preprocessed_img.shape

# img_data = data['image'].to_numpy()
# img_data.shape

# preprocessed_img = np.array(preprocessed_img)
# preprocessed_img.shape

##### Removing 1-channel images (black&white)

In [23]:
# print(shapes.index((408, 612)))
# print(shapes.index((418, 612)))

In [24]:
# total_labels = data['name']
# del total_labels[217]
# del total_labels[155]


#### Splitting data

In [25]:
X = preprocessed_img
y = np.array(names)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
# y_encoded

y_cat = to_categorical(y=y_encoded)
y_cat

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

In [26]:
# X.shape, y.shape

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.3, random_state=42, stratify=y_cat)

In [28]:
del preprocessed_img
del X
del y
del data
del img_label_dict
del y_encoded

gc.collect()

159

In [29]:
# X_train.shape

In [30]:
# X_test.shape

In [31]:
len(X_train)

406

In [32]:
y_train.shape

(406, 20)

In [33]:
y_test.shape

(175, 20)

#### Fitting the model

In [34]:
np.mean(y_test, axis=0)

array([0.05142857, 0.05142857, 0.05142857, 0.05142857, 0.05142857,
       0.05142857, 0.05142857, 0.05142857, 0.05142857, 0.05142857,
       0.05142857, 0.05142857, 0.05142857, 0.04571429, 0.05142857,
       0.05142857, 0.05142857, 0.05142857, 0.02857143, 0.05142857],
      dtype=float32)

In [35]:
np.mean(y_train, axis=0)

array([0.05172414, 0.05172414, 0.05172414, 0.04926108, 0.05172414,
       0.04926108, 0.04679803, 0.05172414, 0.05172414, 0.05172414,
       0.05172414, 0.05172414, 0.05172414, 0.04926108, 0.05172414,
       0.05172414, 0.05172414, 0.05172414, 0.02955665, 0.05172414],
      dtype=float32)

In [36]:
f"Baseline: {np.max(np.mean(y_train, axis=0))}"

'Baseline: 0.0517241396009922'

In [37]:
X_train = tf.convert_to_tensor(X_train)

In [38]:
X_test = tf.convert_to_tensor(X_test)

In [39]:
gc.collect()

0

In [None]:
es = EarlyStopping(monitor='accuracy', patience=3)

try:
  del model
except:
  pass

model = initialize_model()
history = model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, callbacks=[es], validation_data=(X_test, y_test))

Epoch 1/20
 1/13 [=>............................] - ETA: 4:08 - loss: 34.4900 - accuracy: 0.0625

In [None]:
history.history

#### Evaluating prediction

In [None]:
model.evaluate(X_test, y_test)

In [None]:
model.summary()