## Computing baseline of our model

#### Importing libs

In [1]:
import pandas as pd
import numpy as np
import os
import cv2
from PIL import Image
from collections import Counter
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from tensorflow import stack
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import Resizing

import warnings
warnings.filterwarnings('ignore')

#### Upload images and organize them in the Pandas dataframe

In [2]:
# Get the list of all files and folders in the specified directory
directory_path = "../raw_data/faces"
img_folders = os.listdir(directory_path)
labels = [' '.join(folder.replace('Man.', '').replace('Real', '').split()[:-1]) for folder in img_folders]
img_folders

['João Félix Atlético',
 'Salah Liverpool',
 'Christensen Chelsea',
 'Mané Liverpool',
 'Gilberto Benfica',
 'Elanga Man. United',
 'Milner Liverpool',
 'Mengi Man. United',
 'Vlachodimos Benfica',
 'Morton Liverpool',
 'Luis Díaz Liverpool',
 'Gomez Liverpool',
 'Renan Lodi Atlético',
 'Diogo Jota Liverpool',
 'Hudson-Odoi Chelsea',
 'Rüdiger Chelsea',
 'Rafa Silva Benfica',
 'Richards Bayern',
 '.DS_Store',
 'Henderson Liverpool',
 'Pulišić Chelsea',
 'Robertson Liverpool',
 'Loftus-Cheek Chelsea',
 'Sané Bayern',
 'Thiago Silva Chelsea',
 'Oxlade-Chamberlain Liverpool',
 'Carvajal Real Madrid',
 'Koke Atlético',
 'Alisson Becker Liverpool',
 'Griezmann Atlético',
 'Lucas Vázquez Real Madrid',
 'Marcos Llorente Atlético',
 'Tolisso Bayern',
 'Barkley Chelsea',
 'Bailly Man. United',
 'Ronaldo Man. United',
 'Lewandowski Bayern',
 'Coman Bayern',
 'Alaba Real Madrid',
 'Azpilicueta Chelsea',
 'Marc Roca Bayern',
 'Valverde Real Madrid',
 'Sabitzer Bayern',
 'Cavani Man. United',
 'Taa

In [3]:
shorten_img_folders = []

for folder in img_folders:
    if 'Real Madrid' in folder or 'Chelsea' in folder or 'Liverpool' in folder:
        shorten_img_folders.append(folder)
        
shorten_img_folders

['Salah Liverpool',
 'Christensen Chelsea',
 'Mané Liverpool',
 'Milner Liverpool',
 'Morton Liverpool',
 'Luis Díaz Liverpool',
 'Gomez Liverpool',
 'Diogo Jota Liverpool',
 'Hudson-Odoi Chelsea',
 'Rüdiger Chelsea',
 'Henderson Liverpool',
 'Pulišić Chelsea',
 'Robertson Liverpool',
 'Loftus-Cheek Chelsea',
 'Thiago Silva Chelsea',
 'Oxlade-Chamberlain Liverpool',
 'Carvajal Real Madrid',
 'Alisson Becker Liverpool',
 'Lucas Vázquez Real Madrid',
 'Barkley Chelsea',
 'Alaba Real Madrid',
 'Azpilicueta Chelsea',
 'Valverde Real Madrid',
 'Matip Liverpool',
 'Van Dijk Liverpool',
 'Miguel Gutiérrez Real Madrid',
 'Phillips Liverpool',
 'Vallejo Real Madrid',
 'Chilwell Chelsea',
 'Courtois Real Madrid',
 'Benzema Real Madrid',
 'Bale Real Madrid',
 'Jović Real Madrid',
 'Elliott Liverpool',
 'Casemiro Real Madrid',
 'Alonso Chelsea',
 'Nacho Real Madrid',
 'Minamino Liverpool',
 'Woltman Liverpool',
 'James Chelsea',
 'Jones Liverpool',
 'Rodrygo Real Madrid',
 'Tsimikas Liverpool',
 '

In [4]:
labels = [' '.join(folder.replace('Real', '').split()[:-1]) for folder in shorten_img_folders]
labels

['Salah',
 'Christensen',
 'Mané',
 'Milner',
 'Morton',
 'Luis Díaz',
 'Gomez',
 'Diogo Jota',
 'Hudson-Odoi',
 'Rüdiger',
 'Henderson',
 'Pulišić',
 'Robertson',
 'Loftus-Cheek',
 'Thiago Silva',
 'Oxlade-Chamberlain',
 'Carvajal',
 'Alisson Becker',
 'Lucas Vázquez',
 'Barkley',
 'Alaba',
 'Azpilicueta',
 'Valverde',
 'Matip',
 'Van Dijk',
 'Miguel Gutiérrez',
 'Phillips',
 'Vallejo',
 'Chilwell',
 'Courtois',
 'Benzema',
 'Bale',
 'Jović',
 'Elliott',
 'Casemiro',
 'Alonso',
 'Nacho',
 'Minamino',
 'Woltman',
 'James',
 'Jones',
 'Rodrygo',
 'Tsimikas',
 'N. Williams',
 'E. Hazard',
 'Mendy',
 'Kanté',
 'Thiago Alcántara',
 'Mendy',
 'Werner',
 'Alexander-Arnold',
 'Konaté',
 'Lukaku',
 'Saúl Ñíguez',
 'Sarr',
 'Vinícius Júnior',
 'Kroos',
 'Origi',
 'Camavinga',
 'Kepa',
 'Kovačić',
 'Mariano Díaz',
 'Modrić',
 'Chalobah',
 'Bradley',
 'Havertz',
 'Blanco',
 'Asensio',
 'Keïta',
 'Jorginho',
 'Mount',
 'Ziyech',
 'Firmino',
 'Marcelo',
 'Ceballos',
 'Fabinho',
 'Éder Militão']

In [5]:
img_label_dict = {'image': [], 'name': []}
shapes = []

for img_folder, label in zip(shorten_img_folders, labels):
    directory_path = f"../raw_data/faces/{img_folder}"
    img_files = os.listdir(directory_path)
    
    print(f'{img_folder}: {len(img_files)}')
    
    for img_file in img_files:
        try:
            image_path = f"../raw_data/faces/{img_folder}/{img_file}"
            # image_tf = load_img(image_path)
            # image_np = img_to_array(image_tf)
            
            image_pil = Image.open(image_path)
            image_np = np.array(image_pil)
            
            shapes.append(image_np.shape)

            img_label_dict['image'].append(image_np)
            img_label_dict['name'].append(label)
        except:
            pass

Salah Liverpool: 30
Christensen Chelsea: 30
Mané Liverpool: 5
Milner Liverpool: 30
Morton Liverpool: 5
Luis Díaz Liverpool: 30
Gomez Liverpool: 30
Diogo Jota Liverpool: 30
Hudson-Odoi Chelsea: 30
Rüdiger Chelsea: 30
Henderson Liverpool: 30
Pulišić Chelsea: 0
Robertson Liverpool: 30
Loftus-Cheek Chelsea: 30
Thiago Silva Chelsea: 30
Oxlade-Chamberlain Liverpool: 30
Carvajal Real Madrid: 30
Alisson Becker Liverpool: 30
Lucas Vázquez Real Madrid: 30
Barkley Chelsea: 20
Alaba Real Madrid: 30
Azpilicueta Chelsea: 30
Valverde Real Madrid: 30
Matip Liverpool: 30
Van Dijk Liverpool: 30
Miguel Gutiérrez Real Madrid: 0
Phillips Liverpool: 19
Vallejo Real Madrid: 22
Chilwell Chelsea: 30
Courtois Real Madrid: 30
Benzema Real Madrid: 30
Bale Real Madrid: 31
Jović Real Madrid: 30
Elliott Liverpool: 30
Casemiro Real Madrid: 30
Alonso Chelsea: 30
Nacho Real Madrid: 30
Minamino Liverpool: 30
Woltman Liverpool: 1
James Chelsea: 30
Jones Liverpool: 30
Rodrygo Real Madrid: 30
Tsimikas Liverpool: 30
N. Will

In [6]:
img_label_dict['name']

['Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Mané',
 'Mané',
 'Mané',
 'Mané',
 'Mané',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',

In [7]:
def most_common(lst):
    counter = Counter(lst)
    most_common_item = max(counter, key=counter.get)
    return most_common_item

common = most_common(shapes)
common

(408, 612, 3)

In [8]:
img_label_dict['image'][0].shape

(447, 612, 3)

In [9]:
img_label_dict['name']

['Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Salah',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Christensen',
 'Mané',
 'Mané',
 'Mané',
 'Mané',
 'Mané',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',
 'Milner',

#### Initializing the model

In [10]:
def initialize_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=common))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

#### Preprocess the images

##### Shuffling data

In [11]:
data = pd.DataFrame(img_label_dict)

data = data.sample(frac = 1)
data.shape

(1974, 2)

##### Resizing images

In [12]:
height = common[0]
width = common[1]

resize = layers.Resizing(height, width)

In [13]:
preprocessed_img = []

for img in data['image'].to_numpy():
    if img.shape[-1] == 3:
        try:
            preprocessed_img.append(resize(img))
        except:
            pass

In [14]:
preprocessed_img = np.array(preprocessed_img)
preprocessed_img.shape

(1972, 408, 612, 3)

##### Removing 1-channel images (black&white)

In [15]:
print(shapes.index((408, 612)))
print(shapes.index((418, 612)))

1645
1788


In [16]:
labels = data['name']
del labels[1645]
del labels[1788]

#### Splitting data

In [17]:
X = preprocessed_img
y = np.array(labels)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3, random_state=42)

#### Fitting the model

In [19]:
X_test.shape

(592, 408, 612, 3)

In [20]:
es = EarlyStopping(monitor='accuracy', patience=3)

model = initialize_model()
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1, callbacks=[es])

Epoch 1/20


2023-11-22 14:01:57.692405: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




KeyboardInterrupt: 

#### Evaluating prediction

In [None]:
model.evaluate(X_test, y_test)