# Training

## Setup

In [1]:
from zipfile import ZipFile
import numpy as np
import cv2
import warnings
import pandas as pd
from io import BytesIO
# When using OpenCV on Google Colab, use the patched imshow helper imported below
from google.colab.patches import cv2_imshow
import sqlite3 as sql
from os.path import join

from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Flatten
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import GlobalAveragePooling2D
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import get_file
from keras import backend as K
from keras.applications.imagenet_utils import decode_predictions
from keras.applications.imagenet_utils import preprocess_input
from keras.utils import get_source_inputs
from keras.callbacks import ModelCheckpoint
#from keras.layers.merge import concatenate
from __future__ import print_function
from tensorflow.python.keras.utils import layer_utils
from tensorflow.python.keras.utils.layer_utils import get_source_inputs
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from keras.layers import Dropout
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Mount Google Drive at /content/drive; the project paths used by this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Loading files

In [None]:
# Project root on the mounted Google Drive; the data and models folders are resolved relative to it.
base_path = '/content/drive/MyDrive/PDI_EduardoSavian_PabloMarques_VitorCoelho_YuriRodrigues/Colab Notebooks/'

In [None]:
# Locations derived from base_path: the data folder, the SQLite database file,
# and the two zip archives (data.zip and data_test.zip) stored next to it.
dt_path = join(base_path, 'data')
db_path = join(dt_path, 'dataset.db')
ds_o_path = join(dt_path, 'data.zip')
ds_t_path = join(dt_path, 'data_test.zip')

In [None]:
# Recreate the dt_Pessoas table from scratch so the image import starts from an empty table.
# The connection is closed explicitly: sqlite3's connection context manager only
# commits or rolls back the transaction, it does not close the connection.
connection = sql.connect(db_path)
try:
  cursor = connection.cursor()

  # Drop any previous copy of the table and persist the change.
  cursor.execute("DROP TABLE IF EXISTS dt_Pessoas")
  connection.commit()

  # One row per image: the person's name, the image file name and the raw image bytes.
  consulta_sql = '''
        CREATE TABLE IF NOT EXISTS dt_Pessoas(
            id INTEGER PRIMARY KEY,
            nome_pessoa TEXT,
            NomeImagem TEXT,
            imagem BLOB
        )
  '''

  cursor.execute(consulta_sql)
  connection.commit()
finally:
  connection.close()

In [None]:
def read_image_from_zip_insert_sql(zip_path,img_size,img_limit,connection):
  """Insert the images stored in a zip archive into the dt_Pessoas table.

  The archive must contain explicit directory entries shaped like
  ``<root>/<person>/``; the second path component of a directory entry is
  used as the person's name. Every file whose path starts with such a
  directory is stored as a raw (still encoded) BLOB together with that name.

  Args:
    zip_path: Path or file-like object of the zip archive to read.
    img_size: Unused; kept so existing callers keep working. Images are
      stored undecoded and unresized.
    img_limit: When > 0, only the first ``img_limit`` entries of the archive
      listing (directory entries included) are considered as image
      candidates. Any value <= 0 disables the limit.
    connection: Open sqlite3 connection to a database that already has the
      dt_Pessoas table. The inserts are committed before returning.
  """
  cursor = connection.cursor()
  # Both values are bound parameters: building the statement with string
  # formatting breaks on names that contain a quote (e.g. O'Brien).
  insert_sql = "INSERT INTO dt_Pessoas(nome_pessoa,imagem) VALUES (?,?)"

  with ZipFile(zip_path, 'r') as zip_file:
    entries = zip_file.infolist()
    # Computed once: the candidate list does not depend on the directory being processed.
    candidates = entries[:img_limit] if img_limit > 0 else entries
    candidate_files = [entry for entry in candidates if not entry.is_dir()]

    for item in entries:
      if not item.is_dir():
        continue

      folder_name = item.filename
      nome_pessoa = folder_name.split('/')[1]
      # A directory with an empty second path component (the archive root) names no person.
      if nome_pessoa == '':
        continue

      for sub_item in candidate_files:
        if sub_item.filename.startswith(folder_name):
          image_data = zip_file.read(sub_item.filename)
          cursor.execute(insert_sql,(nome_pessoa,sql.Binary(image_data)))

  connection.commit()

In [None]:
# Import the images of the training archive into the database.
# sqlite3's `with connection` block only scopes the transaction and leaves the
# connection open, so the connection is closed explicitly once the import is done
# (read_image_from_zip_insert_sql commits its own inserts).
connection = sql.connect(db_path)
try:
  read_image_from_zip_insert_sql(ds_o_path,224,3000,connection)
finally:
  connection.close()

In [None]:
# Count how many distinct people are stored in dt_Pessoas.
num_pessoas = 0

# sqlite3's connection context manager only scopes the transaction and leaves
# the connection open, so the connection is closed explicitly here. A SELECT
# needs no commit.
connection = sql.connect(db_path)
try:
  cursor = connection.cursor()
  cursor.execute('SELECT COUNT(DISTINCT nome_pessoa) FROM dt_Pessoas')
  num_pessoas = cursor.fetchone()[0]
finally:
  connection.close()
print(num_pessoas)

567


In [None]:
#Vitor
def read_image_from_zip_return_df(zip_path,img_size,img_limit):
  """Load the images of a zip archive into a pandas DataFrame.

  The archive must contain explicit directory entries shaped like
  ``<root>/<person>/``; the second path component of a directory entry is
  used as the person's name. Every file whose path starts with such a
  directory is decoded with OpenCV and resized to a square image.

  Args:
    zip_path: Path or file-like object of the zip archive to read.
    img_size: Side length, in pixels, the decoded images are resized to.
    img_limit: When > 0, only the first ``img_limit`` entries of the archive
      listing (directory entries included) are considered as image
      candidates. Any value <= 0 disables the limit.

  Returns:
    DataFrame with the columns ``dir`` (directory entry name), ``Pessoa``
    (person name), ``NomeImagem`` (file path inside the archive) and
    ``Imagem`` (decoded, resized image array). Files that OpenCV cannot
    decode are skipped with a warning instead of aborting the whole load.
  """
  images = []
  with ZipFile(zip_path, 'r') as zip_file:
    entries = zip_file.infolist()
    # Computed once: the candidate list does not depend on the directory being processed.
    candidates = entries[:img_limit] if img_limit > 0 else entries
    candidate_files = [entry for entry in candidates if not entry.is_dir()]

    for item in entries:
      if not item.is_dir():
        continue

      folder_name = item.filename
      nome_pessoa = folder_name.split('/')[1]
      # Checked before decoding: files matched only through the archive root
      # directory carry no person name and must not be decoded at all.
      if nome_pessoa == '':
        continue

      for sub_item in candidate_files:
        if not sub_item.filename.startswith(folder_name):
          continue

        image_data = zip_file.read(sub_item.filename)
        nparr = np.frombuffer(image_data, np.uint8)

        # imdecode rejects an empty buffer and yields None for data it cannot decode.
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) if nparr.size else None
        if img is None:
          warnings.warn(f"Skipping undecodable file in archive: {sub_item.filename}")
          continue

        img = cv2.resize(img, (img_size, img_size))
        images.append((folder_name, nome_pessoa, sub_item.filename, img))

  pd_images = pd.DataFrame(data=images,columns=['dir','Pessoa','NomeImagem','Imagem'])

  return pd_images

# images = read_image_from_zip_return_df(ds_o_path,224,0)

In [None]:
# del images

## Model

### Creating

In [2]:
# Instantiate the stock Keras VGG16 network with its default arguments and print its layer table.
# NOTE(review): `model` is rebound to the hand-built FaceRecognition network in a later cell,
# so this instance appears to serve only as a reference for the layer layout.
model = VGG16()
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     14758

In [None]:
# Data generator configuration (disabled: the code below is wrapped in a string literal, so the cell only echoes the text)
# Vitor
'''
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.10,preprocessing_function = preprocess_input)
dir_data='/content/drive/MyDrive/ML repository/Image Classification/Colab Notebooks/data'
train_generator = train_datagen.flow_from_directory(
    dir_data,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    #shuffle=True,
    subset='training')


validation_generator = train_datagen.flow_from_directory(
    dir_data,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    #shuffle=True,
    subset='validation')
'''

"\ntrain_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.10,preprocessing_function = preprocess_input)\ndir_data='/content/drive/MyDrive/ML repository/Image Classification/Colab Notebooks/data'\ntrain_generator = train_datagen.flow_from_directory(\n    dir_data,\n    target_size=(224, 224),\n    batch_size=32,\n    class_mode='categorical',\n    #shuffle=True,\n    subset='training')\n\n\nvalidation_generator = train_datagen.flow_from_directory(\n    dir_data,\n    target_size=(224, 224),\n    batch_size=32,\n    class_mode='categorical',\n    #shuffle=True,\n    subset='validation')\n"

In [None]:
# def VGGUpdate(classes,input_tensor=None):
# Pablo
# Hand-built network with the VGG16 layer layout ("bloco" layer names). Every layer is
# created with trainable=False except the final softmax layer.
img_rows,img_cols = 224,224
img_channels = 3

img_dim = (img_rows, img_cols, img_channels)

img_input = Input(img_dim,sparse=False)

# One entry per convolutional block: (filters, convolution layer names, pooling layer name).
vgg_blocks = (
    (64,  ('bloco1_conv1', 'bloco1_conv2'),                 'bloco1_pool1'),
    (128, ('bloco2_conv1', 'bloco2_conv2'),                 'bloco2_pool1'),
    (256, ('bloco3_conv1', 'bloco3_conv2', 'bloco3_conv3'), 'bloco3_pool1'),
    (512, ('bloco4_conv1', 'bloco4_conv2', 'bloco4_conv3'), 'bloco4_pool1'),
    (512, ('bloco5_conv1', 'bloco5_conv2', 'bloco5_conv3'), 'bloco5_pool1'),
)

# Stack the blocks in order: the 3x3 convolutions of a block followed by its 2x2 max pooling.
x = img_input
for n_filters, conv_names, pool_name in vgg_blocks:
    for conv_name in conv_names:
        x = Conv2D(n_filters,(3,3),activation='relu',padding='same',name=conv_name,trainable=False)(x)
    x = MaxPooling2D((2,2),strides=(2,2),name=pool_name,trainable=False)(x)

# x = model.output
# Classification head: flatten, then two frozen 4096-unit dense layers.
x = Flatten(name='flatten',trainable=False)(x)
for fc_name in ('fc1', 'fc2'):
    x = Dense(4096,activation='relu',name=fc_name, trainable=False)(x)

# Use this output layer when the images come from the ImageDataGenerator:
# x =  Dense(validation_generator.num_classes,activation='softmax',name='predictions')(x)

# Use this output layer when the images come from the zip DataFrame:
# x =  Dense(len(images["Pessoa"].unique()),activation='softmax',name='predictions', trainable=True)(x)

# Active choice: images come from the SQLite table, one output unit per distinct person.
x = Dense(num_pessoas,activation='softmax',name='predictions', trainable=True)(x)

# model = Model(inputs=model.input, outputs=x, name='FaceRecognition')

model = Model(inputs=img_input, outputs=x, name='FaceRecognition')
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

model.summary()

# return model

Model: "FaceRecognition"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 bloco1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 bloco1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 bloco1_pool1 (MaxPooling2D  (None, 112, 112, 64)      0         
 )                                                               
                                                                 
 bloco2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 bloco2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                   

In [None]:
# model = VGGUpdate(classes=len(images['Pessoa'].unique()))

In [None]:
#Vitor
# Build the training data: read every stored image from SQLite, decode and
# resize it, one-hot encode the person names and split into train/test sets.
arr_img = []
arr_pessoas = []
img_size = 224

# The connection is closed explicitly because sqlite3's `with connection`
# block only scopes the transaction and leaves the connection open.
connection = sql.connect(db_path)
try:
    connection.row_factory = sql.Row
    cursor = connection.cursor()

    cursor.execute("SELECT nome_pessoa, imagem FROM dt_Pessoas")
    response = cursor.fetchall()

    for dados_imagem in response:
        imagem_array = np.frombuffer(dados_imagem['imagem'], dtype=np.uint8)
        # imdecode rejects an empty buffer and yields None for data it cannot decode;
        # such rows are skipped with a warning instead of crashing cv2.resize.
        imagemLinha = cv2.imdecode(imagem_array, cv2.IMREAD_COLOR) if imagem_array.size else None
        if imagemLinha is None:
            warnings.warn(f"Skipping undecodable image stored for {dados_imagem['nome_pessoa']!r}")
            continue
        imagemLinha = cv2.resize(imagemLinha, (img_size,img_size))
        arr_img.append(imagemLinha)
        arr_pessoas.append(dados_imagem['nome_pessoa'])
finally:
    connection.close()

arr_imgNP = np.array(arr_img)
arr_pessoasNP = np.array(arr_pessoas)

# Only a float32 cast is applied; the division by 255 is commented out.
# NOTE(review): no preprocess_input call or channel reordering happens here — confirm
# this matches how the loaded weights were trained.
arr_images255 = arr_imgNP.astype('float32')# / 255.0

Pessoas_Nome = arr_pessoasNP
Pessoas_Nome_Unique = np.unique(arr_pessoasNP)

# Integer-encode the person names.
y_encoder = LabelEncoder()
Pessoas = y_encoder.fit_transform(Pessoas_Nome)

# One-hot encode the integer labels (single column, hence the reshape).
Pessoas = Pessoas.reshape(-1,1)
Y = ColumnTransformer([('Pessoa',OneHotEncoder(),[0])], remainder = 'passthrough').fit_transform(Pessoas)


# Shuffle images and labels together, then hold out 20% of the samples for testing.
arr_images255, Y = shuffle(arr_images255, Y, random_state=1)

train_x, test_x, train_y, test_y = train_test_split(arr_images255, Y, test_size=0.2, random_state=415)
# Convert the encoder output to dense arrays for Keras.
train_y = train_y.toarray()
test_y = test_y.toarray()

### Training

In [None]:
# Weight files under <base_path>/models: model.h5 is the file weights are loaded from,
# cmodel.h5 is the file the ModelCheckpoint callback is configured to write.
models_path = join(base_path, 'models')
model_path = join(models_path, 'model.h5')
cmodel_path= join(models_path, 'cmodel.h5')

In [None]:
#Yuri
# Checkpoint callback: watches val_accuracy (mode='max') and, because save_best_only=True,
# writes to cmodel_path only when that metric improves.
# NOTE(review): the only fit calls visible in this notebook are commented out, so this
# callback is configured but not attached to a training run here.
checkpoint_filepath=cmodel_path
load_filepath=model_path
Checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1
)

In [None]:
# Restore previously saved weights from model.h5 into the FaceRecognition model.
model.load_weights(load_filepath)

# The training calls below are disabled; as written, this cell only loads weights.
# Generator-based training (requires train_generator / validation_generator):
# history = model.fit(train_generator,
#                      epochs = 60,
#                      #steps_per_epoch = train_samples,
#                      shuffle = True,
#                      workers=8,
#                      callbacks = Checkpoint,
#                      validation_data = validation_generator,
#                      #validation_steps = train_samples
#                     )

# In-memory training on the arrays produced by the train/test split:
#temp = model.fit(train_x,train_y,epochs=24,batch_size=32,validation_data = (test_x,test_y))#,callbacks=[Checkpoint])

'history = model.fit(train_generator,\n                     epochs = 60,\n                     #steps_per_epoch = train_samples,\n                     shuffle = True,\n                     workers=8,\n                     callbacks = Checkpoint,\n                     validation_data = validation_generator,\n                     #validation_steps = train_samples\n                    )\n'

In [None]:
#model.load_weights(checkpoint_filepath)
#Yuri

# Evaluate the model on the held-out split; returns the loss and the accuracy metric.
# NOTE(review): the recorded output (loss 6.3322, accuracy 0.0) is close to ln(567) ~= 6.34,
# which is what a uniform guess over the 567 stored people would score — verify the loaded
# weights and the input preprocessing before trusting this model.
loss, acc = model.evaluate(test_x,test_y,verbose=2)

16/16 - 2s - loss: 6.3322 - accuracy: 0.0000e+00 - 2s/epoch - 140ms/step
