# Trabalho Final - Aprendizado não supervisionado
## Aluno: Diego Leonardo Urban

## Importando as dependências

In [1]:
# Python
import sys
import random
from os import listdir
from os.path import isfile, join

# Numpy
import numpy as np

# OpenCV
import cv2
from google.colab.patches import cv2_imshow

## Definição das Classes

In [2]:
class Person:
  """Simple container holding the data of one person.

  Attributes:
    id: Identifier given to the constructor.
    label: Label given to the constructor.
    data: Data payload given to the constructor.
  """

  def __init__(self, id, label, data):
    # Store the three constructor arguments unchanged as public attributes.
    self.id, self.label, self.data = id, label, data

In [3]:
class PCA():
  """Runs the PCA (Eigenfaces) recognition experiment over a folder of images.

  The class reads every ``.jpg`` file found directly inside ``path``, splits
  the images into training and test lists, and prints an accuracy figure for
  a range of principal-component counts using OpenCV's EigenFaceRecognizer.
  """

  def __init__(self, path):
    """Remember the directory that holds the image files.

    Args:
      path: Directory scanned by ``load_dataset`` for ``.jpg`` files.
    """
    self.path = path

  def image_data(self, filename):
    """Load one image as a flattened grayscale row vector.

    The image is read in grayscale, resized to 80x80, transposed and
    reshaped into a single row.

    Args:
      filename: Path of the image file to read.

    Returns:
      A ``numpy.float64`` array of shape ``(1, 6400)``.

    Raises:
      IOError: If OpenCV cannot read ``filename``.
    """
    img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if img is None:
      # cv2.imread does not raise on a missing/unreadable file; it returns
      # None, which would otherwise surface as an obscure cv2.resize error.
      raise IOError(f"Could not read image file: {filename}")

    dsize = (80, 80)
    output = cv2.resize(img, dsize)

    # Transpose before flattening so the row is laid out column by column.
    dst = output.T.reshape((1, output.shape[1] * output.shape[0]))
    return np.float64(dst)

  def to_person(self, filename):
    """Build a ``Person`` from an image path.

    The part of the path after the last ``/`` and before ``.jpg`` is split
    on ``_``; the first field becomes the person's ``id`` and the second its
    ``label`` (both converted with ``int``).

    Args:
      filename: Path of the image, e.g. ``<dir>/<id>_<label>.jpg``.

    Returns:
      A ``Person`` whose ``data`` is the vector from ``image_data``.
    """
    data_part = filename[filename.rfind('/')+1 :filename.rfind('.jpg')]
    data = data_part.split('_')
    return Person(int(data[0]), int(data[1]), self.image_data(filename))

  def load_dataset(self, train, test, p, seed=None):
    """Read the images in ``self.path`` and split them into train and test.

    The samples are sorted by ``id`` in descending order and consumed in
    consecutive groups of 10. From every complete group, randomly chosen
    samples are moved to ``test`` until only ``p`` remain, and the remainder
    is added to ``train``. When ``p`` equals the group size the whole group
    is added to both lists. A trailing group with fewer than 10 samples is
    not added to either list.

    NOTE(review): grouping by position presumably relies on each person
    owning 10 consecutive ids -- confirm against the dataset naming.

    Args:
      train: List extended in place with the training samples.
      test: List extended in place with the test samples.
      p: Number of samples of each group kept for training.
      seed: Optional seed for a private random generator, making the split
        reproducible. With the default ``None`` the module-level ``random``
        generator is used.
    """
    rng = random if seed is None else random.Random(seed)

    # Sorted listing keeps the input order independent of the file system.
    onlyfiles = [
      self.path + "/" + f
      for f in sorted(listdir(self.path))
      if isfile(join(self.path, f)) and f.endswith('.jpg')
    ]
    people = list(map(self.to_person, onlyfiles))

    people.sort(key=lambda x: x.id, reverse=True)

    num_samples_per_person = 10
    samples = []
    for person in people:
      samples.append(person)
      if len(samples) == num_samples_per_person:
        # Move random samples to the test list until p are left.
        while len(samples) > p:
          index = rng.randint(0, len(samples)-1)
          test.append(samples.pop(index))

        if p == num_samples_per_person:
          test.extend(samples)

        train.extend(samples)
        samples = []

  def main(self, seed=None):
    """Run the experiment and print the accuracy per component count.

    Loads the dataset keeping 7 samples of each group for training, prints
    the split summary and then, for every number of components from 10 to
    20, trains an EigenFaceRecognizer and evaluates it on the test list.

    A prediction counts as a hit when it is accepted with the right label,
    or rejected with the wrong label. It is rejected when its distance is
    above the largest distance seen so far for a correct reference
    prediction, or above a fixed threshold.

    Args:
      seed: Optional seed forwarded to ``load_dataset`` for a reproducible
        split. Defaults to ``None`` (unseeded).

    Raises:
      ValueError: If the training or the test list ends up empty.
    """
    train = []
    test = []
    p = 7

    self.load_dataset(train, test, p, seed=seed)

    print(f"-- Total de imagens: {len(train) + len(test)}")
    print(f"-- Hold-out: {p * 10}%")
    print(f"-- Total de treino: {len(train)}")
    print(f"-- Total de teste: {len(test)}")
    print("------------------------------")

    if not train or not test:
      # Fail early with a clear message instead of an OpenCV training error
      # or a division by zero in the accuracy computation.
      raise ValueError(
        f"No usable images for the experiment in {self.path!r} "
        f"(train={len(train)}, test={len(test)})"
      )

    start = 10
    end = 20

    # Fixed distance above which a prediction is rejected.
    distance_threshold = 1500
    # Only correct predictions with id up to this value update the running
    # maximum distance. NOTE(review): presumably these are the images of the
    # base dataset -- confirm.
    max_reference_id = 400

    # The training data does not depend on k, so build it once.
    src = [person_train.data for person_train in train]
    labels = np.asarray([person_train.label for person_train in train])

    for k in range(start, end+1):
      recognizer = cv2.face.EigenFaceRecognizer_create(k)
      recognizer.train(src, labels)

      # NOTE: sys.float_info.min is the smallest positive normalized float,
      # not the most negative one; here it works as a near-zero start for
      # the running maximum.
      max_distance = sys.float_info.min

      true_negative_count = 0
      true_positive_count = 0

      for person_test in test:
        label, confidence = recognizer.predict(person_test.data)
        label_ok = label == person_test.label

        rejected = confidence > max_distance or confidence > distance_threshold
        if rejected:
          if not label_ok:
            true_negative_count += 1
        elif label_ok:
          true_positive_count += 1

        # The running maximum is updated only after the decision above.
        if label_ok and person_test.id <= max_reference_id:
          if confidence > max_distance:
            max_distance = confidence

      trues = true_positive_count + true_negative_count
      accuracy = trues / len(test) * 100
      print(f"{k} componentes principais, acurácia: {accuracy:.2f}%")

## Execução

In [4]:
# Welcome banner: the three lines are written by a single print call.
print(
  "------------------------------",
  '-- Bem-vindo a ferramenta PCA!',
  "------------------------------",
  sep="\n",
)

# TODO: change the path as needed (for example './' when the images sit
# next to the notebook).
path = './resources/images'

# Create the experiment for that folder and run it.
pca = PCA(path)
pca.main()

------------------------------
-- Bem-vindo a ferramenta PCA!
------------------------------
-- Total de imagens: 410
-- Hold-out: 70%
-- Total de treino: 287
-- Total de teste: 123
------------------------------
10 componentes principais, acurácia: 88.62%
11 componentes principais, acurácia: 87.80%
12 componentes principais, acurácia: 88.62%
13 componentes principais, acurácia: 87.80%
14 componentes principais, acurácia: 86.99%
15 componentes principais, acurácia: 88.62%
16 componentes principais, acurácia: 89.43%
17 componentes principais, acurácia: 86.99%
18 componentes principais, acurácia: 86.18%
19 componentes principais, acurácia: 87.80%
20 componentes principais, acurácia: 88.62%
