# Projekt z przedmiotu: "Systemy na bazie sztucznej inteligencji"
Temat własny: **Zaprojektować sieć neuronową do rozpoznawania osoby na podstawie zdjęcia**

Link do dataset: https://vis-www.cs.umass.edu/lfw/

# Użyte biblioteki

In [55]:
import os
import shutil
import cv2
import time
import random
import numpy as np

import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import preprocess_input

import seaborn as sns
import matplotlib.pyplot as plt
tf.__version__, np.__version__

import pickle

# Zmienne globalne

In [88]:
DATA_PATH = "Data"
DATASETS = "Datasets"
TRAIN_DATASETS_PATH = os.path.join(DATASETS, "trainDataset")
TEST_DATASETS_PATH = os.path.join(DATASETS, "testDataset")
TRAIN_TRIPLETS_PATH = os.path.join(DATASETS, "trainTriplets")
TEST_TRIPLETS_PATH = os.path.join(DATASETS, "testTriplets")

TRAIN_ANCHOR_DATASETS_PATH = os.path.join(DATASETS, "trainAnchorDataset")
TRAIN_POSITIVE_DATASETS_PATH = os.path.join(DATASETS, "trainPositiveDataset")
TRAIN_NEGATIVE_DATASETS_PATH = os.path.join(DATASETS, "trainNegativeDataset")
TRAIN_NEGATIVE_TRIPLETS_PATH = os.path.join(DATASETS, "trainNegativeTriplets")
TEST_ANCHOR_TRIPLETS_PATH = os.path.join(DATASETS, "testAnchorTriplets")
TEST_POSITIVE_TRIPLETS_PATH = os.path.join(DATASETS, "testPositiveTriplets")
TEST_NEGATIVE_TRIPLETS_PATH = os.path.join(DATASETS, "testNegativeTriplets")

CHECKPOINT_PATH = 'Checkpoints'
ENCODER_SAVE_PATH = 'Encoder'

LFW = "LFW"
LFW_DATASET_CHANGED = os.path.join(LFW, "lfw_changed")
LFW_DATASET = os.path.join(LFW, "lfw")
OUR_PHOTOS_DIR = os.path.join(LFW, "our_photos")
OUR_RAW_PHOTOS = os.path.join(OUR_PHOTOS_DIR, "raw")
OUR_EXTRACTED_FACES_PHOTOS = os.path.join(OUR_PHOTOS_DIR, "extracted")

# Foldery do prywatnych zdjęć

In [None]:
!mkdir LFW
!mkdir OUR_PHOTOS_DIR
!mkdir OUR_RAW_PHOTOS

# Pobranie i wypakowanie bazy zdjęć

In [None]:
!curl -o LFW/lfw.tgz http://vis-www.cs.umass.edu/lfw/lfw.tgz
!tar -xzvf "./LFW/lfw.tgz" -C "./LFW/"

# Wyodrębnienie twarzy ze zdjęć wrzuconych przez nas
Zdjęcia  (Imie_Nazwisko_XXXX.jpg, nieważny rozmiar) wrzucamy tu: ./LFW/our_photos/raw do utworzonego przez nas folderu Imie_Nazwisko.

* najlepiej jakby na zdjęciu znajdowała się tylko nasza twarz.

* co najmniej 4 zdjęcia.

* opcjonalnie sprawdzić w folderze: ./LFW/our_photos/extracted czy algorytm dobrze wyciął naszą twarz (uznał jakiś element zdjęcia za twarz, który twarzą nie jest).

In [57]:
haarCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def extract_faces():
    for dir in os.listdir(OUR_RAW_PHOTOS):
        dir_path = os.path.join(OUR_RAW_PHOTOS, dir)
        count = 0

        extractedDir = os.path.join(OUR_EXTRACTED_FACES_PHOTOS, dir)
        if not os.path.exists(extractedDir):
                os.makedirs(extractedDir)
        else:
          files_in_extracted = os.listdir(extractedDir)
          if files_in_extracted:
              for file in files_in_extracted:
                  os.remove(os.path.join(extractedDir, file))

        for file in os.listdir(dir_path):
            image = cv2.imread(os.path.join(OUR_RAW_PHOTOS, dir, file))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            faces = haarCascade.detectMultiScale(image, scaleFactor=1.24, minNeighbors=4)

            for (x,y,w,h) in faces:
              face = image[y:(y+h), x:(x+w)]
              face = cv2.resize(face, (250, 250), interpolation = cv2.INTER_AREA)
              savePath = os.path.join(extractedDir, (str(count) + ".jpg"))

              cv2.imwrite(savePath, face)
              count += 1
              # zakładamy że jest jedna twarz na zdjęciu, break dlatego że nie wiem jak wyciągnąć tylko pierwszą wartość xd
              break;
extract_faces()

Remove files
Remove files


# Przeniesienie zdjęć z datasetu i naszych do innego folderu

* zdjęcia z datasetu są przenoszone pod warunkiem, że w folderze znajdują się co najmniej dwa zdjęcia danej osoby (po wycięciu twarzy).

In [58]:
def adjust_and_move_photos():
  if os.path.exists(LFW_DATASET_CHANGED):
    shutil.rmtree(LFW_DATASET_CHANGED)

  for dir in os.listdir(LFW_DATASET):
    dir_path = os.path.join(LFW_DATASET, dir)

    if(len(os.listdir(dir_path)) > 2):
      destinationDir = os.path.join(LFW_DATASET_CHANGED, dir)
      if not os.path.exists(destinationDir):
        os.makedirs(destinationDir)

      id = 0
      for file in os.listdir(dir_path):
        image = cv2.imread(os.path.join(LFW_DATASET, dir, file))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        savePath = os.path.join(destinationDir, (str(id) + ".jpg"))
        savingReturn = cv2.imwrite(savePath, image)
        print(savingReturn)
        if not savingReturn:
          return
        id += 1

adjust_and_move_photos()

Aaron_Peirsol
LFW\lfw_changed\Aaron_Peirsol\0.jpg
True
LFW\lfw_changed\Aaron_Peirsol\1.jpg
True
LFW\lfw_changed\Aaron_Peirsol\2.jpg
True
LFW\lfw_changed\Aaron_Peirsol\3.jpg
True
Abdoulaye_Wade
LFW\lfw_changed\Abdoulaye_Wade\0.jpg
True
LFW\lfw_changed\Abdoulaye_Wade\1.jpg
True
LFW\lfw_changed\Abdoulaye_Wade\2.jpg
True
LFW\lfw_changed\Abdoulaye_Wade\3.jpg
True
Abdullah
LFW\lfw_changed\Abdullah\0.jpg
True
LFW\lfw_changed\Abdullah\1.jpg
True
LFW\lfw_changed\Abdullah\2.jpg
True
LFW\lfw_changed\Abdullah\3.jpg
True
Abdullah_al-Attiyah
LFW\lfw_changed\Abdullah_al-Attiyah\0.jpg
True
LFW\lfw_changed\Abdullah_al-Attiyah\1.jpg
True
LFW\lfw_changed\Abdullah_al-Attiyah\2.jpg
True
Abdullah_Gul
LFW\lfw_changed\Abdullah_Gul\0.jpg
True
LFW\lfw_changed\Abdullah_Gul\1.jpg
True
LFW\lfw_changed\Abdullah_Gul\2.jpg
True
LFW\lfw_changed\Abdullah_Gul\3.jpg
True
LFW\lfw_changed\Abdullah_Gul\4.jpg
True
LFW\lfw_changed\Abdullah_Gul\5.jpg
True
LFW\lfw_changed\Abdullah_Gul\6.jpg
True
LFW\lfw_changed\Abdullah_Gul\7.j

In [64]:
# to tylko raz na całe uczenie
def split_dataset(directory, split=0.8):
  dirs = os.listdir(directory)
  random.shuffle(dirs)

  nof_train = int(len(dirs)*split)
  train_list, test_list = {}, {}

  # Create train list
  for dir in dirs[:nof_train]:
    nof_files = len(os.listdir(os.path.join(directory, dir)))
    train_list[dir] = nof_files

  # Create test list
  for dir in dirs[nof_train:]:
    nof_files = len(os.listdir(os.path.join(directory, dir)))
    test_list[dir] = nof_files

  return train_list, test_list

# [train_list, test_list] = split_dataset(LFW_DATASET_CHANGED)
# print(train_list)
# print(test_list)



In [71]:
# train_list, test list = {folderName: numberOfFilesInFolder, folderName: numberOfFilesInFolder, ...}
[train_list, test_list] = split_dataset(LFW_DATASET_CHANGED)

# save datasets
with open(TRAIN_DATASETS_PATH, 'wb') as output:
  pickle.dump(train_list, output)
with open(TEST_DATASETS_PATH, 'wb') as output:
  pickle.dump(test_list, output)

# print("Length of training list:", len(train_list))
# print("Length of testing list :", len(test_list))
# print("\nTrain List:", train_list)
# print("\nTest List:", test_list)

# Test
with open(TRAIN_DATASETS_PATH, 'rb') as input:
  inTrainSet = pickle.load(input)
with open(TEST_DATASETS_PATH, 'rb') as input:
  inTestSet = pickle.load(input)

# print(inTrainSet)
# print(inTestSet)

In [66]:

#to tylko raz na całe uczenie
# anchor - zdjęcie do którego porównujemy, positive - zdjęcie tej samej osoby, negative - zdjęcie losowej innej osoby
def create_triplets(directory, folder_list, max_files=10):
  triplets = []
  dirs = list(folder_list.keys())

  for dir in dirs:
    files = list(os.listdir(os.path.join(directory, dir)))[:max_files]
    num_files = len(files)

    for i in range(num_files - 1):
      for j in range(num_files - 1):
        if(j != i):
          anchor = (dir, f"{i}.jpg")
          positive = (dir, f"{j}.jpg")

          #find directory with photos of any other person
          neg_dir = dir
          while neg_dir == dir:
            neg_dir = random.choice(dirs)

          neg_file = random.randint(0, folder_list[neg_dir] - 1)
          negative = (neg_dir, f"{neg_file}.jpg")

          triplets.append((anchor, positive, negative))

  random.shuffle(triplets)
  return triplets

In [75]:
train_triplet = create_triplets(LFW_DATASET_CHANGED, train_list)
test_triplet  = create_triplets(LFW_DATASET_CHANGED, test_list)

with open(TRAIN_TRIPLETS_PATH, 'wb') as output:
  pickle.dump(train_triplet, output)
with open(TEST_TRIPLETS_PATH, 'wb') as output:
  pickle.dump(test_triplet, output)

print("Number of training triplets:", len(train_triplet))
# print(train_triplet)
print("Number of testing triplets :", len(test_triplet))
# print(test_triplet)

Number of training triplets: 15718
Number of testing triplets : 3818


In [83]:
with open(TRAIN_TRIPLETS_PATH, 'rb') as input:
  inTrainTripletsSet = pickle.load(input)
print("Number of training triplets:", len(inTrainTripletsSet))
#print(inTrainTripletsSet)

with open(TEST_TRIPLETS_PATH, 'rb') as input:
  inTestTripletsSet = pickle.load(input)
print("Number of testing triplets :", len(inTestTripletsSet))
# print(inTestTripletsSet)


Number of training triplets: 15718
Number of testing triplets : 3818


In [89]:
anchorData = []
positiveData = []
negativeData = []

def createAnchorPotsitiveNegativeDataset():
  for triplet in inTrainTripletsSet:
    a,p,n = triplet
    anchorData.append(os.path.join(DATA_PATH, a[0], a[1]))
    positiveData.append(os.path.join(DATA_PATH, p[0], p[1]))
    negativeData.append(os.path.join(DATA_PATH, n[0], n[1]))
  return 0


createAnchorPotsitiveNegativeDataset()
with open(TRAIN_ANCHOR_DATASETS_PATH, 'wb') as output:
  pickle.dump(anchorData, output)
with open(TRAIN_POSITIVE_DATASETS_PATH, 'wb') as output:
  pickle.dump(positiveData, output)
with open(TRAIN_NEGATIVE_DATASETS_PATH, 'wb') as output:
  pickle.dump(negativeData, output)
# print(anchorData)
# print(positiveData)
# print(negativeData)