# Particiones de los datos

A continuación realizaremos la partición de los datos en conjuntos de entrenamiento y test. En primer lugar importamos todos los paquetes necesarios y establecemos una semilla para que los procesos aleatorios sean reproducibles.

In [None]:
from facenet_pytorch import MTCNN
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
from skimage import io, transform
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
import numpy as np
import os
# Worker count: 0 on Windows (os.name == 'nt'), 4 on any other platform.
# NOTE(review): not referenced in the visible code; presumably intended for
# DataLoader(num_workers=...) -- confirm before relying on it.
workers = 0 if os.name == 'nt' else 4
import numpy as np
import cv2
from PIL import Image, ImageDraw
from IPython import display
import matplotlib.patches as patches
import shutil as sh
import random
import re
# Matches runs of digits. The capturing group makes numbers.split() keep the
# digit runs in its output, which numericalSort relies on.
numbers = re.compile(r'(\d+)')
# Seed Python's `random` module so the shuffle used for the train/test split
# is reproducible.
random.seed(10)


A continuación se definen las principales funciones empleadas y la clase que encapsula un dataset de imágenes con landmarks.

In [None]:
# Main functions
def numericalSort(value):
    """Return a sort key that compares the digit runs of ``value`` as integers.

    ``value`` is split with the module-level ``numbers`` pattern. Because that
    pattern has a capturing group, the resulting list alternates between
    non-digit text (even positions) and digit runs (odd positions); the digit
    runs are converted to ``int`` so that, for example, ``"2_1.jpg"`` sorts
    before ``"10_1.jpg"``.

    Args:
        value (str): String to build the key for (here, a file name).

    Returns:
        list: Alternating ``str`` / ``int`` tokens usable as a ``sorted`` key.
    """
    tokens = numbers.split(value)
    # Odd positions hold the captured digit runs.
    return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

def create_train_test_datasets(path,train_percentage=0.8,test_percentaje=0.2,num_samples=164):
    """Move the files found in ``path`` into train and test folders.

    A random subset of sample indices is drawn with ``random.shuffle`` (so the
    result depends on the current state/seed of the ``random`` module). The
    files in ``path`` are then visited in ``numericalSort`` order and moved to
    ``./FORENSE_AM_TRAIN`` or ``./FORENSE_AM_TEST`` (both relative to the
    current working directory, created if missing). The running sample index
    is advanced after every ``.pts`` file, so every file visited up to and
    including a ``.pts`` file goes to the same folder as that ``.pts`` file.
    This assumes each image sorts immediately before its own ``.pts`` file.

    Args:
        path (str): Directory containing the images and ``.pts`` files.
        train_percentage (float): Fraction of the ``num_samples`` indices
            assigned to the training partition.
        test_percentaje (float): Unused; kept for backward compatibility. The
            test partition is whatever is not selected for training.
        num_samples (int): Number of sample indices to shuffle. Defaults to
            164, the value that was previously hard-coded.

    Returns:
        None. The function only moves files on disk.
    """
    # Draw the indices of the samples that go to the training partition.
    ids = list(range(num_samples))
    random.shuffle(ids)
    num_images_train = int(len(ids) * train_percentage)
    # A set gives O(1) membership tests inside the loop below.
    train_indices = set(ids[:num_images_train])

    # Create the destination folders if they do not exist yet.
    train_dir = "./FORENSE_AM_TRAIN"
    test_dir = "./FORENSE_AM_TEST"
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # List and move the files through explicit paths built from `path`,
    # without changing the process working directory.
    sample_idx = 0
    for fname in sorted(os.listdir(path), key=numericalSort):
        target_dir = train_dir if sample_idx in train_indices else test_dir
        sh.move(os.path.join(path, fname), os.path.join(target_dir, fname))
        # A .pts file closes the current sample; move on to the next index.
        if fname.endswith(".pts"):
            sample_idx += 1

In [None]:
#Class 
def readLandmarks(landmarks_dir,alpha):
    """Build the annotation table for every image / ``.pts`` pair in a folder.

    Files in ``landmarks_dir`` are visited in ``numericalSort`` order. For each
    ``.jpg`` / ``.JPG`` image the face bounding box is obtained with the global
    ``mtcnn`` detector (it must be defined before this function is called);
    for each ``.pts`` file the landmark coordinates and a visibility mask are
    parsed. Files are accessed through paths joined with ``landmarks_dir``, so
    the process working directory is never changed.

    Args:
        landmarks_dir (str): Directory with the images and ``.pts`` files.
        alpha (float): Factor applied to the height of the detected box.

    Returns:
        pandas.DataFrame: One row per image with the columns ``fnames``,
        ``ra`` (running image index), ``landmarks_full``, ``masks``,
        ``face_x``, ``face_y``, ``face_w`` and ``face_h``.

    Raises:
        ValueError: If the detector returns no bounding box for an image.
    """
    # Images for which a detection other than the first one is used:
    # file name -> (row of the detection to use, label printed as a warning).
    special_cases = {
        '47_2.jpg': (1, "File 55 cuidado "),
        '47_4.jpg': (2, "File 57 cuidado "),
    }

    fnames=[]
    ids=[]
    masks=[]
    landmarks_list=[]
    face_x=[]
    face_y=[]
    face_w=[]
    face_h=[]

    i=0
    for file in sorted(os.listdir(landmarks_dir), key=numericalSort):
        file_path = os.path.join(landmarks_dir, file)

        if file.endswith((".jpg", ".JPG")):
            fnames.append(file)
            ids.append(i)
            # Build the bounding box.
            image = io.imread(file_path)
            if len(image.shape) < 3:
                # Grey scale to RGB: replicate the single channel three times
                # along a new last axis.
                image = np.stack([image, image, image], axis=-1)
            bb, _ = mtcnn.detect(image)
            if bb is None:
                # Fail with the offending file name instead of an opaque
                # "'NoneType' object is not subscriptable".
                raise ValueError("No face detected in image: " + file_path)

            if file in special_cases:
                row, label = special_cases[file]
                print(label, file)
            else:
                row = 0
                print("File ", i , "name: ", file)

            # Store the box as (x, y, width, height): y is shifted by -5 and
            # the height is scaled by alpha.
            face_x.append(bb[row,0])
            face_y.append(bb[row,1]-5)
            face_w.append(abs(bb[row,0]-bb[row,2]))
            face_h.append(abs(bb[row,1]-bb[row,3])*alpha)
            i+=1

        if file.endswith(".pts"):
            with open(file_path) as f:
                lines=f.readlines()

            landmarks=[]
            mask=[]
            # Lines 4 to 33 (1-based) of the file hold the coordinates.
            for line in lines[3:33]:
                vector=[int(value) for value in line.split()]
                # A first coordinate of -1 gets mask 0 (presumably an absent
                # landmark -- confirm against the annotation format).
                mask.append(0 if vector[0]==-1 else 1)
                landmarks.append(vector)

            landmarks_list.append(landmarks)
            masks.append(mask)

    df=pd.DataFrame({
        'fnames':fnames,
        'ra':ids,
        'landmarks_full':landmarks_list,
        'masks': masks,
        'face_x': face_x,
        'face_y': face_y,
        'face_w': face_w,
        'face_h': face_h
    })
    print(df)
    return df
    
class FaceLandmarksDataset(Dataset):
    """Dataset of face images with their landmarks and face bounding box.

    The annotation table is built once, at construction time, by
    ``readLandmarks``; images are read from disk on every item access.
    """

    def __init__(self,root_dir, alpha, transform=None):
        """
        Args:
            root_dir (string): Directory with all the images and ``.pts`` files.
            alpha: Factor forwarded to ``readLandmarks``.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.landmarks_frame = readLandmarks(root_dir,alpha)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        frame = self.landmarks_frame
        return len(frame)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict.

        Keys: ``'fname'`` (the loaded image array, despite the name),
        ``'landmarks'`` (array of landmark coordinates) and ``'bb'``
        (list with the values of columns 4 to 7 of the annotation table).
        The optional transform is applied to the dict before returning it.
        """
        # Tensor indices are converted to plain Python values first.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        frame = self.landmarks_frame
        # Column 0 of the table holds the image file name.
        image_path = os.path.join(self.root_dir, frame.iloc[idx, 0])
        image = io.imread(image_path)

        # Column 2 holds the list of landmark coordinates.
        points = np.array(frame.iloc[idx, 2])
        points = np.array([np.array(point) for point in points])

        # Columns 4..7 hold the bounding box values.
        box = [frame.iloc[idx, column] for column in range(4, 8)]

        sample = {'fname': image, 'landmarks': points, 'bb': box}
        if not self.transform:
            return sample
        return self.transform(sample)
    
    

Creamos las particiones de los datos:

In [None]:
# Move the files of ./FORENSE_AM into the train/test folders, using the
# default training fraction (0.8).
create_train_test_datasets("./FORENSE_AM")

Inicializamos el detector de caras:

In [None]:
# Create face detector
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# `mtcnn` is read as a global by readLandmarks to obtain the face boxes.
mtcnn = MTCNN(image_size=128,margin=0,select_largest=False, device=device)
# Print the MTCNN API documentation for reference.
help(MTCNN)

Creamos el fichero *annotations.csv*, en el cual almacenaremos para cada imagen la información necesaria:

In [None]:
#face_dataset=FaceLandmarksDataset('./FORENSE_AM',1.1)
#face_dataset.landmarks_frame.to_csv('./FORENSE_AM/annotations.csv')


In [None]:
# Build the annotation table of each partition (alpha=1.1 scales the height
# of the detected bounding boxes) and save it as annotations.csv inside the
# corresponding folder.
face_dataset_train=FaceLandmarksDataset('./FORENSE_AM_TRAIN',1.1)
face_dataset_train.landmarks_frame.to_csv('./FORENSE_AM_TRAIN/annotations.csv')
face_dataset_test=FaceLandmarksDataset('./FORENSE_AM_TEST',1.1)
face_dataset_test.landmarks_frame.to_csv('./FORENSE_AM_TEST/annotations.csv')

Mostramos algunos ejemplos de bounding boxes:

In [None]:
# Face detector performance on the dataset: draw every sample together with
# its detected bounding box.
# The full-dataset `face_dataset` is not created any more (its cell is
# commented out and the files have been moved to the partitions), so the
# train and test datasets are traversed instead.
sample_number = 0
for dataset in (face_dataset_train, face_dataset_test):
    for idx in range(len(dataset)):
        # Fetch the sample once: every dataset[idx] access re-reads the image
        # from disk.
        sample = dataset[idx]
        box_x, box_y, box_w, box_h = sample['bb']

        fig,ax=plt.subplots()
        # The 'fname' entry of a sample holds the loaded image array.
        ax.imshow(sample['fname'])
        rect=patches.Rectangle((box_x,box_y),box_w,box_h, linewidth=1,edgecolor='r',facecolor='none')
        ax.add_patch(rect)
        ax.set_title('Sample #{}'.format(sample_number))

        # plt.show must be called; the bare attribute access was a no-op.
        plt.show()
        sample_number += 1
    
