# Limpieza de imágenes

In [65]:
from os import listdir
import cv2
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [66]:
# Haar-cascade frontal-face detector, loaded from an XML file expected in the
# current working directory.
faceCascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# CascadeClassifier does not raise when the file cannot be loaded; it just
# stays empty. Fail here instead of letting every later detection fail.
if faceCascade.empty():
    raise IOError(
        "could not load 'haarcascade_frontalface_default.xml'"
    )

# Snake-case alias so code referring to either spelling finds the detector.
face_cascade = faceCascade

In [67]:
def cleanData(img, size=(60, 60)):
    '''
    Detect a face in an image file and return it cropped and resized.

    Steps:
    - Read the file at ``img`` with ``cv2.IMREAD_COLOR``.
    - Run the module-level Haar cascade ``faceCascade`` on it.
    - Crop the bounding box of the first detection.
    - Resize the crop to ``size`` (60x60 by default).

    Parameters:
        img: path of the image file to process.
        size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        The resized crop produced by ``cv2.resize``. When the image cannot
        be read, no face is detected or OpenCV raises, the tuple
        ``(exception, 'no')`` is returned instead, so callers can filter the
        entry out by looking for the ``'no'`` marker.
    '''
    # Resolve the detector outside the try block: a missing classifier is a
    # setup error and must not be reported as "no face" for every image.
    detector = faceCascade

    try:
        image = cv2.imread(img, cv2.IMREAD_COLOR)
        # cv2.imread signals an unreadable file by returning None.
        if image is None:
            return IOError('could not read image: {}'.format(img)), 'no'

        face_p = detector.detectMultiScale(
            image,
            scaleFactor=1.1,
            minNeighbors=5,
        )
        if len(face_p) == 0:
            return ValueError('no face detected in: {}'.format(img)), 'no'

        # Only the first detection is used.
        (x, y, w, h) = face_p[0]
        crop_image = image[y:y + h, x:x + w]
        img_data = cv2.resize(crop_image, size)

    except Exception as e:
        # Best effort per image: report the failure instead of aborting
        # the whole batch.
        return e, 'no'

    return img_data
        

#### Dirección de los datos

In [68]:
# Input folders, one per class.
key_m = './input/men/'
key_w = './input/women/'

# Names of the entries found in each folder.
value_m, value_w = listdir(key_m), listdir(key_w)

In [69]:
# Prefix every entry name with its folder to obtain the path to the file.
p_man = [''.join((key_m, name)) for name in value_m]
p_woman = [''.join((key_w, name)) for name in value_w]

# Class labels and their path lists, in matching order.
all_values = [p_man, p_woman]
all_keys = ['man', 'woman']

Concatenación de todas las rutas (paths)

In [70]:
# One list with the men paths followed by the women paths.
concat_values = [*p_man, *p_woman]
# Show the first five entries.
concat_values[:5]

['./input/men/00001722.jpg',
 './input/men/00001044.jpg',
 './input/men/00001291.png',
 './input/men/00001050.jpg',
 './input/men/00001736.jpg']

Diccionario con los valores clave de cada imagen

In [71]:
# One record per image path, keyed by the path's position in concat_values:
# - 'sex' is the third '/'-separated component of the path (the class
#   folder, e.g. 'men' for './input/men/<file>');
# - 'array' is whatever cleanData returns for that path.
p_dict = {
    idx: {
        'sex': path.split('/')[2],
        'array': cleanData(path),
    }
    for idx, path in enumerate(concat_values)
}

Limpieza del DataFrame final

In [72]:
# p_dict maps record id -> {'sex', 'array'}, so the frame is first built with
# one column per record and then transposed to get one row per record.
data = pd.DataFrame(p_dict).transpose()
data.head(5)

Unnamed: 0,sex,array
0,men,"[[[74, 83, 70], [75, 84, 71], [76, 84, 71], [7..."
1,men,"[[[2, 15, 29], [9, 23, 41], [16, 28, 45], [14,..."
2,men,"[[[128, 128, 0], [128, 128, 0], [128, 128, 0],..."
3,men,"[[[37, 27, 33], [38, 28, 34], [44, 35, 38], [4..."
4,men,"[[[255, 255, 255], [255, 255, 255], [255, 255,..."


In [73]:
# Renumber the rows in place; drop=True discards the previous index instead
# of keeping it as a column.
data.reset_index(drop=True, inplace=True)

In [74]:
# cleanData reports a failure as a tuple carrying the 'no' marker, while a
# successful result is an image array. Test for the tuple explicitly instead
# of running `'no' in <array>`, which would compare every pixel to a string.
failed = data['array'].map(
    lambda cell: isinstance(cell, tuple) and 'no' in cell
).astype(bool)

# Remove all failed rows at once; the remaining rows keep their labels.
data.drop(data[failed].index, axis=0, inplace=True)

len(data)

  


2815

In [75]:
# Persist the cleaned DataFrame as a pickle file under ./output/.
data.to_pickle('./output/dataCleanColorDef.pkl')