# AWS Rekognition

In [None]:
import os
import glob
import math

import numpy as np
import matplotlib.pyplot as plt

from imageio import imread
from sklearn.model_selection import train_test_split
from sklearn import metrics

%matplotlib inline

## Configuration

In [None]:
# boto3 is needed by setup_default_session() below, but the notebook only
# imports it in a later cell; import it here so the cells run top to bottom.
import boto3

image_dir_basepath = '../data/images-out/'
# Every entry directly under the base path becomes a label; later cells use
# each name both as a sub-directory and as the class of the images inside it.
# glob returns entries in arbitrary order, so sort them to keep the dataset
# order (and therefore the train/test split) reproducible.
names = sorted(os.path.basename(path)
               for path in glob.glob(os.path.join(image_dir_basepath, '*')))

# AWS
region = 'us-east-1'
bucket_name = 'fdiaz-dataset-07feb2020-01'
collection_id = 'fdiaz-poc'

# Destroy resources once we get a result
destroy = False
# Whether the images should be uploaded to the S3 bucket.
upload_to_s3 = False

facenet_error_ids = [10, 33, 39, 62, 84, 87, 99, 111, 118, 137, 183, 194, 195]

# Every boto3 client/resource created afterwards uses this named profile.
boto3.setup_default_session(profile_name='fdiaz')

In [None]:
import os

import boto3
import joblib

class Rekognition(object):
    """Small helper around an AWS Rekognition face collection.

    Faces are indexed from images stored in S3 and a local mapping
    ``face_id -> {'name': ..., 'image': ...}`` is kept in ``self.face_ids``
    so that search results can be translated back to a person name.
    """

    def __init__(self, collection_id, region_name='us-east-1'):
        """
        param: collection_id: Name of the Rekognition Collection.
        param: region_name: AWS region name.
        """
        self.client = boto3.client('rekognition', region_name=region_name)
        self.collection_id = collection_id
        self.face_ids = {}

    def list_faces(self, max_results=10):
        """
        Return the raw ``list_faces`` response for the collection.

        param: max_results: Maximum number of faces requested.
        """
        return self.client.list_faces(CollectionId=self.collection_id,
                                      MaxResults=max_results)

    def index_face(self, name, bucket, image):
        """
        Index the face found in an S3 image and remember who it belongs to.

        param: name: Label (person name) to associate with the face.
        param: bucket: S3 bucket holding the image.
        param: image: S3 object key of the image.
        return: The raw ``index_faces`` response.
        """
        response = self.client.index_faces(CollectionId=self.collection_id,
                                           Image={'S3Object': {'Bucket': bucket, 'Name': image}},
                                           MaxFaces=1,
                                           QualityFilter="AUTO",
                                           DetectionAttributes=['ALL'])

        for record in response['FaceRecords']:
            face_id = record['Face']['FaceId']
            if face_id in self.face_ids:
                print('Image already in collection: {} ({})'.format(name, face_id))
            else:
                self.face_ids[face_id] = {'name': name,
                                          'image': image}
                print('Index image for: {} ({})'.format(name, face_id))

        return response

    def train(self, bucket, images, set_name='train'):
        """
        Index every image of the training set.

        param: bucket: S3 bucket holding the images.
        param: images: List of local image paths (``.../<name>/<file>``).
        param: set_name: Key prefix the images were uploaded under.
        return: List with one ``index_faces`` response per image.
        """
        response = []

        for image in images:
            name, filename = self.__s3_path(image, set_name)
            response.append(self.index_face(name, bucket, filename))

        return response

    def infer(self, bucket, images, set_name='test', max_faces=1, threshold=50,
              keep_unmatched=False):
        """
        Search the collection for the face in each image.

        param: bucket: S3 bucket holding the images.
        param: images: List of local image paths (``.../<name>/<file>``).
        param: set_name: Key prefix the images were uploaded under.
        param: max_faces: Maximum number of matches requested per image.
        param: threshold: Minimum similarity for a match.
        param: keep_unmatched: When False (default) images without a match
            are skipped, so ``pred`` and ``proba`` can be shorter than
            ``images``. When True a ``None`` prediction and a ``0.0``
            similarity are recorded instead, keeping both lists aligned
            with ``images``.
        return: Tuple ``(pred, proba, response)`` where ``pred`` holds the
            ``face_ids`` entries of the best matches, ``proba`` their
            similarity and ``response`` every raw search response.
        raises: KeyError if a matched face id is not in ``self.face_ids``.
        """
        response = []
        pred = []
        proba = []

        for image in images:
            _, filename = self.__s3_path(image, set_name)
            print('Processing: {}'.format(filename))

            # Use this instance's client: the previous code relied on a
            # module-level ``client`` variable that may not exist or may
            # point at a different service/region.
            result = self.client.search_faces_by_image(CollectionId=self.collection_id,
                                                       Image={'S3Object': {'Bucket': bucket, 'Name': filename}},
                                                       FaceMatchThreshold=threshold,
                                                       MaxFaces=max_faces)
            response.append(result)

            if result['FaceMatches']:
                # Only the first returned match is used as the prediction.
                face = result['FaceMatches'][0]
                pred.append(self.face_ids[face['Face']['FaceId']])
                proba.append(face['Similarity'])
            elif keep_unmatched:
                pred.append(None)
                proba.append(0.0)

        return pred, proba, response

    def __s3_path(self, image, set_name):
        """
        Derive the label and the S3 key from a local image path.

        The label is the parent directory name and the key is
        ``<set_name>/<label>/<file name>``.
        """
        img = os.path.basename(image)
        name = image.split('/')[-2]
        filename = os.path.join(set_name, name, img)
        return name, filename

    def save(self, filename='faceids.sav'):
        """Persist the face id mapping to ``filename`` with joblib."""
        joblib.dump(self.face_ids, filename)

    def load(self, filename='faceids.sav'):
        """Restore the face id mapping from ``filename``; no-op if it is missing."""
        if os.path.isfile(filename):
            self.face_ids = joblib.load(filename)

## Validate AWS account

Verify that the AWS account in which we will run the "training" (face indexing) and the inference is the right one.

In [None]:
# STS client, used here only to check which identity the default session resolves to.
# NOTE: `client` is rebound to a Rekognition client in the "Rekognition" section below.
client = boto3.client("sts")
# NOTE(review): despite its name, `account_id` holds the whole
# get_caller_identity() response, presumably a dict that includes the
# account number -- confirm against the boto3 documentation.
account_id = client.get_caller_identity()
account_id

## Split dataset into training and test

In [None]:
# Fraction of the dataset held out for testing; train_size=None leaves the
# size of the training set to train_test_split.
test_size = 0.25
train_size = None
dataset_x = []  # absolute image paths
dataset_y = []  # label (directory name) of each image, aligned with dataset_x

# glob returns files in arbitrary order, and the split below depends on the
# order of its input even with a fixed random_state. Sort both the labels and
# the image paths so the same images land in train/test on every run.
for name in sorted(names):
    dirpath = os.path.abspath(os.path.join(image_dir_basepath, name))
    image_paths = sorted(glob.glob(os.path.join(dirpath, '*.jpg')))
    dataset_x.extend(image_paths)
    dataset_y.extend([name] * len(image_paths))

print('Dataset size: {}'.format(len(dataset_x)))

x_train, x_test, y_train, y_test = train_test_split(dataset_x, dataset_y, random_state=0,
                                                    train_size=train_size, test_size=test_size)

## S3 bucket

### Create S3 bucket

In [None]:
# Create the private bucket in the configured region, because the Rekognition
# client used later in this notebook is created for that same `region`.
s3 = boto3.resource('s3', region_name=region)

create_bucket_args = {'Bucket': bucket_name, 'ACL': 'private'}
if region != 'us-east-1':
    # Outside us-east-1, S3 requires the region as an explicit location
    # constraint; us-east-1 is the default and must not be passed.
    create_bucket_args['CreateBucketConfiguration'] = {'LocationConstraint': region}

s3.create_bucket(**create_bucket_args)

### Upload images to S3

In [None]:
def upload_images_to_s3(bucket_name, set_name, images):
    """
    Upload local images to S3 under ``<set_name>/<person>/<file name>``.

    The function deliberately does not reuse the name ``upload_to_s3``:
    that name is the boolean configuration flag checked below, and
    redefining it as a function would make the check always truthy.

    param: bucket_name: S3 bucket name.
    param: set_name: Dataset name usually test, train, validation.
    param: images: List of local image paths.
    """
    for image in images:
        img = os.path.basename(image)
        # The parent directory of the image is the person's name.
        name = image.split('/')[-2]
        filename = os.path.join(set_name, name, img)
        print('Uploading: {}'.format(filename))
        # ExtraArgs belongs to upload_file(), not to the Object constructor.
        s3.Object(bucket_name, filename).upload_file(
            image, ExtraArgs={'ServerSideEncryption': 'AES256'})


# Only upload when enabled in the configuration cell.
if upload_to_s3:
    upload_images_to_s3(bucket_name, 'train', x_train)
    upload_images_to_s3(bucket_name, 'test', x_test)

## Rekognition

In [None]:
# Module-level Rekognition client for the configured region; the
# "Create Collection" cell below uses it.
client = boto3.client('rekognition', region_name=region)

### Create Collection

In [None]:
# Create the face collection named by `collection_id`. The response is left as
# the last expression of the cell, presumably so the notebook renders it.
response = client.create_collection(CollectionId=collection_id)
response

In [None]:
# Build the wrapper for the configured region rather than relying on the
# class default, so it always targets the region the collection was created in.
rk = Rekognition(collection_id, region_name=region)
# Restore the face id -> name mapping from a previous run, if one was saved.
rk.load()

`list_faces` should return an empty list because the Collection is new. Once we start populating the Collection with faces, it will return a non-empty list. The response also lets us check which FaceModelVersion our Collection is using.

In [None]:
# NOTE(review): a notebook normally renders only the last expression of a cell,
# so the list_faces() result is presumably not shown here -- confirm, and
# print it explicitly if it should be visible.
rk.list_faces()
# Training image paths produced by the train/test split.
x_train

In [None]:
# Index every training image into the collection; rk.train returns one
# index_faces response per image.
train_records = rk.train(bucket_name, x_train)
# Peek at the first two responses.
train_records[:2]

In [None]:
# Search the collection with every test image. `pred` and `proba` only get an
# entry when a match is found, so they can be shorter than x_test.
pred, proba, infer_response = rk.infer(bucket_name, x_test, max_faces=1)

In [None]:
# Compare the training images with the images recorded in rk.face_ids to find
# the ones for which no face was indexed.
procRk = [os.path.basename(entry['image']) for entry in rk.face_ids.values()]
train = [os.path.basename(path) for path in x_train]

# These summaries used to be bare expressions in the middle of the cell, where
# their values were discarded; print them so they are actually shown.
print((train[:2], procRk[:2]))
print((len(train), len(procRk)))

# Set membership keeps the check linear instead of scanning procRk per image.
indexed = set(procRk)
for filename in train:
    if filename not in indexed:
        print(filename)

In [None]:
# Ground-truth labels of the test images.
y_true = y_test
# Predicted names. NOTE(review): `pred` has no entry for images without a
# match, so it may be shorter than y_test; presumably accuracy_score needs
# both sequences to be the same length -- confirm.
y_pred = [p['name'] for p in pred]

print('Test accuracy: {:.3f}'.format(metrics.accuracy_score(y_true, y_pred)))

In [None]:
# Print scikit-learn's classification report for the same true and predicted labels.
print(metrics.classification_report(y_true, y_pred))

In [None]:
# Grid of the test images, each titled with its predicted name and similarity.
# Wrong predictions get a yellow title box.
faces = len(x_test)
img_per_row = 8
# At least one row so plt.subplots does not fail on an empty test set.
n_rows = max(1, math.ceil(faces / img_per_row))

# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(n_rows, img_per_row, figsize=(20, 60),
                         sharex='col', sharey='row', squeeze=False)
fig.subplots_adjust(hspace=0.4)

# axes.flat walks the grid row by row, the same order as the test images.
for count, ax in enumerate(axes.flat):
    if count >= len(y_pred):
        # No prediction for this slot: hide the empty axes.
        ax.axis('off')
        continue

    if y_pred[count] == y_test[count]:
        box = {'facecolor': 'none', 'edgecolor': 'none'}
    else:
        box = {'facecolor': 'yellow', 'alpha': 0.2}
    ax.set_title('{} ({:.2f})'.format(y_pred[count], proba[count]), bbox=box)
    ax.imshow(imread(x_test[count]))
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

## Results

### General accuracy metrics

In [None]:
# Summary statistics of the similarity scores collected in `proba` (one per matched test image).
p = np.array(proba)
'Mean: {:.2f}, Std: {:.2f}, Min: {:.2f}, Max: {:.2f}, Total: {}'.format(p.mean(), p.std(), p.min(), p.max(), len(p))

### Boxplot

In [None]:
# Box plot of the similarity scores in `proba`, as a single box labelled 'Rekognition'.
fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(111)
ax.boxplot([proba], labels=['Rekognition'])

## Destroy S3 bucket and Rekognition Collection

In [None]:
# Tear down the AWS resources only when this was requested in the
# configuration cell; previously this cell deleted the bucket unconditionally
# and ignored the `destroy` flag.
if destroy:
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    # The objects are removed first, then the bucket itself.
    for obj in bucket.objects.all():
        obj.delete()
    bucket.delete()

    # Also remove the Rekognition Collection, as the section title announces.
    boto3.client('rekognition', region_name=region).delete_collection(CollectionId=collection_id)