In [23]:
import json
import shutil
import time
from pathlib import Path
from urllib.parse import urlparse

import boto3
import requests
import s3fs
from tenacity import retry, stop_after_attempt, wait_exponential

In [15]:
# Filesystem-style S3 handle from s3fs; used further down to upload the local images.
s3 = s3fs.S3FileSystem()

In [16]:
# Name of the S3 bucket the test images are uploaded to and indexed from.
bucket = "rekognitionbatchdetectst-bucketwithtestimages6d1e-1tquc76u5yxm8"

In [17]:
# boto3 Rekognition client used by the collection, indexing and search cells below.
client = boto3.client("rekognition")

In [18]:
# Id of the Rekognition face collection this notebook creates, fills and searches.
collection_id = "TestCollection"

In [19]:
def reset_collection(collection_id):
    """Delete the face collection (if it exists) and create it again, empty.

    Args:
        collection_id: Id of the Rekognition collection to reset.

    Returns:
        A ``(delete_response, create_response)`` tuple. ``delete_response`` is
        ``None`` when the collection did not exist and nothing was deleted.
    """
    try:
        response_delete = client.delete_collection(CollectionId=collection_id)
    except client.exceptions.ResourceNotFoundException:
        # Nothing to delete: a reset must still succeed when the collection has
        # never been created or was already removed by hand.
        response_delete = None
    response_create = client.create_collection(CollectionId=collection_id)
    return response_delete, response_create

In [20]:
# Make sure the face collection exists; an already existing one is not an error.
print(f"Creating collection: {collection_id}")
try:
    response = client.create_collection(CollectionId=collection_id)
except client.exceptions.ResourceAlreadyExistsException:
    print(f"Collection {collection_id} already exists")
else:
    # Reached only when the collection was newly created.
    print("Collection ARN: " + response["CollectionArn"])
    print("Status code: " + str(response["StatusCode"]))
print("Done...")

Creating collection: TestCollection
Collection TestCollection already exists
Done...


In [56]:
# Unguarded create call: raises ResourceAlreadyExistsException when the collection
# already exists, which is what the saved output of this cell shows.
response = client.create_collection(CollectionId=collection_id)
response

ResourceAlreadyExistsException: An error occurred (ResourceAlreadyExistsException) when calling the CreateCollection operation: The collection id: TestCollection already exists

In [53]:
# Unguarded delete call: raises ResourceNotFoundException when the collection does
# not exist, which is what the saved output of this cell shows.
response = client.delete_collection(CollectionId=collection_id)
print(response)

ResourceNotFoundException: An error occurred (ResourceNotFoundException) when calling the DeleteCollection operation: The collection id: TestCollection does not exist

## Prepare images

In [21]:
# URL of the image archive that the next cell downloads and unpacks into data/.
# NOTE(review): plain-HTTP address — confirm it is still reachable before a re-run.
images_source = "http://vis-www.cs.umass.edu/lfw/lfw.tgz"

In [7]:
# Download the image archive, unpack it into data/ and remove the archive again.
zip_file = Path("test.tgz")
try:
    # The timeout bounds each connect/read wait so a stalled connection cannot
    # hang the kernel indefinitely.
    with requests.get(images_source, stream=True, timeout=60) as r:
        # Fail right away on a 4xx/5xx answer instead of saving an error page
        # that unpack_archive would later reject as a broken archive.
        r.raise_for_status()
        with zip_file.open("wb") as f:
            shutil.copyfileobj(r.raw, f)
    # NOTE(review): the archive comes from a remote host and unpack_archive trusts
    # the member paths stored inside it.
    shutil.unpack_archive(str(zip_file), "data")
finally:
    # Remove the (possibly partial) download even when a step above failed.
    if zip_file.exists():
        zip_file.unlink()

In [54]:
# Upload every entry matching "*/A*" below the local image root to
# s3://<bucket>/images/, keeping its path relative to that root.
images_path_local = Path("data/lfw/")
# A plain loop instead of a list comprehension: the uploads are side effects and
# no result list is needed.
for image_file in sorted(images_path_local.glob("*/A*")):
    # as_posix() keeps "/" separators in the S3 key regardless of the local OS.
    s3_key = image_file.relative_to(images_path_local).as_posix()
    s3.put(image_file.as_posix(), f"s3://{bucket}/images/{s3_key}")

In [21]:
# Passed as MaxFaces to index_faces in add_search_twin.
max_faces_index = 1
# Passed as MaxFaces to search_faces (add_search_twin and the direct search cell).
max_faces_match = 100
# Passed as FaceMatchThreshold to search_faces.
threshold = 50

In [48]:
@retry(
    stop=stop_after_attempt(7),
    wait=wait_exponential(multiplier=0.2, min=1, max=10),
)
def search_faces_retry(**search_kwargs):
    """Forward the keyword arguments to ``client.search_faces``, retrying on failure.

    The tenacity decorator is configured to stop after 7 attempts and to wait
    with exponential backoff (multiplier 0.2, min 1, max 10) between attempts.

    Returns:
        Whatever ``client.search_faces`` returns.
    """
    return client.search_faces(**search_kwargs)


def add_search_twin(image_path, image_id, collection_id):
    """Index one S3 image into a collection, then search the collection for its face.

    Args:
        image_path: Image location as a URL whose host part is the bucket and
            whose path is the object key, e.g. ``s3://bucket/images/a.jpg``.
        image_id: Value sent as ``ExternalImageId`` when indexing the image.
        collection_id: Id of the Rekognition collection to index into and search.

    Returns:
        The ``FaceMatches`` list of the search response, or ``None`` when the
        path could not be parsed or no face record came back from indexing.
    """
    try:
        uri_parsed = urlparse(image_path)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still stop the notebook.
        print("Could not parse the image path")
        return None

    bucket_name = uri_parsed.netloc
    s3_path = uri_parsed.path[1:]  # drop the leading "/" of the URL path
    if not bucket_name or not s3_path:
        # Without a bucket or key there is nothing valid to send to the API.
        print("Could not parse the image path")
        return None

    response_add = client.index_faces(
        CollectionId=collection_id,
        Image={"S3Object": {"Bucket": bucket_name, "Name": s3_path}},
        ExternalImageId=image_id,
        MaxFaces=max_faces_index,
        QualityFilter="AUTO",
        DetectionAttributes=["DEFAULT"],
    )
    try:
        face_id = response_add["FaceRecords"][0]["Face"]["FaceId"]
    except (KeyError, IndexError):
        # An empty FaceRecords list means no face was indexed from this image.
        print("no face detected in image")
        return None

    response_search = search_faces_retry(
        CollectionId=collection_id,
        FaceId=face_id,
        FaceMatchThreshold=threshold,
        MaxFaces=max_faces_match,
    )

    # Return the raw matches untouched. No per-match dict is built here, so a
    # match whose Face carries no ExternalImageId cannot raise a KeyError.
    return response_search["FaceMatches"]

In [27]:
# First 200 matching image paths in sorted order; this is the batch used below.
test_list = sorted(images_path_local.glob("*/A*"))[:200]
len(test_list)

200

In [30]:
# Show one entry of the test batch (index 22).
test_list[22]

PosixPath('data/lfw/Abdoulaye_Wade/Abdoulaye_Wade_0004.jpg')

In [43]:
# Index a single image of the test batch and search the collection for its face.
# The S3 URL mirrors the local relative path under images/, and the file name is
# passed as the image id.
a = test_list[22]
res = add_search_twin(
    f"s3://{bucket}/images/{a.relative_to(images_path_local)}",
    a.name,
    collection_id,
)
res

[{'Similarity': 99.99679565429688,
  'Face': {'FaceId': '8d5b0dd0-ac35-4330-9c60-30ab9790920b',
   'BoundingBox': {'Width': 0.40507200360298157,
    'Height': 0.5654019713401794,
    'Left': 0.3146420121192932,
    'Top': 0.1884160041809082},
   'ImageId': 'b3b1bba6-0c7a-357d-97a9-29195e9b00f9',
   'ExternalImageId': 'Abdoulaye_Wade_0002.jpg',
   'Confidence': 99.97209930419922}},
 {'Similarity': 99.98350524902344,
  'Face': {'FaceId': '446a716b-9a81-4f38-9ee6-3cc186e57138',
   'BoundingBox': {'Width': 0.4539870023727417,
    'Height': 0.5623430013656616,
    'Left': 0.32246100902557373,
    'Top': 0.2498140037059784},
   'ImageId': 'bc025554-9b8d-3b14-8b93-7f1920f73eb8',
   'ExternalImageId': 'Abdoulaye_Wade_0001.jpg',
   'Confidence': 99.9957046508789}},
 {'Similarity': 98.02940368652344,
  'Face': {'FaceId': '7ccf6294-2c1a-402e-94d3-3c098dfbe13f',
   'BoundingBox': {'Width': 0.4240429997444153,
    'Height': 0.641319990158081,
    'Left': 0.21246500313282013,
    'Top': 0.1645829975

In [47]:
# For each match, keep the part of its ExternalImageId before the last underscore.
[k['Face']['ExternalImageId'].rsplit('_', 1)[0] for k in res]

['Abdoulaye_Wade', 'Abdoulaye_Wade', 'Abdoulaye_Wade']

In [42]:
# Search the collection directly for one hard-coded face id, using the same
# threshold and match limit as add_search_twin.
search_faces_retry(
    CollectionId=collection_id,
    FaceId="6b2cf05f-f8da-4d14-b599-e23c10c5cba8",
    FaceMatchThreshold=threshold,
    MaxFaces=max_faces_match,
)

{'SearchedFaceId': '6b2cf05f-f8da-4d14-b599-e23c10c5cba8',
 'FaceMatches': [],
 'FaceModelVersion': '5.0',
 'ResponseMetadata': {'RequestId': 'a6e50d4b-e81e-4945-9c20-580e8ca9acb5',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Tue, 15 Jun 2021 01:33:26 GMT',
   'x-amzn-requestid': 'a6e50d4b-e81e-4945-9c20-580e8ca9acb5',
   'content-length': '99',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

In [55]:
# Number of faces in the collection. The response is a dict, so len() has to be
# taken of its "Faces" list; len() of the response itself counts top-level keys.
# NOTE(review): list_faces is paginated, so a single call may not return every
# face of a large collection.
len(client.list_faces(CollectionId=collection_id)["Faces"])

{'Faces': [{'FaceId': '0029146d-ba42-4eb7-b748-115361e099fa',
   'BoundingBox': {'Width': 0.29640400409698486,
    'Height': 0.3713339865207672,
    'Left': 0.33479800820350647,
    'Top': 0.29830700159072876},
   'ImageId': 'c8da9836-dc35-3cc5-a1c1-50cd877f3165',
   'ExternalImageId': 'Akiko_Morigami_0001.jpg',
   'Confidence': 99.99829864501953},
  {'FaceId': '0054936b-1a10-49b0-98e3-b5f770c3c8df',
   'BoundingBox': {'Width': 0.3423219919204712,
    'Height': 0.43749499320983887,
    'Left': 0.35898199677467346,
    'Top': 0.2804880142211914},
   'ImageId': 'f2b73ea9-722c-36af-9138-74abbdbb7fcc',
   'ExternalImageId': 'Anderson_Varejao_0001.jpg',
   'Confidence': 99.99960327148438},
  {'FaceId': '01020586-2eb0-4348-9adc-b25f06526d11',
   'BoundingBox': {'Width': 0.33448100090026855,
    'Height': 0.47363701462745667,
    'Left': 0.36955198645591736,
    'Top': 0.2579990029335022},
   'ImageId': 'd87b5ec3-d10b-347d-8e44-ab4fc89942ec',
   'ExternalImageId': 'Andrew_Weissmann_0001.jpg',

In [253]:
# Index and search every image of the test batch and time the whole run.
# perf_counter() is used for the interval because it is not affected by
# system clock adjustments.
tic = time.perf_counter()
report = [
    add_search_twin(
        f"s3://{bucket}/images/{k.relative_to(images_path_local)}",
        k.name,
        collection_id,
    )
    for k in test_list
]
toc = time.perf_counter() - tic  # elapsed seconds for the whole batch

In [254]:
# Elapsed time of the batch run, in seconds.
toc

107.07663011550903

In [255]:
# Per-image results of the batch run.
# NOTE(review): the saved output shows {ExternalImageId: similarity} dicts, which
# does not match add_search_twin as defined in this notebook (it returns the
# FaceMatches list or None) — the output looks stale; re-run to refresh it.
report

[{},
 {},
 {},
 {},
 {},
 {},
 {'Aaron_Peirsol_0001.jpg': 99.99894714355469},
 {'Aaron_Peirsol_0002.jpg': 99.99881744384766,
  'Aaron_Peirsol_0001.jpg': 99.99076843261719},
 {'Aaron_Peirsol_0003.jpg': 99.8084487915039,
  'Aaron_Peirsol_0002.jpg': 99.78285217285156,
  'Aaron_Peirsol_0001.jpg': 97.55587768554688},
 {},
 {},
 {'Aaron_Sorkin_0001.jpg': 99.9853515625},
 {},
 {},
 {},
 {},
 {},
 {},
 {'Abdel_Nasser_Assidi_0001.jpg': 99.99700164794922},
 {},
 {'Abdoulaye_Wade_0001.jpg': 99.98780822753906},
 {'Abdoulaye_Wade_0002.jpg': 99.65143585205078,
  'Abdoulaye_Wade_0001.jpg': 98.72579193115234},
 {'Abdoulaye_Wade_0002.jpg': 99.99679565429688,
  'Abdoulaye_Wade_0001.jpg': 99.98350524902344,
  'Abdoulaye_Wade_0003.jpg': 98.02940368652344},
 {},
 {},
 {},
 {},
 {'Abdullah_0001.jpg': 99.99881744384766},
 {'Abdullah_0002.jpg': 99.96894836425781,
  'Abdullah_0001.jpg': 99.96778869628906},
 {'Abdullah_0002.jpg': 99.99919128417969,
  'Abdullah_0003.jpg': 99.99903869628906,
  'Abdullah_0001.jpg'

In [222]:
# Face records of the collection as returned by a single list_faces call.
faces = client.list_faces(CollectionId=collection_id)["Faces"]

In [224]:
# Number of face records held in `faces`.
len(faces)

101

In [237]:
# Delete and recreate the collection; both API responses are displayed.
reset_collection(collection_id)

({'StatusCode': 200,
  'ResponseMetadata': {'RequestId': '160c114e-1303-4855-af3d-105a8899a348',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
    'date': 'Mon, 14 Jun 2021 06:36:46 GMT',
    'x-amzn-requestid': '160c114e-1303-4855-af3d-105a8899a348',
    'content-length': '18',
    'connection': 'keep-alive'},
   'RetryAttempts': 0}},
 {'StatusCode': 200,
  'CollectionArn': 'aws:rekognition:ap-southeast-1:570358149193:collection/TestCollection',
  'FaceModelVersion': '5.0',
  'ResponseMetadata': {'RequestId': '3ba86c81-d78b-4887-8743-d84cb72ddb95',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
    'date': 'Mon, 14 Jun 2021 06:36:46 GMT',
    'x-amzn-requestid': '3ba86c81-d78b-4887-8743-d84cb72ddb95',
    'content-length': '131',
    'connection': 'keep-alive'},
   'RetryAttempts': 0}})

In [2]:
# Sample S3 event notification ("ObjectCreated:Put") as a JSON string; the next
# cell parses it to read the object key.
# NOTE(review): the sample contains a principal id and a source IP address —
# consider redacting them before sharing this notebook.
payload = '{"Records":[{"eventVersion":"2.1","eventSource":"aws:s3","awsRegion":"ap-southeast-1","eventTime":"2021-06-14T14:52:14.325Z","eventName":"ObjectCreated:Put","userIdentity":{"principalId":"AWS:AROAYJS76WBE454BHFM4K:alecere-Isengard"},"requestParameters":{"sourceIPAddress":"27.125.177.195"},"responseElements":{"x-amz-request-id":"WED6FWYKCW0QK5PX","x-amz-id-2":"b9QHVC69vPWhowkDkcAYPJXcHwpNfkvt5ZIAN2yFOG05+IutPXbS8hRMTzvir9igVfH62OnKG8ahPtQvo0cN9l59qQ3R2uQA"},"s3":{"s3SchemaVersion":"1.0","configurationId":"ZWRmZjVmNjQtNmRjNi00NjdmLThmMzktYTBlMTZjZmEzYjFm","bucket":{"name":"rekognitionbatchdetectst-bucketwithtestimages6d1e-1tquc76u5yxm8","ownerIdentity":{"principalId":"ARJ0EWNBJ859Q"},"arn":"arn:aws:s3:::rekognitionbatchdetectst-bucketwithtestimages6d1e-1tquc76u5yxm8"},"object":{"key":"images/Aaron_Eckhart/Aaron_Eckhart_0001.jpg","size":10828,"eTag":"7fd4f5bb15d0446b17892d3d458928a1","sequencer":"0060C76D26F261602C"}}}]}'

In [13]:
# Print the S3 object key of every record in the sample event.
records = json.loads(payload)["Records"]
for record in records:
    object_key = record["s3"]["object"]["key"]
    print(object_key)

images/Aaron_Eckhart/Aaron_Eckhart_0001.jpg
