## Experiments querying events with processed AI

The goal is to filter events by a date range (start and end) and by detection class. The query should return a set of DynamoDB records, each associated with an S3 video file (many records to one video).

In [None]:
# --- Query parameters --------------------------------------------------------
# Device whose events are fetched; used as the equality condition on the
# "device" key of the table.
DEVICE_NAME = "axis-p3827-front-far"

# Time window to query, ISO-8601 with an explicit UTC offset.
START_DATE = "2025-05-01T00:00:00+00:00"
END_DATE = "2025-05-01T23:59:59+00:00"

# Detection class names we are interested in.
CLASSES_TO_QUERY = {"person", "dog", "bird"}

import boto3
from boto3.dynamodb.conditions import Key
from datetime import datetime
from pprint import pprint

# Convert to DynamoDB-compatible format (match what's stored before the "_")
START_KEY = START_DATE.replace("T", " ")
END_KEY = END_DATE.replace("T", " ")

# The stored "timestamp" keys carry a suffix after the timestamp itself (the
# "_..." part mentioned above). Such a key is longer than the bare END_KEY and
# therefore sorts *after* it, so BETWEEN(START_KEY, END_KEY) would silently
# drop every record from the final second of the window. "~" sorts after "_"
# and after all ASCII digits/letters, so appending it keeps those records in
# range without reaching into the next second.
# NOTE(review): assumes the stored key starts with exactly the END_KEY text
# (including the "+00:00" offset) -- confirm against the table contents.
END_KEY_INCLUSIVE = f"{END_KEY}~"

# Initialize DynamoDB
dynamodb = boto3.resource("dynamodb", region_name="eu-west-1")

table = dynamodb.Table("event_ai")

# The key condition is the same for every page, so build it once.
key_condition = (
    Key("device").eq(DEVICE_NAME)
    & Key("timestamp").between(START_KEY, END_KEY_INCLUSIVE)
)

all_items = []
last_key = None

# A single Query call may return only part of the result set; keep requesting
# pages until the response no longer contains a LastEvaluatedKey.
while True:
    query_kwargs = {"KeyConditionExpression": key_condition}
    if last_key:
        # Resume right after the last item of the previous page.
        query_kwargs["ExclusiveStartKey"] = last_key

    response = table.query(**query_kwargs)
    all_items.extend(response.get("Items", []))

    last_key = response.get("LastEvaluatedKey")
    if not last_key:
        break

print(f"Total items: {len(all_items)}")


In [None]:
# Lookup table from integer class id to class name. The ids are simply the
# positions in the list below, so the dict is built by enumerating it.
COCO_CLASSES = dict(enumerate([
    # 0-9
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    # 10-19
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    # 20-29
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    # 30-39
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    # 40-49
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    # 50-59
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    # 60-69
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 70-79
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]))

In [None]:
# Keep only events with at least one sufficiently confident detection of a
# requested class, collapsed to one entry per timestamp.

# Minimum detection score for a detection to count.
SCORE_THRESHOLD = 0.7
# Number of trailing characters of the "timestamp" key that hold the frame
# number and are cut off to obtain the plain event timestamp.
FRAME_SUFFIX_LEN = 4

print(f"Querying for items between {START_DATE} and {END_DATE} for device {DEVICE_NAME}")

match_items = []          # unique timestamps, in first-seen order
seen_timestamps = set()   # O(1) membership test instead of scanning the list

for item in all_items:
    # Names of all classes detected in this item above the score threshold.
    # int() turns the stored label into the integer id used by COCO_CLASSES.
    detected = {
        COCO_CLASSES[int(d["label"])]
        for d in item["all_fragment_detections"]
        if d["score"] >= SCORE_THRESHOLD
    }
    if detected.isdisjoint(CLASSES_TO_QUERY):
        continue

    timestamp = item["timestamp"][:-FRAME_SUFFIX_LEN]  # remove the frame number
    if timestamp not in seen_timestamps:
        seen_timestamps.add(timestamp)
        match_items.append(timestamp)

print(f"Found {len(match_items)} matches in the time range {START_DATE} to {END_DATE}")

In [None]:
from datetime import datetime, timedelta


# Build a chronologically sorted list of (start_time, s3_key) for every .mp4
# object stored under this device's prefix.
bucket_name = "motion-event-snapshots"
s3 = boto3.client("s3", region_name="eu-west-1")

# The trailing "/" matters: a bare device name as prefix would also list keys
# of any other device whose name merely starts with the same characters, and
# those keys would then fail timestamp parsing below.
device_prefix = f"{DEVICE_NAME}/"

video_files = []
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket_name, Prefix=device_prefix):
    # "Contents" is absent on pages without objects.
    for obj in page.get("Contents", []):
        key = obj["Key"]
        if not key.endswith(".mp4"):
            continue

        # The object name between the prefix and the extension is expected to
        # be the video timestamp, e.g. "<device>/2025-05-01T12:34:56.mp4".
        ts_str = key.removeprefix(device_prefix).removesuffix(".mp4")
        try:
            ts = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S")
        except ValueError:
            # One oddly named object should not abort the whole listing;
            # report it and carry on.
            print(f"Skipping video with unexpected name: {key}")
            continue
        video_files.append((ts, key))

# Tuples sort by timestamp first, then by key.
video_files.sort()


In [None]:
# 2. Map match_items to video file
from bisect import bisect_right

# A video is considered to contain a match if it starts at most this long
# before (or exactly at) the match timestamp.
MAX_VIDEO_LEAD = timedelta(seconds=11)

# video_files is sorted by timestamp, so the latest candidate video can be
# found with a binary search instead of scanning the whole list per match.
video_timestamps = [vid_ts for vid_ts, _ in video_files]

match_to_video = {}      # match timestamp string -> S3 key (or None)
matched_videos = set()   # distinct S3 keys that contain at least one match

for ts_str in match_items:
    match_ts = datetime.fromisoformat(ts_str.split("+")[0])  # strip offset if any
    lower_bound = match_ts - MAX_VIDEO_LEAD

    # Find best match: latest video before or at match_ts.
    # bisect_right returns the insertion point after all entries <= match_ts,
    # so the element just before it is the latest such video (if any).
    best_match = None
    idx = bisect_right(video_timestamps, match_ts) - 1
    if idx >= 0 and video_timestamps[idx] >= lower_bound:
        best_match = video_files[idx][1]

    match_to_video[ts_str] = best_match
    print(f"Match: {ts_str} -> Video: {best_match}")
    if best_match:
        matched_videos.add(best_match)

matched_videos

In [None]:
# Print a temporary download link (valid for 3600 seconds) for each matched video.
for video_key in matched_videos:
    request_params = {"Bucket": bucket_name, "Key": video_key}
    signed_url = s3.generate_presigned_url(
        "get_object", Params=request_params, ExpiresIn=3600
    )
    print(signed_url)