### Backfill `car_plate` given plate recognitions

2025-07-02: I want `car_plate` to be recorded as a detected object whenever the plate recognition model reads a plate. I have changed the Lambda function to do this, but the events from a few days were recorded incorrectly, so this script fixes them.

In [None]:
import boto3
from decimal import Decimal
from collections import defaultdict
from boto3.dynamodb.conditions import Key, Attr



def convert_floats_to_decimals(obj):
    """Return a copy of ``obj`` in which every float is replaced by a Decimal.

    Lists and dicts are walked recursively and rebuilt; dict keys are kept
    as they are. Floats are converted through their ``str`` form, so ``0.1``
    becomes ``Decimal("0.1")``. Any other value (including tuples) is
    returned unchanged.
    """
    if isinstance(obj, dict):
        return {
            key: convert_floats_to_decimals(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [convert_floats_to_decimals(element) for element in obj]
    if isinstance(obj, float):
        return Decimal(str(obj))
    return obj

# AWS region used for the boto3 session below.
AWS_REGION = "eu-west-1"

# Region-pinned boto3 session and the DynamoDB resource created from it.
session = boto3.Session(region_name=AWS_REGION)
dynamodb = session.resource("dynamodb")

# Handle to the "events" table that the backfill scans and updates.
events_table = dynamodb.Table("events")

In [None]:
def safe_float(val):
    """Return ``val`` as a float when it is a Decimal, otherwise unchanged."""
    return float(val) if isinstance(val, Decimal) else val

def compute_car_plate_stats(plate_stats):
    """Collapse per-plate recognition stats into one ``car_plate`` entry.

    ``plate_stats`` maps each recognised plate to a mapping holding
    ``avg_confidence``, ``max_confidence`` and ``n_frames``.

    Returns a dict with:
      * ``avg_confidence`` - unweighted mean of the per-plate averages,
        rounded to 4 decimals;
      * ``max_confidence`` - highest per-plate maximum, rounded to 4 decimals;
      * ``n_frames`` - sum of the per-plate frame counts.

    With no plates, both confidences are 0.0 and ``n_frames`` is 0.
    """
    frame_total = 0
    mean_values = []
    peak_values = []

    for entry in plate_stats.values():
        mean_values.append(safe_float(entry["avg_confidence"]))
        peak_values.append(safe_float(entry["max_confidence"]))
        frame_total += int(entry["n_frames"])

    # Nothing to aggregate: report zero confidence rather than dividing by 0.
    if not mean_values:
        return {
            "avg_confidence": 0.0,
            "max_confidence": 0.0,
            "n_frames": frame_total,
        }

    return {
        "avg_confidence": round(sum(mean_values) / len(mean_values), 4),
        "max_confidence": round(max(peak_values), 4),
        "n_frames": frame_total,
    }

In [None]:
from datetime import datetime, timedelta, timezone
# How far back (in days) the backfill looks for events to repair.
DAYS_BACK = 10

# Backfill window: from DAYS_BACK days ago up to now, both in UTC.
now = datetime.now(timezone.utc)
start_time = now - timedelta(days=DAYS_BACK)

# The filter below compares strings, so this assumes event_timestamp is stored
# as an ISO 8601 string (and is the table's range key).
start_iso = start_time.isoformat()
now_iso = now.isoformat()

print(f"Scanning for events from {start_iso} to {now_iso}")

# NOTE: a Scan still reads the whole table; the filter only limits which of
# the read items are returned.
scan_kwargs = {
    "FilterExpression": Attr("event_timestamp").between(start_iso, now_iso)
}

updated = 0
scanned = 0
last_evaluated_key = None

# Page through the scan results until DynamoDB stops returning a
# LastEvaluatedKey.
while True:
    if last_evaluated_key:
        scan_kwargs["ExclusiveStartKey"] = last_evaluated_key

    response = events_table.scan(**scan_kwargs)
    items = response.get("Items", [])

    for item in items:
        scanned += 1
        # `or {}` also covers the attribute being present but null/empty, so
        # the membership test below cannot fail on None.
        detection_stats = item.get("detection_stats") or {}
        plate_stats = item.get("plate_recognition_stats")

        if not plate_stats:
            continue  # Missing or empty: no plate was recognised
        if "car_plate" in detection_stats:
            continue  # Already backfilled

        car_plate_stats = compute_car_plate_stats(plate_stats)
        if car_plate_stats["n_frames"] == 0:
            continue  # Skip if somehow empty

        # Add to detection_stats and update in DynamoDB
        detection_stats["car_plate"] = car_plate_stats

        key = {
            "device_id": item["device_id"],
            "event_timestamp": item["event_timestamp"],
        }
        # NOTE(review): this rewrites the whole detection_stats map from the
        # copy read during the scan; changes another writer makes to the map
        # in between would be overwritten - confirm nothing else updates
        # these events while the backfill runs.
        update_expr = "SET detection_stats = :d"
        # The stats hold Python floats, which are converted to Decimal before
        # the write.
        expr_attr_vals = {":d": convert_floats_to_decimals(detection_stats)}

        events_table.update_item(
            Key=key,
            UpdateExpression=update_expr,
            ExpressionAttributeValues=expr_attr_vals,
        )
        updated += 1
        print(f"Updated event {key}: {car_plate_stats}")

    last_evaluated_key = response.get("LastEvaluatedKey")
    if not last_evaluated_key:
        break

print(f"Total events scanned: {scanned}")
print(f"Total events updated: {updated}")