In [None]:
import pandas as pd
from ultralytics import YOLO
from ultralytics.engine.results import Results
import duckdb

# In-memory DuckDB connection used by later cells to run SQL.
conn = duckdb.connect(database=':memory:')

# Load the YOLOv11 nano weights; detections are restricted to class 0 (people).
model = YOLO("yolo11n.pt")
source = "data/video_1.mp4"

# One result object per video frame, filtered by IoU and confidence thresholds.
results = model(source, classes=[0], iou=0.4, conf=0.5)

# The detector is no longer needed once inference has run; drop it to free memory.
del model

# Frame-level table: the raw result, how many boxes it holds, and a running id.
frames = pd.DataFrame({"result": results}).assign(
    n_people_detected=lambda df: df['result'].map(lambda res: len(res.boxes)),
    frame_id=range(len(results)),
)

import plotly.express as px

px.line(
    frames,
    x='frame_id',
    y='n_people_detected',
    title="# People Detected By Frame",
)

In [None]:
import ruptures as rpt
import matplotlib.pyplot as plt

# Split the per-frame people count into segments with PELT change-point
# detection (l2 cost, penalty 6).
signal = frames['n_people_detected'].values
model = "l2"
algo = rpt.Pelt(model=model).fit(signal)
breakpoints = algo.predict(pen=6)

# Within each segment, a frame is "irregular" when its count differs from the
# segment's most common count (the first mode is used when several tie).
is_irregular_detection = []
segment_starts = [0] + breakpoints[:-1]
for seg_start, seg_end in zip(segment_starts, breakpoints):
    segment_counts = frames.iloc[seg_start:seg_end]['n_people_detected']
    segment_mode = segment_counts.mode().to_list()[0]
    print(seg_start, seg_end, segment_mode)
    is_irregular_detection.extend((segment_counts != segment_mode).to_list())

frames['is_irregular_detection'] = is_irregular_detection

# Plot the signal with the detected segment boundaries.
rpt.display(signal, breakpoints, figsize=(20, 6))
plt.show()

In [None]:
import uuid
import torch
from torchvision import transforms
from torch.nn.functional import normalize
import torchreid
import cv2
from tqdm.auto import tqdm

# Prefer Apple's Metal (MPS) backend when it is usable, otherwise run on CPU.
# torch.backends.mps.is_available() is the documented availability check and
# also exists on PyTorch releases that do not provide torch.mps.is_available().
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"RUNNING ON {device}")

# Load OSNet model (pretrained weights) and switch it to eval mode.
osnet_model = torchreid.models.build_model(
    name='osnet_x1_0',
    num_classes=1000,
    loss='softmax',
    pretrained=True
    ).to(device).eval()

# Transform for OSNet input: resize every crop to 256x128 and normalise channels.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

osnet = []

# Preprocess for OSNet: one record per detected box whose crop is non-empty.
# Iterating the two columns directly avoids building a Series per frame.
for frame_id, result in zip(frames['frame_id'], frames['result']):
    # `result` is expected to be an ultralytics Results object.
    confidences = result.boxes.conf.cpu().tolist()
    for conf, box in zip(confidences, result.boxes.xyxy):
        x1, y1, x2, y2 = map(int, box)
        crop_bgr = result.orig_img[y1:y2, x1:x2]
        if crop_bgr.size == 0:
            # Degenerate box (zero width or height after int truncation).
            continue

        # The crop is converted from BGR to RGB before the transform is applied.
        crop_rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB)
        osnet.append(
            {
                "detection_id": uuid.uuid4(),
                "input": transform(crop_rgb),
                "frame_id": frame_id,
                "conf": float(conf),
                "x1": x1,
                "y1": y1,
                "x2": x2,
                "y2": y2
                }
            )

# Fail with an explicit message instead of an opaque error from torch.cat([]).
if not osnet:
    raise ValueError("No non-empty person crops were found; cannot compute OSNet embeddings.")

detections = pd.DataFrame(osnet)

batch_size = 32

# Embed the crops in mini-batches without tracking gradients.
embs = []
for batch_start in tqdm(range(0, len(detections), batch_size)):
    batch_inputs = detections['input'].iloc[batch_start:batch_start + batch_size].to_list()
    batch = torch.stack(batch_inputs).to(device)
    with torch.no_grad():
        embs.append(osnet_model(batch))

# L2-normalise each embedding row, then store one numpy vector per detection.
detections['osnet_embedding'] = list(normalize(torch.cat(embs), dim=1).cpu().numpy())

# The preprocessed input tensors are only needed for inference.
del detections['input']

# for memory
del osnet_model

In [None]:
from sklearn.decomposition import PCA
import plotly.express as px

# Fixed seed so the projections are reproducible even when scikit-learn picks
# its randomized SVD solver (the seed has no effect on the exact solvers).
PCA_SEED = 0

# Materialise the embedding matrix once; both projections share it.
embedding_matrix = detections['osnet_embedding'].to_list()

# 2-D PCA coordinates for plotting.
detections['x_pca'], detections['y_pca'] = zip(
    *PCA(n_components=2, random_state=PCA_SEED).fit_transform(embedding_matrix)
)
# 50-D PCA projection stored per detection (one vector per row).
detections['pca_50'] = list(
    PCA(n_components=50, random_state=PCA_SEED).fit_transform(embedding_matrix)
)


# Attach each detection's frame-level irregularity flag to its 2-D coordinates.
query = """
select
    d.x_pca,
    d.y_pca,
    f.is_irregular_detection
from detections d
left join frames f on d.frame_id=f.frame_id
"""
fig = px.scatter(conn.sql(query).df(), x='x_pca', y='y_pca', color='is_irregular_detection')
fig.show()

In [None]:
import numpy as np

# Flag every detection whose confidence falls below the 5th percentile of all
# detection confidences.
conf_p5 = np.quantile(detections['conf'].to_numpy(), 0.05)
detections['conf_under_p5'] = detections['conf'] < conf_p5

# Histogram of confidences with the 5th-percentile cut-off drawn as a vertical line.
fig = px.histogram(detections, x='conf', title="Confidence Histogram")
fig.add_vline(x=conf_p5)
fig.show()

In [None]:
from sklearn.cluster import KMeans

# Keep only detections that are neither low-confidence nor from a frame whose
# people count was flagged as irregular.
query = """
select
    d.frame_id,
    d.detection_id,
    d.osnet_embedding,
    f.n_people_detected
from detections d
left join frames f on f.frame_id = d.frame_id
where
    not (d.conf_under_p5 or f.is_irregular_detection)
"""

clusters = conn.query(query).df()

# Fixed seed: without it KMeans (and PCA, when it uses the randomized solver)
# can return different or swapped labels on every run, which would change the
# IDs assigned to detections downstream.
CLUSTER_SEED = 0

# Materialise the embedding matrix once; both PCA fits share it.
embedding_matrix = clusters['osnet_embedding'].to_list()

# Cluster in a 50-D PCA space. n_clusters=2 is hard-coded -- presumably the
# number of distinct people expected in the video; TODO confirm.
pca_50 = PCA(n_components=50, random_state=CLUSTER_SEED).fit_transform(embedding_matrix)
clusters['cluster_id_raw'] = KMeans(n_clusters=2, random_state=CLUSTER_SEED).fit_predict(pca_50)

# Separate 2-D PCA projection used only for plotting.
clusters['pca_x'], clusters['pca_y'] = zip(
    *PCA(n_components=2, random_state=CLUSTER_SEED).fit_transform(embedding_matrix)
)

fig = px.scatter(
    clusters,
    x='pca_x',
    y='pca_y',
    color='cluster_id_raw',
    hover_data=[
        'cluster_id_raw',
        'pca_x',
        'pca_y',
        'frame_id',
        'n_people_detected'
    ]
    )

fig.show()

In [None]:
import duckdb

# Fresh in-memory DuckDB connection. No tables are created on it; the query
# below refers to `clusters` and `detections` by name -- presumably resolved by
# DuckDB against the in-scope pandas DataFrames (TODO confirm).
conn = duckdb.connect(':memory:')

# Query outline:
#   cpf        - per frame, the number of distinct raw cluster ids assigned.
#   bad_frames - frame ids where more than one person was detected, the people
#                count exceeds the number of distinct clusters in that frame,
#                and more than one row of the frame satisfies those conditions.
#   cleaned    - every cluster row plus an `is_bad_frame` flag and a
#                `cluster_id` that is -1 on bad frames, else the raw id.
#   final      - joins box coordinates from `detections` by detection_id and
#                keeps only rows with is_bad_frame false, so the -1 cluster_id
#                never appears in the result.
query = """
with cpf as (
    select
        frame_id,
        count(distinct cluster_id_raw) as n_distinct_clusters_per_frame
    from clusters
    group by 1
    order by 1
),

bad_frames as (
    select
        clusters.frame_id
    from clusters
    left join cpf on clusters.frame_id = cpf.frame_id
    where
        n_people_detected > 1
        and n_people_detected > n_distinct_clusters_per_frame
    qualify count(*) over (partition by clusters.frame_id) > 1
),

cleaned as (
    select
        *,
        frame_id in (select * from bad_frames) as is_bad_frame,
        case
            when frame_id in (select * from bad_frames) then -1
            else cluster_id_raw
        end as cluster_id
    from clusters
)

select
    cleaned.*,
    detections.x1,
    detections.y1,
    detections.x2,
    detections.y2
from cleaned
left join detections on cleaned.detection_id=detections.detection_id
where not cleaned.is_bad_frame
"""

# Materialise the cleaned detections as a pandas DataFrame.
clean = conn.sql(query).df()

# Scatter of the 2-D PCA coordinates coloured by the cleaned cluster id.
fig = px.scatter(
    clean,
    x='pca_x',
    y='pca_y',
    color='cluster_id',
    hover_data=["pca_x", "pca_y", "cluster_id", "frame_id", "is_bad_frame"]
    )
fig.show()

In [None]:
import cv2

# Decode the whole video into memory so frames can be annotated in place.
cap = cv2.VideoCapture(source)
if not cap.isOpened():
    raise IOError(f"Could not open video source: {source}")

video_frames = []
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        video_frames.append(frame)
finally:
    # Release the capture handle even if decoding fails part-way through.
    cap.release()

# Draw every cleaned detection on its frame. Grouping once replaces a boolean
# filter over `clean` for every single frame.
# Assumes the i-th decoded frame corresponds to frame_id i -- TODO confirm.
for frame_id, boxes in clean.groupby('frame_id'):
    frame_id = int(frame_id)
    if not 0 <= frame_id < len(video_frames):
        continue
    frame = video_frames[frame_id]

    for row in boxes.itertuples(index=False):
        # OpenCV drawing calls expect plain Python ints for pixel coordinates.
        x1, y1, x2, y2 = int(row.x1), int(row.y1), int(row.x2), int(row.y2)
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            frame,
            # Attribute access avoids reusing the outer quote character inside
            # the f-string, which is a SyntaxError before Python 3.12.
            f"ID {row.cluster_id}",
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 255, 0),
            2
            )

# Play back the annotated frames (~50 ms per frame); press "q" to stop early.
try:
    for frame in video_frames:
        cv2.imshow("YOLO Detections", frame)
        if cv2.waitKey(50) & 0xFF == ord('q'):
            break
finally:
    # Always close the preview window, including on interrupt.
    cv2.destroyAllWindows()