In [138]:
import duckdb

# Open the project's on-disk DuckDB database; later cells reuse `db`.
db = duckdb.connect(database="data/data.db")

# Preview the stored cross-video (global) object-ID assignments.
# Kept as the last expression so the notebook renders the relation.
db.sql("select * from latest_global_object_ids")

┌──────────────────────────────────────┬──────────────────────────────────────┬───────────────────────┬────────────┬──────────────────┬──────────┬───────────┬──────────────┐
│                  id                  │               video_id               │ intravideo_cluster_id │ cluster_id │ confidence_score │ is_match │ threshold │  created_at  │
│                 uuid                 │                 uuid                 │         int32         │   int32    │      float       │ boolean  │   float   │    float     │
├──────────────────────────────────────┼──────────────────────────────────────┼───────────────────────┼────────────┼──────────────────┼──────────┼───────────┼──────────────┤
│ b27dd3c4-ad31-4640-a6c1-4ca8f2258a4b │ 2645977e-c87f-4a99-9200-7c69e3bf9fc0 │                     0 │          0 │              1.0 │ true     │       0.9 │ 1748611200.0 │
│ 8ff7d600-75cc-446f-aab0-5296a536cbfb │ 2645977e-c87f-4a99-9200-7c69e3bf9fc0 │                     1 │          1 │              

In [139]:
# Close the DuckDB connection opened by `duckdb.connect("data/data.db")`.
# NOTE(review): a later cell passes `db` to `CrossVideoReID(conn=db)`; when the
# notebook is run top to bottom this close happens first — confirm cell order.
db.close()

In [115]:
# SQL that pulls a thinned-out sample of OSNet embeddings, stratified by
# (video, intravideo cluster). Also consumed by `CrossVideoReID`.
QUERY = """
-- doing a stratified sample across video_id and cluster_id
-- databases aren't really great for complex stratified sampling like this
-- so we're just going to keep every 5th record of each stratum. we can think
-- about more complex sampling techniques and efficient implementations once
-- we identify exactly what the limitations of this are (there really may not be any)
with numbered as (
    select
        v.id as video_id,
        v.filepath,
        v.uploaded_at,
        d.osnet_embedding,
        ioi.cluster_id as intravideo_cluster_id,
        row_number() over (
            partition by v.id, ioi.cluster_id
            order by f.id -- or d.id, whatever is consistent
        ) as rn
    from video v
    left join frame f on v.id = f.video_id
    left join detection d on f.id = d.frame_id
    left join intravideo_object_ids ioi on d.id = ioi.detection_id
    where
        -- a NULL flag makes this predicate NULL, so rows without a matching
        -- frame / intravideo_object_ids record are dropped here as well
        -- (the left joins therefore behave like inner joins)
        not (f.is_irregular or ioi.is_bad_frame)
)
select
    video_id,
    -- [0-9]+ so multi-digit names (video_10, video_11, ...) are not
    -- truncated to their first digit
    regexp_extract(filepath, '(video_[0-9]+)', 1) as video_name,
    uploaded_at,
    osnet_embedding,
    intravideo_cluster_id
from numbered
where rn % 5 = 1 -- every 5th record; adjust as needed

"""

# NOTE(review): `conn` is not created in any cell visible here (the first cell
# names its connection `db`) — confirm it exists on a fresh kernel.
df = conn.sql(QUERY).df()
df.head()

Unnamed: 0,video_id,video_name,uploaded_at,osnet_embedding,intravideo_cluster_id
0,b4ce8b0b-04e6-47d5-aa2a-2cbc70b67daa,video_2,1748605000.0,"[0.0720047, 0.13304754, 0.0, 0.0, 0.0, 0.02950...",1
1,b4ce8b0b-04e6-47d5-aa2a-2cbc70b67daa,video_2,1748605000.0,"[0.053059917, 0.122785814, 0.0, 0.0, 0.1649334...",1
2,b4ce8b0b-04e6-47d5-aa2a-2cbc70b67daa,video_2,1748605000.0,"[0.025426066, 0.08218063, 0.0, 0.0, 0.14551479...",1
3,b4ce8b0b-04e6-47d5-aa2a-2cbc70b67daa,video_2,1748605000.0,"[0.06980731, 0.09048571, 0.0, 0.0, 0.0529797, ...",1
4,b4ce8b0b-04e6-47d5-aa2a-2cbc70b67daa,video_2,1748605000.0,"[0.05251395, 0.055216823, 0.0, 0.0, 0.05586876...",1


In [None]:
from typing import Optional
import time
from pydantic import BaseModel, ConfigDict, Field
import duckdb
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import SpectralClustering

class CrossVideoReID(BaseModel):
    """Match per-video identity clusters against clusters found across videos.

    The sampled detections returned by the module-level ``QUERY`` are reduced
    with PCA and partitioned with spectral clustering. Every
    ``(video_id, intravideo_cluster_id)`` group is then assigned to the
    predicted cluster that contains the largest share of the group's rows.

    Attributes:
        conn: DuckDB connection that ``QUERY`` is executed on.
        threshold: A group is flagged ``is_match`` only when its inclusion
            score is strictly greater than this value.
        created_at: Unix timestamp taken when the model is instantiated.
        df: Result table (one row per group) once
            ``compute_crossvideo_reids`` has run; ``None`` before that.
        n_components: Upper bound on the number of PCA components. It is
            lowered automatically when the sample has fewer rows or features.
        n_clusters: Number of clusters requested from spectral clustering.
        random_state: Optional seed forwarded to PCA and spectral clustering.
            ``None`` keeps the estimators' default (unseeded) behaviour.
    """
    model_config = ConfigDict(arbitrary_types_allowed=True)

    conn: duckdb.DuckDBPyConnection
    threshold: float = 0.9
    created_at: float = Field(default_factory=time.time)
    df: Optional[pd.DataFrame] = None
    n_components: int = 50
    n_clusters: int = 2
    random_state: Optional[int] = None

    def compute_crossvideo_reids(self) -> 'CrossVideoReID':
        """Cluster the sampled embeddings and score every intravideo group.

        For each ``(video_id, intravideo_cluster_id)`` group the inclusion
        score of a predicted cluster is the fraction of the group's rows that
        fall into that cluster. The highest-scoring cluster is recorded; on a
        tie the cluster that appears first in the data wins.

        Returns:
            ``self``, with ``self.df`` holding the columns ``video_id``,
            ``intravideo_cluster_id``, ``matched_cluster``,
            ``confidence_score``, ``is_match`` and ``threshold``. If the query
            returns no rows, ``self.df`` is an empty frame with those columns.
        """
        df = self.conn.query(QUERY).df()

        # Nothing to cluster: publish an empty, correctly shaped result rather
        # than failing inside PCA.
        if df.empty:
            self.df = pd.DataFrame(columns=[
                'video_id',
                'intravideo_cluster_id',
                'matched_cluster',
                'confidence_score',
                'is_match',
                'threshold',
            ])
            return self

        embeddings = df['osnet_embedding'].to_list()

        # PCA cannot produce more components than min(n_samples, n_features),
        # so clamp the requested size for small samples.
        n_components = min(self.n_components, len(embeddings), len(embeddings[0]))
        reduced = PCA(
            n_components=n_components,
            random_state=self.random_state,
        ).fit_transform(embeddings)

        df['pred_cluster'] = SpectralClustering(
            n_clusters=self.n_clusters,
            random_state=self.random_state,
        ).fit_predict(reduced)

        # Build each predicted cluster's row-index set once (in order of first
        # appearance) instead of rebuilding it for every group.
        cluster_members = {
            cluster_id: set(df.index[df['pred_cluster'] == cluster_id])
            for cluster_id in df['pred_cluster'].unique()
        }

        matches = []
        gt_groups = df.groupby(['video_id', 'intravideo_cluster_id'])

        for (vid, pid), group in gt_groups:
            gt_indices = set(group.index)
            best_score = 0
            best_cluster = None

            for cluster_id, pred_indices in cluster_members.items():
                inclusion_score = len(gt_indices & pred_indices) / len(gt_indices)

                # Strict '>' keeps the first cluster encountered on ties.
                if inclusion_score > best_score:
                    best_score = inclusion_score
                    best_cluster = cluster_id

            matches.append({
                'video_id': vid,
                'intravideo_cluster_id': pid,
                'matched_cluster': best_cluster,
                'confidence_score': best_score,
                'is_match': best_score > self.threshold,
                'threshold': self.threshold
            })

        self.df = pd.DataFrame(matches)

        return self
        

In [None]:
# Close `conn`, the connection the sampling cell runs `conn.sql(QUERY)` on.
# NOTE(review): `conn` is not created in any cell visible here — confirm it is
# defined before this cell runs on a fresh kernel.
conn.close()

In [134]:
# `compute_crossvideo_reids()` returns the model itself; the per-group match
# table is stored on its `.df` attribute, so display that.
CrossVideoReID(conn=db).compute_crossvideo_reids().df

                               video_id  intravideo_cluster_id  \
0  2645977e-c87f-4a99-9200-7c69e3bf9fc0                      0   
1  2645977e-c87f-4a99-9200-7c69e3bf9fc0                      1   
2  b4ce8b0b-04e6-47d5-aa2a-2cbc70b67daa                      0   
3  b4ce8b0b-04e6-47d5-aa2a-2cbc70b67daa                      1   

   matched_cluster  confidence_score  is_match  
0                1               1.0      True  
1                0               1.0      True  
2                0               1.0      True  
3                1               1.0      True  



divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul



In [84]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Reduce the OSNet embeddings to 50 principal components, then project them to
# 2-D with t-SNE and store the coordinates on `df` for plotting.
# Both estimators are seeded so re-running the cell reproduces the same layout.
pca_50 = PCA(n_components=50, random_state=0).fit_transform(df['osnet_embedding'].to_list())
tsne_xy = TSNE(n_components=2, random_state=0).fit_transform(pca_50)
df['x_tsne'], df['y_tsne'] = tsne_xy[:, 0], tsne_xy[:, 1]


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by zero encountered in matmul


overflow encountered in matmul


invalid value encountered in matmul


divide by

In [85]:
import plotly.express as px

# Give every point a "<video_name>_person_<cluster id>" label so that each
# per-video identity gets its own colour in the scatter plot.
df['video_intravideo_id'] = (
    df['video_name']
    + '_person_'
    + df['intravideo_cluster_id'].astype(str)
)

# Scatter of the t-SNE coordinates, coloured by per-video identity.
px.scatter(
    data_frame=df,
    x='x_tsne',
    y='y_tsne',
    color='video_intravideo_id',
    title="Cross-video OSNet Embeddings (t-SNE)",
)