In [1]:
import numpy as np
import torch
import datetime
import os
import base64
import pandas as pd
from collections import Counter
from tqdm import tqdm
import sqlite3
from utils.pipeline import get_files
from utils.tools import seconds_to_time
# Let pandas render up to 1000 rows/columns when a frame is displayed in the notebook.
pd.set_option('display.max_rows', 1000)  # Adjust the number as needed
pd.set_option('display.max_columns', 1000)  # Adjust the number as needed

# Locate the artefacts of the tracker run under analysis and open its SQLite database.
files = get_files('/home/diego/Documents/yolov7-tracker/runs/detect/2024_04_17_conce_bytetrack')
db = files['db']
FRAME_NUMBER = 15  # divisor turning a frame index into seconds — presumably the video fps; TODO confirm
conn = sqlite3.connect(db)
cursor = conn.cursor()
bbox = pd.read_sql('SELECT * FROM bbox_raw', conn)


def _direction_from_img_name(img_name):
    """Return the direction token (4th '_'-separated field) of an image name.

    A missing name (None) maps to None; the literal token 'None' maps to 'undefined'.
    """
    if img_name is None:
        return None
    token = img_name.split('_')[3]
    return 'undefined' if token == 'None' else token


# Column-wise derivations: unlike a row-wise DataFrame.apply(axis=1) these stay
# fast on large tables and still yield a Series when bbox_raw has no rows.
bbox['direction'] = bbox['img_name'].map(_direction_from_img_name)
bbox['time_sec'] = bbox['frame_number'].astype(int) // FRAME_NUMBER
# Seconds since the start of the video rendered as a wall-clock style time (wraps after 24h).
bbox['time_video'] = pd.to_datetime(bbox['time_sec'], unit='s').dt.time


In [2]:
def get_direction_info(df):
    """Count ids per direction label, using each id's first non-null direction.

    Args:
        df: DataFrame with at least the columns 'id' and 'direction'.

    Returns:
        dict mapping the lower-cased direction label to the number of ids whose
        first recorded direction is that label, ordered from most to least
        frequent. Ids without any non-null direction are not counted.
    """
    # groupby builds a new object, so the input frame is never modified and
    # no defensive copy of the whole table is required.
    first_directions = df.groupby('id')['direction'].first()

    # Labels that differ only by case (e.g. 'In' / 'in') must be summed; a plain
    # dict comprehension over lower-cased keys would keep only the last one.
    merged = Counter()
    for label, n_ids in first_directions.value_counts().items():
        merged[label.lower()] += int(n_ids)

    # most_common() keeps the "most frequent first" ordering of value_counts.
    return dict(merged.most_common())



In [3]:
# Tally of ids per direction label (each id's first recorded direction) over the full bbox table.
get_direction_info(bbox)

{'undefined': 1466, 'out': 590, 'in': 565, 'cross': 64}

In [6]:
_OVERLAP_COLUMNS = [
    'id', 'direction', 'start_time', 'end_time',
    'id_overlap', 'direction_overlap', 'overlap_type',
    'id_overlap_start_time', 'id_overlap_end_time',
    'offset', 'count',
]


def _classify_overlap(ref_start, ref_end, start, end):
    """Name how the span [start, end] sits relative to the window [ref_start, ref_end]."""
    if start >= ref_start and end <= ref_end:
        return 'inside'
    if ref_start <= end <= ref_end:
        return 'start_overlap'
    if ref_start <= start <= ref_end:
        return 'end_overlap'
    if ref_start >= start and ref_end <= end:
        return 'suprass'
    return ''


def _seconds_to_clock(seconds):
    """Convert a number of seconds into a datetime.time (same conversion as 'time_video')."""
    return pd.to_datetime(seconds, unit='s').time()


def get_overlap_undefined(df, offset_overlap, direction_type):
    """List the ids whose time span overlaps the time span of each 'In'/'Out' id.

    Every id is reduced to its [min(time_sec), max(time_sec)] span, using only
    rows that have an img_name. The span of each 'In'/'Out' id is widened by
    ``offset_overlap`` seconds on both sides and compared with the spans of the
    ids whose direction is listed in ``direction_type``; an id is never compared
    with itself.

    Args:
        df: DataFrame with the columns 'img_name', 'id', 'direction' and 'time_sec'.
        offset_overlap: seconds added on each side of the 'In'/'Out' span.
        direction_type: list of direction labels selecting the candidate ids,
            e.g. ['undefined'] or ['In', 'Out'].

    Returns:
        DataFrame with one row per (In/Out id, overlapping id) pair. 'overlap_type'
        is one of 'inside', 'start_overlap', 'end_overlap' or 'suprass', and
        'count' is the number of overlapping ids found for that In/Out id. The
        columns are present even when no overlap is found.

    Raises:
        AssertionError: if ``df`` has no 'time_sec' column.
    """
    assert 'time_sec' in df.columns, "DataFrame must include a 'time_sec' column."

    detections = df.dropna(subset=['img_name'])

    def _spans(directions):
        # One row per id: first/last second seen and the first direction label.
        selected = detections[detections['direction'].isin(directions)]
        return selected.groupby('id').agg(
            start=('time_sec', 'min'),
            direction=('direction', 'first'),
            end=('time_sec', 'max')
        ).reset_index()

    reference = _spans(['In', 'Out'])
    # Expand the reference window by the offset on both sides.
    reference['start'] -= offset_overlap
    reference['end'] += offset_overlap

    candidates = _spans(direction_type)
    cand_start = candidates['start']
    cand_end = candidates['end']

    records = []
    for ref in reference.itertuples(index=False):
        touches_window = (
            ((cand_start >= ref.start) & (cand_end <= ref.end))
            | ((cand_end >= ref.start) & (cand_end <= ref.end))
            | ((cand_start >= ref.start) & (cand_start <= ref.end))
            | ((cand_start <= ref.start) & (cand_end >= ref.end))
        )
        # Exclude the reference id itself up front so the per-id count is simply
        # the number of remaining rows, computed once instead of once per row.
        overlaps = candidates[touches_window & (candidates['id'] != ref.id)]
        n_overlaps = len(overlaps)
        if n_overlaps == 0:
            continue

        ref_start_time = _seconds_to_clock(ref.start)
        ref_end_time = _seconds_to_clock(ref.end)
        for cand in overlaps.itertuples(index=False):
            records.append({
                'id': ref.id,
                'direction': ref.direction,
                'start_time': ref_start_time,
                'end_time': ref_end_time,
                'id_overlap': cand.id,
                'direction_overlap': cand.direction,
                'overlap_type': _classify_overlap(ref.start, ref.end, cand.start, cand.end),
                'id_overlap_start_time': _seconds_to_clock(cand.start),
                'id_overlap_end_time': _seconds_to_clock(cand.end),
                'offset': offset_overlap,
                'count': n_overlaps,
            })

    # Explicit columns keep the schema stable when there are no overlaps at all.
    return pd.DataFrame(records, columns=_OVERLAP_COLUMNS)


In [7]:
# Export one overlap table per candidate set, with no offset around the In/Out spans.
# The In/Out run is last, so `overlap_results` ends up holding that table.
for direction_type, csv_name in (
    (['undefined'], 'overlap_results_undefined.csv'),
    (['In', 'Out'], 'overlap_results_in_out.csv'),
):
    overlap_results = get_overlap_undefined(bbox, 0, direction_type)
    overlap_results.to_csv(csv_name, index=False)

In [10]:
# Number of overlap pairs per overlap category in whichever table `overlap_results` currently holds.
overlap_results['overlap_type'].value_counts()

overlap_type
inside           359
start_overlap    201
end_overlap      186
suprass           76
Name: count, dtype: int64

In [22]:
# Write `analysis` to CSV without the index column.
# NOTE(review): `analysis` is not defined in any visible cell — confirm which cell builds it before a fresh run.
analysis.to_csv('analysis.csv', index=False)

Info before: {'undefined': 1466, 'out': 590, 'in': 565, 'cross': 64}
Info after: {'out': 590, 'in': 565, 'undefined': 456, 'cross': 64}


In [None]:
# Per-interval id/direction mapping of the filtered table, plus the id count of each interval.
intervals = group_by_interval(filtered_df)
total = count_ids_in_intervals(intervals)

# NOTE(review): the files carry a .json name but contain Python repr text (str(dict)), not JSON.
for out_path, payload in (('intervals.json', intervals), ('total.json', total)):
    with open(out_path, 'w') as f:
        f.write(str(payload))

In [None]:
# Inspect the ids stored under the hard-coded interval key 35750 (raises KeyError if that key is absent).
intervals[35750]

In [None]:
# Save the filtered table to CSV without the index column.
filtered_df.to_csv('bbox_2.csv', index=False)

### Ex

In [None]:
def group_by_interval(df):
    """Build a nested mapping interval -> id -> {'direction': label}.

    Rows without an img_name are ignored. For every (time_interval, id) pair the
    first non-null direction of that pair is stored.

    Args:
        df: DataFrame with the columns 'img_name', 'time_interval', 'id' and 'direction'.

    Returns:
        dict keyed by time interval; each value is a dict keyed by id holding
        {'direction': <label>}.
    """
    rows_with_image = df.dropna(subset=['img_name']).copy()
    first_direction = (
        rows_with_image
        .groupby(['time_interval', 'id'])
        .agg({'direction': 'first'})
        .reset_index()
    )

    by_interval = {}
    for _, record in first_direction.iterrows():
        # setdefault creates the per-interval dict the first time an interval is seen.
        ids_in_interval = by_interval.setdefault(record['time_interval'], {})
        ids_in_interval[record['id']] = {'direction': record['direction']}
    return by_interval

# filter_interval_range_with_only_none
def get_intervals_with_none(interval_dict):
    """Select the intervals in which every id has the direction 'undefined'.

    Args:
        interval_dict: mapping interval -> id -> {'direction': label}.

    Returns:
        Tuple (intervals, ids): ``intervals`` maps each selected interval to a
        shallow copy of its id dict, ``ids`` is the set of all ids found in the
        selected intervals. An interval with no ids at all is selected too.
    """
    undefined_only = {}
    collected_ids = set()
    for interval, ids in interval_dict.items():
        if all(info['direction'] == 'undefined' for info in ids.values()):
            # Shallow copy so later edits of the result do not touch the input mapping.
            undefined_only[interval] = ids.copy()
            collected_ids.update(ids.keys())
    return undefined_only, collected_ids

def remove_ids_from_df(df, ids_to_remove):
    """Return a copy of ``df`` without the rows whose 'id' is in ``ids_to_remove``.

    Args:
        df: DataFrame with an 'id' column.
        ids_to_remove: collection of ids to drop.

    Returns:
        New DataFrame (original index labels kept) holding the remaining rows.
    """
    drop_mask = df['id'].isin(ids_to_remove)
    kept_rows = df.loc[~drop_mask]
    # Copy so that the caller can modify the result independently of ``df``.
    return kept_rows.copy()

def count_ids_in_intervals(interval_dict):
    """Count the ids of every interval, most populated interval first.

    Args:
        interval_dict: mapping interval -> collection of ids (anything supporting len()).

    Returns:
        dict interval -> number of ids, ordered by decreasing count; intervals
        with equal counts keep their original relative order.
    """
    ranked = sorted(
        interval_dict.items(),
        key=lambda entry: len(entry[1]),
        reverse=True,
    )
    return {interval: len(ids) for interval, ids in ranked}

# Pipeline: remove every id that occurs in a time interval made up solely of
# 'undefined' ids, then compare the per-direction id tallies before and after.
info_before = get_direction_info(bbox)
print(f"Info before: {info_before}")
# NOTE(review): add_column_time_interval is not defined in the visible cells; presumably it adds the
# 'time_interval' column that group_by_interval groups on — confirm. frame_rate=15 repeats the value of FRAME_NUMBER.
df = add_column_time_interval(bbox, interval_sec=5, frame_rate=15)
result = group_by_interval(df)
# intervals_keys: the all-'undefined' intervals; ids: every id seen in those intervals.
intervals_keys,ids = get_intervals_with_none(result)
filtered_df = remove_ids_from_df(df, ids)
info_after = get_direction_info(filtered_df)
print(f"Info after: {info_after}")