In [1]:
import base64
import datetime
import json
import os
import sqlite3
from collections import Counter

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from utils.pipeline import get_files
from utils.tools import seconds_to_time
# Show up to 1000 rows / columns when a DataFrame is displayed.
pd.set_option('display.max_rows', 1000)  # Adjust the number as needed
pd.set_option('display.max_columns', 1000)  # Adjust the number as needed

# Resolve the files of the tracking run; only the 'db' entry is used in this cell.
files = get_files('/home/diego/Documents/yolov7-tracker/runs/detect/2024_04_17_conce_bytetrack')
db = files['db']

conn = sqlite3.connect(db)
cursor = conn.cursor()
bbox = pd.read_sql('SELECT * FROM bbox_raw', conn)


def _direction_from_img_name(img_name):
    """Extract the direction token (4th '_'-separated field) from an image name.

    Returns None when ``img_name`` is None, 'undefined' when the token is the
    literal string 'None', and the token itself otherwise.

    Raises IndexError if the name has fewer than four '_'-separated fields.
    """
    if img_name is None:
        return None
    token = img_name.split('_')[3]
    return 'undefined' if token == 'None' else token


# Map over the single column that is actually needed instead of a row-wise
# DataFrame.apply(axis=1): no per-row Series is built, and the result is always
# a Series (also for an empty table).
bbox['direction'] = bbox['img_name'].apply(_direction_from_img_name)

In [15]:
def get_direction_info(df):
    """Count tracked ids per direction, using each id's first non-null direction.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'id' and 'direction'. The frame is only read,
        never modified.

    Returns
    -------
    dict
        Maps the lower-cased direction label to the number of ids whose first
        non-null direction is that label. Ids without any direction are not
        counted.
    """
    # First non-null direction seen for every id.
    first_directions = df.groupby('id')['direction'].first()
    # Lower-case BEFORE counting: lower-casing the keys of an already counted
    # dict would make labels that differ only in case (e.g. 'In' / 'in')
    # overwrite each other and silently lose counts.
    normalized = first_directions.dropna().map(lambda label: label.lower())
    return normalized.value_counts().to_dict()

def add_column_time_interval(df, interval_sec, frame_rate):
    """Return a copy of ``df`` with 'time_interval' and 'time_video' columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'frame_number' column; the input frame is left untouched.
    interval_sec : number
        Length of one time window.
    frame_rate : number
        Frames per unit of ``interval_sec``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` where 'time_interval' is the start of the window each
        frame falls into and 'time_video' is that value passed through
        ``seconds_to_time``.
    """
    window_frames = interval_sec * frame_rate
    # Floor each frame to the start of its window, expressed in interval_sec units.
    window_start = (df['frame_number'] // window_frames) * interval_sec
    return df.assign(
        time_interval=window_start,
        time_video=window_start.apply(seconds_to_time),
    )

def analysis_by_interval(df):
    """Summarise detections per (time_interval, id) pair.

    Rows whose 'img_name' is null are ignored. For each remaining
    (time_interval, id) pair the first 'direction' and first 'time_video'
    are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'time_interval', 'id', 'img_name', 'direction'
        and 'time_video'.

    Returns
    -------
    pandas.DataFrame
        One row per (time_interval, id) with the extra columns 'total'
        (number of distinct ids in that interval) and 'all_undefined'
        (True when every id of the interval has direction 'undefined').

    Raises
    ------
    AssertionError
        If 'time_interval' is not a column of ``df``.
    """
    assert 'time_interval' in df.columns, "DataFrame must include a 'time_interval' column."

    # Only detections that have an image are considered.
    with_image = df.dropna(subset=['img_name']).copy()

    # Distinct ids seen in every interval.
    ids_per_interval = with_image.groupby('time_interval')['id'].nunique().to_dict()

    def _only_undefined(directions):
        # True when no direction in the group differs from 'undefined'.
        for direction in directions:
            if direction != 'undefined':
                return False
        return True

    final_df = (
        with_image
        .groupby(['time_interval', 'id'])
        .agg({'direction': 'first', 'time_video': 'first'})
        .reset_index()
    )
    final_df['total'] = final_df['time_interval'].apply(ids_per_interval.__getitem__)
    final_df['all_undefined'] = (
        final_df.groupby('time_interval')['direction'].transform(_only_undefined)
    )
    return final_df




In [3]:
# Bucket every detection into time windows; the positional arguments are
# interval_sec=5 and frame_rate=15.
bbox_with_interval = add_column_time_interval(bbox, 5, 15)
# One row per (time_interval, id), plus the per-interval 'total' and 'all_undefined' columns.
analysis = analysis_by_interval(bbox_with_interval)

In [4]:
#get_direction_info(bbox)

In [18]:
# Persist the per-interval analysis to 'analysis.csv' in the working directory, without the index column.
analysis.to_csv('analysis.csv', index=False)

Info before: {'undefined': 1466, 'out': 590, 'in': 565, 'cross': 64}
Info after: {'out': 590, 'in': 565, 'undefined': 456, 'cross': 64}


In [None]:
# NOTE: `group_by_interval`, `count_ids_in_intervals` and `filtered_df` are
# defined in a cell further down in this notebook; that cell has to be run
# before this one.
intervals = group_by_interval(filtered_df)
total = count_ids_in_intervals(intervals)

# Write real JSON instead of the Python repr of the dicts (str(dict) uses
# single quotes and non-string keys, which JSON parsers reject). JSON object
# keys must be strings, so interval and id keys are converted explicitly.
intervals_serializable = {
    str(interval): {str(track_id): info for track_id, info in ids.items()}
    for interval, ids in intervals.items()
}
with open('intervals.json', 'w') as f:
    json.dump(intervals_serializable, f, indent=2)

# Insertion order (largest interval first) is preserved by json.dump.
total_serializable = {str(interval): count for interval, count in total.items()}
with open('total.json', 'w') as f:
    json.dump(total_serializable, f, indent=2)

In [None]:
# Peek at the ids grouped under the interval whose key is 35750
# (raises KeyError if that interval is not present in `intervals`).
intervals[35750]

In [None]:
# Persist the filtered detections to 'bbox_2.csv' in the working directory, without the index column.
filtered_df.to_csv('bbox_2.csv', index=False)

### Ex

In [None]:
def group_by_interval(df):
    """Build a nested ``{time_interval: {id: {'direction': ...}}}`` mapping.

    Rows with a null 'img_name' are discarded first; for every remaining
    (time_interval, id) pair the first 'direction' is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns 'time_interval', 'id', 'img_name' and 'direction'.

    Returns
    -------
    dict
        Outer keys are time intervals, inner keys are ids, and each inner
        value is a dict with the single key 'direction'.
    """
    with_image = df.dropna(subset=['img_name']).copy()
    first_direction = (
        with_image
        .groupby(['time_interval', 'id'])
        .agg({'direction': 'first'})
        .reset_index()
    )

    interval_dict = {}
    for _, record in first_direction.iterrows():
        # setdefault creates the per-interval dict the first time an interval is seen.
        ids_in_interval = interval_dict.setdefault(record['time_interval'], {})
        ids_in_interval[record['id']] = {'direction': record['direction']}
    return interval_dict

# filter_interval_range_with_only_none
def get_intervals_with_none(interval_dict):
    """Select the intervals in which every id has direction 'undefined'.

    Parameters
    ----------
    interval_dict : dict
        Mapping ``{interval: {id: {'direction': ...}}}``.

    Returns
    -------
    tuple(dict, set)
        ``none_intervals`` holds a shallow copy of the id mapping for every
        interval whose ids are all 'undefined' (an interval with no ids
        qualifies as well); ``only_ids`` is the set of all ids found in those
        intervals.
    """
    none_intervals = {
        interval: dict(ids)  # shallow copy so the caller's mapping is not shared
        for interval, ids in interval_dict.items()
        if all(info['direction'] == 'undefined' for info in ids.values())
    }

    only_ids = set()
    for ids in none_intervals.values():
        only_ids.update(ids)
    return none_intervals, only_ids

def remove_ids_from_df(df, ids_to_remove):
    """Return a copy of ``df`` without the rows whose 'id' is in ``ids_to_remove``.

    The original index labels of the surviving rows are kept and ``df`` itself
    is not modified.
    """
    drop_mask = df['id'].isin(ids_to_remove)
    keep_mask = ~drop_mask
    return df[keep_mask].copy()

def count_ids_in_intervals(interval_dict):
    """Count the ids of every interval, ordered from most to fewest ids.

    Parameters
    ----------
    interval_dict : dict
        Mapping ``{interval: {id: ...}}``.

    Returns
    -------
    dict
        ``{interval: number_of_ids}`` in descending count order; intervals
        with equal counts keep their original relative order.
    """
    sizes = [(interval, len(ids)) for interval, ids in interval_dict.items()]
    # list.sort is stable, also with reverse=True, so ties keep input order.
    sizes.sort(key=lambda pair: pair[1], reverse=True)
    return dict(sizes)

# Direction counts (one vote per id) before any filtering.
info_before = get_direction_info(bbox)
print(f"Info before: {info_before}")
# Tag every detection with its time window (interval_sec=5, frame_rate=15).
df = add_column_time_interval(bbox, interval_sec=5, frame_rate=15)
# {time_interval: {id: {'direction': ...}}}
result = group_by_interval(df)
# `intervals_keys` is the dict of intervals whose ids are all 'undefined';
# `ids` is the set of ids that appear in those intervals.
intervals_keys,ids = get_intervals_with_none(result)
# Drop every detection belonging to one of those ids.
filtered_df = remove_ids_from_df(df, ids)
# Direction counts after the filtering, for comparison with info_before.
info_after = get_direction_info(filtered_df)
print(f"Info after: {info_after}")