In [None]:
import numpy as np
import torch
import datetime
import os
import base64
import pandas as pd
from collections import Counter
from tqdm import tqdm
import sqlite3
from utils.pipeline import get_files
from utils.data_analysis import get_overlap_undefined,get_direction_info
from utils.types import Direction
# Set the maximum number of rows and columns to display
pd.set_option('display.max_rows', 1000)  # Adjust the number as needed
pd.set_option('display.max_columns', 1000)  # Adjust the number as needed

files = get_files('/home/diego/Documents/yolov7-tracker/runs/detect/2024_04_17_conce_bytetrack')
db = files['db']
FRAME_NUMBER = 15
conn = sqlite3.connect(db)
bbox = pd.read_sql('SELECT * FROM bbox_raw', conn)
bbox['direction'] = bbox.apply(lambda row: ('undefined' if row['img_name'].split('_')[3] == 'None' else  row['img_name'].split('_')[3]) if row['img_name'] is not None else None, axis=1)
bbox['time_sec'] = bbox.apply(lambda row: int(row['frame_number']) // FRAME_NUMBER, axis=1)
bbox['time_video'] = pd.to_datetime(bbox['time_sec'], unit='s').dt.time


In [None]:
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from scipy.stats import linregress


def create_plot(df, primary_id,unique_ids, intersecting_id_colors=['green', 'orange', 'purple', 'cyan', 'magenta', 'yellow', 'brown']):
    slopes = {}
    for unique_id in unique_ids:
        id_data = df[df['id'] == unique_id]
        slope, intercept, r_value, p_value, std_err = linregress(id_data['frame_number'], id_data['distance_to_center'])
        slopes[unique_id] = slope
        
    reference_slope = slopes[primary_id]  # You need to define this
    normalized_slopes = {unique_id: slope / reference_slope for unique_id, slope in slopes.items()}
    
    
    
    scaled_slopes = {}
    for unique_id in unique_ids:
        id_data = df[df['id'] == unique_id]
        min_y = id_data['distance_to_center'].min()
        max_y = id_data['distance_to_center'].max()
        scaled_slopes[unique_id] = (slopes[unique_id] - min_y) / (max_y - min_y) if max_y != min_y else 0
    
    
    standardized_slopes = {}
    for unique_id in unique_ids:
        id_data = df[df['id'] == unique_id]
        mean_y = id_data['distance_to_center'].mean()
        std_y = id_data['distance_to_center'].std()
        standardized_slopes[unique_id] = (slopes[unique_id] - mean_y) / std_y if std_y != 0 else 0

    
    
    # Create the figure and axis objects
    fig, ax = plt.subplots(figsize=(10, 6))  # You can adjust the size as needed

    # Filter the dataframe for the primary ID and sort it
    df_primary_id = df[df['id'] == primary_id].sort_values(by='frame_number')

    # Scatter plot for the primary ID with different colors for positive and negative distances
    positive_distance = df_primary_id['distance_to_center'] > 0
    ax.scatter(df_primary_id[positive_distance]['frame_number'], df_primary_id[positive_distance]['distance_to_center'],c='blue', s=10, alpha=0.6, label=f'ID {primary_id} In')
    ax.scatter(df_primary_id[~positive_distance]['frame_number'], df_primary_id[~positive_distance]['distance_to_center'],c='red', s=10, alpha=0.6, label=f'ID {primary_id} Out')

    # Calculate the time frame start and end
    timeframe_start = df['frame_number'].min()
    timeframe_end = df['frame_number'].max()

    # Identify other intersecting IDs within this timeframe
    intersecting_ids = df[(df['frame_number'] >= timeframe_start) & (df['frame_number'] <= timeframe_end) &  (df['id'] != primary_id)]['id'].unique()

    # Plot data for each intersecting ID within the timeframe
    for idx, other_id in enumerate(intersecting_ids):
        df_other_id = df[(df['id'] == other_id) &  (df['frame_number'] >= timeframe_start) &  (df['frame_number'] <= timeframe_end)]
        color = intersecting_id_colors[idx % len(intersecting_id_colors)]
        ax.scatter(df_other_id['frame_number'], df_other_id['distance_to_center'], c=color, s=20, edgecolor='k', label=f'ID {other_id}')
    
    # Plot formatting
    ax.axhline(y=0, color='black', linestyle='--', linewidth=1)
    ax.set_xlim(timeframe_start - 10, timeframe_end + 10)  # A little space around the edges
    ax.set_ylim(df['distance_to_center'].min() - 10, df['distance_to_center'].max() + 10)

    # Construct the title to include the ID and its timeframe
    duration = timeframe_end - timeframe_start
    title_text = f'ID {primary_id}: #{duration} '
    ax.set_title(title_text, color='red')

    ax.set_xlabel('Frame Number')
    ax.set_ylabel('Distance to Center')
    ax.legend(loc='upper left', fontsize='small')
    slopes_text = ', '.join([f'{key}: {value:.3f}' for key, value in slopes.items()])
    fig.text(0.5, 1, f"{slopes_text} = slopes", ha='center', va='center', fontsize=10, color='green', style='italic')  # Adjust the position and style as needed
    
    normalized_slopes_text = ', '.join([f'{key}: {value:.3f}' for key, value in normalized_slopes.items()])
    fig.text(0.5, -0.02, f"{normalized_slopes_text} = normalized", ha='center', va='center', fontsize=10, color='blue', style='italic')  # Adjust the position and style as needed
    
    scaled_slopes_text = ', '.join([f'{key}: {value:.3f}' for key, value in scaled_slopes.items()])
    fig.text(0.5, -0.05, f"{scaled_slopes_text} = scaled", ha='center', va='center', fontsize=10, color='blue', style='italic')  # Adjust the position and style as needed
    
    standardized_slopes_text = ', '.join([f'{key}: {value:.3f}' for key, value in standardized_slopes.items()])
    fig.text(0.5, -0.08, f"{standardized_slopes_text} = standardized", ha='center', va='center', fontsize=10, color='blue', style='italic')  # Adjust the position and style as needed
    
    
    plt.tight_layout()
    plt.show()

In [None]:
from scipy.stats import pearsonr
import seaborn as sns

query = 'SELECT * FROM overlap_results WHERE count = 1'
overlap_results = pd.read_sql(query, conn)

ids_overlap = list(overlap_results['id_overlap'])
query = 'SELECT id, distance_to_center, frame_number FROM bbox_raw WHERE id IN ({})'.format(', '.join(['?']*len(ids_overlap)))
bboxes = pd.read_sql(query, conn, params=ids_overlap)

def calculate_similarity(df, ids):
    correlation_matrix = np.zeros((len(ids), len(ids)))
    
    for i, id1 in enumerate(ids):
        for j, id2 in enumerate(ids):
            if i >= j:
                continue
            series1 = df[df['id'] == id1]['distance_to_center'].values
            series2 = df[df['id'] == id2]['distance_to_center'].values
            
            # Make series the same length
            min_length = min(len(series1), len(series2))
            series1 = series1[:min_length]
            series2 = series2[:min_length]
            
            # Calculate Pearson Correlation Coefficient
            corr, _ = pearsonr(series1, series2)
            correlation_matrix[i, j] = corr
            correlation_matrix[j, i] = corr  # Matrix is symmetrical
    
    return correlation_matrix

# Usage:
# ids_overlap could be a list of all unique ids you want to compare
similarity_matrix = calculate_similarity(bboxes, ids_overlap)



In [None]:
import pandas as pd
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw

primary_id = 3625
query = 'SELECT * FROM overlap_results'
overlap_results = pd.read_sql(query, conn)

overlap_results_only_id = overlap_results[overlap_results['id'] == primary_id]
ids_overlap = list(overlap_results_only_id['id_overlap'])
ids_overlap.append(primary_id)  # Append the integer version of id

query = 'SELECT id, distance_to_center, frame_number FROM bbox_raw WHERE  id IN ({})'.format(', '.join(['?']*len(ids_overlap)))
bbox2 = pd.read_sql(query, conn, params=ids_overlap)


def compare_two_curves(id1, id2, df):
    # Extract the series for the two IDs
    series1 = df[df['id'] == id1][['frame_number', 'distance_to_center']].sort_values('frame_number')
    series2 = df[df['id'] == id2][['frame_number', 'distance_to_center']].sort_values('frame_number')

    # Convert to numpy arrays
    series1 = series1.to_numpy()
    series2 = series2.to_numpy()

    # Apply DTW
    distance, path = fastdtw(series1, series2, dist=euclidean)
    
    return distance, path

# Replace 'id1' and 'id2' with the actual IDs you want to compare
id1 = ids_overlap[0]  # Replace with the first ID you want to compare
id2 = ids_overlap[1]  # Replace with the second ID you want to compare

# Use the compare_two_curves function
dtw_distance, dtw_path = compare_two_curves(id1, id2, bbox2)

print(f"DTW distance between ID {id1} and ID {id2}: {dtw_distance}")


#plot_path = create_plot(bbox2,primary_id, ids_overlap)

### Standarizar todas las pentiednes

In [None]:
#### NUEVO #### 

from scipy.stats import linregress

# Assuming 'bbox2' is loaded with the relevant data
query = 'SELECT id, distance_to_center, frame_number FROM bbox_raw'
bbox2 = pd.read_sql(query, conn)
# 1. Calculate slopes for each ID
slopes = {}
for id_group, group_data in bbox2.groupby('id'):
    slope, intercept, r_value, p_value, std_err = linregress(group_data['frame_number'], group_data['distance_to_center'])
    slopes[id_group] = slope


valid_slopes = {k: v for k, v in slopes.items() if not np.isnan(v)}
valid_slopes2 = {k: v for k, v in slopes.items() if np.isnan(v)}
average_slope = sum(valid_slopes.values()) / len(valid_slopes)
normalized_slopes = {id_group: slope / average_slope for id_group, slope in slopes.items()}
valid_slopes2


#### Filter undefined logic 1 count - 1

In [None]:
ids_overlap_total = overlap_results[overlap_results['count'] == 1]['id_overlap'].unique().tolist()

query = 'SELECT id, distance_to_center, frame_number FROM bbox_raw WHERE id IN ({})'.format(', '.join(['?']*len(ids_overlap_total)))

bbox_overlap = pd.read_sql(query, conn, params=ids_overlap_total)

df = bbox_overlap.sort_values(by=['id', 'frame_number'])
df['previous_distance_to_center'] = df.groupby('id')['distance_to_center'].shift(1)
crosses_zero = df.apply(lambda row: (row['distance_to_center'] < 0 < row['previous_distance_to_center']) or (row['distance_to_center'] > 0 > row['previous_distance_to_center']), axis=1)
ids_crossing_zero = df[crosses_zero]['id'].unique()

overlap_results[overlap_results['id_overlap'].isin(ids_crossing_zero)]['id'].unique().tolist()

### None Analysis 2.0

In [None]:
import pandas as pd
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw

#primary_id = 2469
count_overlaps = 1
query = """
    SELECT
    or2.id as base_id, 
    br.id,
    CASE WHEN or2.id = br.id THEN 1 ELSE 0 END as primary_id, 
    or2.id_overlap,
    br.frame_number,
    br.distance_to_center,
    or2.count,
    or2.direction
FROM 
    overlap_results AS or2 
LEFT JOIN 
    bbox_raw AS br ON br.id = or2.id OR br.id = or2.id_overlap 
WHERE 
    or2.count = ?
"""



bbox_analysis = pd.read_sql(query, conn, params=(count_overlaps,))



## Only the id that got primary_id = 1 must be in the top level of the dictionary, and for every id that got primary_id = 1 must be a id_overlap associate it with

# {
# 	primary_id : {
# 		direction : 'In',
# 		data : [{frame_number: 1, distance_to_center: 10}, {frame_number: 2, distance_to_center: 20}]
# 		id_overlap : {
# 			1 : {
# 				direction : 'Undefined',
# 				data : [{frame_number: 1, distance_to_center: 10}, {frame_number: 2, distance_to_center: 20}]
# 			}
# 		}
# 	}
# }

#create_plot(bbox_analysis,primary_id, bbox_analysis['id'].unique().tolist())


# from scipy.stats import linregress

# for id_group, group_data in bbox_analysis[bbox_analysis['direction'] == 'In'].groupby('id'):
#     slope, intercept, r_value, p_value, std_err = linregress(group_data['frame_number'], group_data['distance_to_center'])
#     slopes[id_group] = slope


# valid_slopes = {k: v for k, v in slopes.items() if not np.isnan(v)}
# #valid_slopes2 = {k: v for k, v in slopes.items() if np.isnan(v)}
# average_slope = sum(valid_slopes.values()) / len(valid_slopes)
# normalized_slopes = {id_group: slope / average_slope for id_group, slope in slopes.items()}
# normalized_slopes

In [26]:
structured_data = {}
data_slope = []

# Group the dataframe by 'base_id' to process each primary_id group
for base_id, group in bbox_analysis.groupby('base_id'):
    primary_data = group[group['primary_id'] == 1]
    overlap_data = group[group['primary_id'] == 0]

    # Get the minimum and maximum frame numbers for the undefined curve
    min_time_frame_undefined_curve = overlap_data['frame_number'].min()
    max_time_frame_undefined_curve = overlap_data['frame_number'].max()
    
    # Filter for overlap only when primary_id == 1 within the frame number range of the undefined curve
    only_overlap = primary_data[
        (primary_data['frame_number'] >= min_time_frame_undefined_curve) & 
        (primary_data['frame_number'] <= max_time_frame_undefined_curve)
    ]

    if only_overlap.empty:
        print('No overlap in ID:', base_id)
        continue

    # Calculate slope for only_overlap
    slope_primary = linregress(only_overlap['frame_number'], only_overlap['distance_to_center']).slope
    
    # Calculate slope for overlap_data
    slope_overlap = linregress(overlap_data['frame_number'], overlap_data['distance_to_center']).slope
    
    # Store the structured data
    structured_data[base_id] = {
        'direction': group['direction'].iloc[0],
        'data': only_overlap.to_dict('records'),
        'only_overlap': only_overlap.to_dict('records'),
        'slope': slope_primary,
        'id_overlap': {
            'direction': 'Undefined',
            'data': overlap_data.to_dict('records'),
            'slope': slope_overlap
        }
    }
    data_slope.append((base_id, overlap_data['id_overlap'].iloc[0], slope_primary, slope_overlap, abs(slope_primary - slope_overlap)))

data_slope_df = pd.DataFrame(data_slope, columns=['primary_id', 'id_overlap', 'slope_primary', 'slope_overlap', 'diff_slope'])
# Now you can use data_slope_df as a DataFrame
data_slope_df.to_csv('data_slope.csv', index=False)

No overlap in ID: 618
No overlap in ID: 876
No overlap in ID: 1169
No overlap in ID: 1247
No overlap in ID: 1434
No overlap in ID: 1607
No overlap in ID: 1705


  slope = ssxym / ssxm
  t = r * np.sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY)))
  slope_stderr = np.sqrt((1 - r**2) * ssym / ssxm / df)


No overlap in ID: 2214
No overlap in ID: 2700


  slope = ssxym / ssxm
  t = r * np.sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY)))
  slope_stderr = np.sqrt((1 - r**2) * ssym / ssxm / df)
  slope = ssxym / ssxm
  t = r * np.sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY)))
  slope_stderr = np.sqrt((1 - r**2) * ssym / ssxm / df)


No overlap in ID: 4404
No overlap in ID: 4554
No overlap in ID: 5782


  slope = ssxym / ssxm
  t = r * np.sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY)))
  slope_stderr = np.sqrt((1 - r**2) * ssym / ssxm / df)
  slope = ssxym / ssxm
  t = r * np.sqrt(df / ((1.0 - r + TINY)*(1.0 + r + TINY)))
  slope_stderr = np.sqrt((1 - r**2) * ssym / ssxm / df)


No overlap in ID: 6227
No overlap in ID: 6972
No overlap in ID: 7300
No overlap in ID: 7311
No overlap in ID: 8032


In [None]:
print(structured_data[3625]['slope'],structured_data[3625]['id_overlap']['slope'])

In [None]:
structured_data[2469]

In [None]:
bbox_analysis.head(500)

In [None]:
bbox_analysis[bbox_analysis['direction'] == 'In'].groupby('id')['id'].count().__len__()