# Query Performance Comparison Across Datasets

This notebook compares query performance between different methods across all available datasets. It focuses on:
- Comparing query execution times between standard and cached queries
- Analyzing performance by dataset and operation type
- Visualizing performance differences with clear and concise plots
- Aggregating results across multiple runs for more robust comparisons

In [37]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import re  # Add this import for regular expressions
from pathlib import Path
import scipy.stats as stats
import warnings
import matplotlib.ticker as ticker

from matplotlib.colors import LinearSegmentedColormap
warnings.filterwarnings('ignore')

# Result folders (relative names; they are interpolated into the METHODS paths below)
outFolder = "output_no_allocation"
groundTruthFolder = "output_no_allocation"

# Publication-ready matplotlib defaults, applied in a single rcParams update
plt.rcParams.update({
    # Fonts
    'font.family': 'serif',
    'font.serif': ['Times', 'Times New Roman', 'Palatino', 'DejaVu Serif'],
    'font.size': 12,
    'axes.titlesize': 18,
    'axes.labelsize': 18,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'legend.fontsize': 14,
    # Figure size and export quality
    'figure.figsize': [10, 6],
    'figure.dpi': 150,
    'savefig.dpi': 300,
    'savefig.bbox': 'tight',
    'savefig.pad_inches': 0.05,
    # Grid drawn behind the data
    'axes.grid': True,
    'grid.alpha': 0.3,
    'axes.axisbelow': True,
    # Text rendering: set usetex to True only if you have LaTeX installed
    'text.usetex': False,
    'mathtext.default': 'regular',
})

# Define the methods to compare.
# Each active entry describes one result set on disk:
#   name          - display label; stored in the 'method' column and used as the colour/legend key
#   path          - base directory of the result tree
#   method        - sub-directory under `path`
#   database      - sub-directory under `method`
#                   (results are read from path/method/database/<dataset>/run_*/results.csv)
#   patternMethod - not read by the loading code in this part of the notebook;
#                   presumably consumed by a later cell -- TODO confirm
# Commented-out entries are alternative methods currently excluded from the comparison.
METHODS = [
    {
        "name": "M4-NoC",
        "path": f"../{groundTruthFolder}/timeQueries/",
        "method": "m4",
        "patternMethod": "m4",
        "database": "influx"
    },
    # {
    #     "name": "M4-C",
    #     "path": f"../{outFolder}/timeCacheQueries/",
    #     "method": "m4",
    #     "patternMethod": "m4",
    #     "database": "influx"
    # },
    # {
    #     "name": "M4$^\\infty$-C",
    #     "path": f"../{outFolder}/timeCacheQueries/",
    #     "method": "m4Inf",
    #     "patternMethod": "m4Inf",
    #     "database": "influx"
    # },
    {
        "name": "M2$^\\infty$-C",
        "path": f"../{outFolder}/timeCacheQueries/",
        "method": "minmax",
        "patternMethod": "minmax",
        "database": "influx"
    },
    # {
    #     "name": "MinMaxCache",
    #     "path": f"../{outFolder}/timeMinMaxCacheQueries/",
    #     "method": "minmax",
    #     "patternMethod": "minmaxcache",
    #     "database": "influx"
    # },
]

# Create a folder for saving publication-ready figures.
# The plotting cells write their PDF/PNG files here; exist_ok=True makes re-running this cell safe.
FIGURES_DIR = "../figures"
os.makedirs(FIGURES_DIR, exist_ok=True)

# Generate a publication-ready color palette
# Using ColorBrewer-inspired palette for better distinction in papers
# METHOD_COLORS = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

# Create a consistent color mapping function
def get_method_color_mapping(methods_list):
    """
    Create a consistent color mapping for all methods used throughout the notebook.
    Ensures each method gets the same color in all plots.
    
    Known methods always receive their predefined color. Any other method name
    receives a color from a fallback palette (in order of first appearance), so
    every requested method is guaranteed to have an entry in the result.
    
    Parameters:
    -----------
    methods_list : list
        List of method names or method dictionaries (each with a 'name' key).
        May be empty.
        
    Returns:
    --------
    dict: Mapping from method name to color (hex string)
    """
    # Define specific colors for known methods to ensure consistency
    predefined_colors = {
        'M4-NoC': '#1f77b4',        # Blue
        'M4-C': '#ff7f0e',          # Orange  
        'M4$^\\infty$-C': '#2ca02c', # Green
        'MinMaxCache': '#d62728',   # Red
        'M2$^\\infty$-C': '#9467bd', # Purple
    }
    # Colors handed out to methods without a predefined entry; none of them
    # collides with a predefined color.
    fallback_colors = ['#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    
    # An empty list has no first element to inspect, so handle it explicitly
    if not methods_list:
        return {}
    
    # Extract method names if dealing with method dictionaries
    method_names = [
        method['name'] if isinstance(method, dict) else method
        for method in methods_list
    ]
    
    color_mapping = {}
    fallback_idx = 0
    for method_name in method_names:
        if method_name in color_mapping:
            continue  # Duplicate entry: keep the color already assigned
        if method_name in predefined_colors:
            color_mapping[method_name] = predefined_colors[method_name]
        else:
            # Cycle through the fallback palette so plotting code that indexes
            # the mapping by method name never hits a missing key
            color_mapping[method_name] = fallback_colors[fallback_idx % len(fallback_colors)]
            fallback_idx += 1
    return color_mapping

# Create global color mapping for all methods.
# Plotting cells look colors up in this dict by the method's display name.
GLOBAL_METHOD_COLORS = get_method_color_mapping(METHODS)

# Define a consistent style function for publication-ready plots
def set_publication_style(ax, title=None, xlabel=None, ylabel=None, legend_title=None):
    """
    Apply the notebook's publication styling to a matplotlib axis and return it.

    Optional texts (title, xlabel, ylabel) are set in bold only when given.
    If the axis already has a legend and `legend_title` is given, the legend is
    re-created with that title and a framed white background.
    """
    # Bold text for whichever of title / axis labels was supplied
    text_setters = (
        (title, ax.set_title),
        (xlabel, ax.set_xlabel),
        (ylabel, ax.set_ylabel),
    )
    for text, apply_text in text_setters:
        if text:
            apply_text(text, fontweight='bold')

    # Dashed, faint grid drawn underneath the data
    ax.grid(True, linestyle='--', alpha=0.3)
    ax.set_axisbelow(True)

    # Thicker frame lines on every side
    for frame_line in ax.spines.values():
        frame_line.set_linewidth(1.5)

    # Re-create an existing legend only when a title was requested
    if ax.get_legend() and legend_title:
        ax.legend(title=legend_title, frameon=True, facecolor='white',
                  framealpha=0.9, edgecolor='black')

    # Hide the top and right frame lines
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    return ax

In [38]:
# Cache management setup
import pickle
import shutil
from datetime import datetime

# Define the cache directory. The path is relative, so it resolves against the
# current working directory (normally the notebook's folder -- TODO confirm for your setup).
# exist_ok=True makes re-running this cell safe.
CACHE_DIR = "./analysis_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

print(f"Analysis cache directory: {CACHE_DIR}")

Analysis cache directory: ./analysis_cache


## Cache Management

This notebook uses caching to avoid re-running expensive computations. The workflow is:

1. **Calculation cells**: Load data, perform computations, and cache results
2. **Plotting cells**: Load cached data and generate visualizations
3. **Cache management**: Clear, inspect, or manage cached data

**Cache files:**
- `query_results.pkl`: Aggregated query performance data
- `ssim_data.pkl`: SSIM calculation results  
- `ssim_operation_data.pkl`: SSIM analysis by operation type

Run the cache management cell below to inspect or clear cached data.

In [39]:
# Cache Management Utilities

def clear_analysis_cache():
    """Delete the cache directory with everything in it, then re-create it empty."""
    if not os.path.exists(CACHE_DIR):
        print("Cache directory does not exist")
        return

    # Remove the whole tree and start again from an empty directory
    shutil.rmtree(CACHE_DIR)
    os.makedirs(CACHE_DIR, exist_ok=True)
    print(f"Cleared analysis cache directory: {CACHE_DIR}")

def list_cache_contents():
    """Print every entry of the cache directory with its size and modification time."""
    if not os.path.exists(CACHE_DIR):
        print("Cache directory does not exist")
        return

    cache_files = os.listdir(CACHE_DIR)
    if not cache_files:
        print("Cache directory is empty")
        return

    print(f"Cache directory contents ({CACHE_DIR}):")
    for file in cache_files:
        file_path = os.path.join(CACHE_DIR, file)
        size = os.path.getsize(file_path)
        # Convert the modification timestamp into a readable local time
        mod_time_str = datetime.fromtimestamp(os.path.getmtime(file_path)).strftime('%Y-%m-%d %H:%M:%S')
        print(f"  - {file} ({size:,} bytes, modified: {mod_time_str})")

def get_cache_info():
    """
    Print status information for each known cache file.

    For a file that exists: its size, modification time and, when the expected
    DataFrame is stored in it, the record count plus the distinct datasets/methods
    (or operations). For a missing file: a "Not cached" line.
    """
    # (file name, description, key under which the DataFrame is stored in the pickle)
    known_caches = (
        ('query_results.pkl', 'Query performance data', 'all_combined'),
        ('ssim_data.pkl', 'SSIM calculation results', 'ssim_df'),
        ('ssim_operation_data.pkl', 'SSIM analysis by operation', 'ssim_with_ops'),
    )

    for filename, description, frame_key in known_caches:
        filepath = os.path.join(CACHE_DIR, filename)
        if not os.path.exists(filepath):
            print(f"{description} ({filename}): Not cached")
            continue

        size = os.path.getsize(filepath)
        mod_time_str = datetime.fromtimestamp(os.path.getmtime(filepath)).strftime('%Y-%m-%d %H:%M:%S')

        print(f"{description} ({filename}):")
        print(f"  - Size: {size:,} bytes")
        print(f"  - Modified: {mod_time_str}")

        # Best effort: a cache that cannot be read is reported, not raised
        try:
            with open(filepath, 'rb') as f:
                cached_data = pickle.load(f)

            if frame_key in cached_data and cached_data[frame_key] is not None:
                df = cached_data[frame_key]
                print(f"  - Records: {len(df)}")
                if filename == 'ssim_operation_data.pkl':
                    print(f"  - Operations: {', '.join(df['operation'].unique())}")
                else:
                    print(f"  - Datasets: {', '.join(df['dataset'].unique())}")
                    print(f"  - Methods: {', '.join(df['method'].unique())}")
        except Exception as e:
            print(f"  - Error reading cache: {str(e)}")
        print()

# Uncomment the line below to clear all cache and force recalculation
# (this deletes every file in the cache directory, not only the query results)
# clear_analysis_cache()

# Show current cache status: one entry per known cache file
print("=== Cache Status ===")
get_cache_info()

=== Cache Status ===
Query performance data (query_results.pkl): Not cached
SSIM calculation results (ssim_data.pkl): Not cached
SSIM analysis by operation (ssim_operation_data.pkl): Not cached


## Load Query Results Data

Load experiment results from all available datasets, aggregating across runs.

In [40]:
def load_results(base_path, method, database_type, table_name):
    """
    Load results from multiple experiment runs into a single dataframe
    
    Every file matching
    ``base_path/method/database_type/table_name/run_*/results.csv`` is read.
    Files are processed in sorted path order so the row order of the result is
    reproducible across machines and filesystems.
    
    Parameters:
    -----------
    base_path : str
        Base path to the queries directory
    method : str    
        Name of the method used (e.g., m4Inf, m4)
    database_type : str
        Type of database (influx, postgres, etc.)
    table_name : str
        Name of the database table
        
    Returns:
    --------
    pd.DataFrame or None: Combined results from all runs, with added columns
        'run' (name of the run directory) and 'dataset' (table_name), plus
        'duration_sec' when both 'from' and 'to' columns are present.
        None if no result file was found.
    """
    path_pattern = os.path.join(base_path, method, database_type, table_name, "run_*", "results.csv")
    # glob returns matches in arbitrary (filesystem-dependent) order; sort for determinism
    csv_files = sorted(glob.glob(path_pattern))
    
    if not csv_files:
        return None
    
    # Tag every run's rows with the run directory name and the dataset
    dfs = [
        pd.read_csv(csv_file).assign(
            run=os.path.basename(os.path.dirname(csv_file)),
            dataset=table_name,
        )
        for csv_file in csv_files
    ]
    combined_df = pd.concat(dfs, ignore_index=True)
    
    # Convert date columns to datetime if they exist
    date_columns = ['from', 'to']
    for col in date_columns:
        if col in combined_df.columns:
            combined_df[col] = pd.to_datetime(combined_df[col])
            
    # Add duration column (length of the queried time range, in seconds)
    if 'from' in combined_df.columns and 'to' in combined_df.columns:
        combined_df['duration_sec'] = (combined_df['to'] - combined_df['from']).dt.total_seconds()
    
    return combined_df

# Function to detect and filter out outlier runs
def filter_outlier_runs(df, outlier_threshold=2.0):
    """
    Filter out runs that have significantly different execution times compared to other runs
    for the same query using the Interquartile Range (IQR) method.
    
    Rows are grouped by whichever of the query-identifying columns are present.
    Groups whose key contains NaN (e.g. the initial query, which has no
    'query_type') are kept as regular groups instead of being dropped.
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame containing results from multiple runs; must have a 'Time (sec)' column
    outlier_threshold : float
        IQR multiplier for outlier detection (default: 2.0 for more conservative filtering)
        
    Returns:
    --------
    pd.DataFrame: DataFrame with outlier runs removed (index is reset when filtering ran).
        The input is returned unchanged when it is None/empty or when none of the
        grouping columns exist.
    """
    if df is None or len(df) == 0:
        return df
    
    # Group by query characteristics to identify the same query across runs
    groupby_cols = ['dataset', 'query #', 'query_type', 'group_by', 'aggregation', 'time_interval']
    group_cols = [col for col in groupby_cols if col in df.columns]
    if not group_cols:
        # Nothing identifies "the same query", so there is nothing to compare against
        return df
    
    filtered_dfs = []
    outliers_removed = 0
    
    # dropna=False: pandas drops groups with NaN keys by default, which would
    # silently discard every row without a query_type (the initial query)
    for _, group_df in df.groupby(group_cols, dropna=False):
        if len(group_df) <= 2:  # Don't filter if we have 2 or fewer runs
            filtered_dfs.append(group_df)
            continue
        
        # Calculate IQR for execution times
        times = group_df['Time (sec)']
        Q1 = times.quantile(0.25)
        Q3 = times.quantile(0.75)
        IQR = Q3 - Q1
        
        # Define outlier bounds
        lower_bound = Q1 - outlier_threshold * IQR
        upper_bound = Q3 + outlier_threshold * IQR
        
        # Filter out outliers
        mask = (times >= lower_bound) & (times <= upper_bound)
        filtered_group = group_df[mask]
        
        # Only apply filtering if we still have at least 2 runs after filtering
        if len(filtered_group) >= 2:
            outliers_removed += len(group_df) - len(filtered_group)
            filtered_dfs.append(filtered_group)
        else:
            # Keep original data if filtering would leave us with too few runs
            filtered_dfs.append(group_df)
    
    if not filtered_dfs:
        return df
    
    result_df = pd.concat(filtered_dfs, ignore_index=True)
    if outliers_removed > 0:
        print(f"    Filtered out {outliers_removed} outlier runs")
    return result_df

# Function to aggregate results from multiple runs
def aggregate_runs(df):
    """
    Aggregate results from multiple runs by grouping by query characteristics
    
    Outlier runs are removed first (see filter_outlier_runs). Queries whose
    grouping key contains NaN (e.g. the initial query without a 'query_type')
    are aggregated like any other query.
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame containing results from multiple runs; must have
        'Time (sec)' and 'run' columns
        
    Returns:
    --------
    pd.DataFrame or None: One row per unique query with columns 'Time (sec)' (mean),
        'Time (sec)_median', 'Time (sec)_std', 'Time (sec)_min', 'Time (sec)_max',
        'query_count' and 'run_count'. None if the input is None or empty.
    """
    if df is None or len(df) == 0:
        return None
    
    # First, filter out outlier runs
    df_filtered = filter_outlier_runs(df)
    
    # Group by query characteristics (not by run)
    groupby_cols = ['dataset', 'query #', 'query_type', 'group_by', 'aggregation', 'time_interval']
    group_cols = [col for col in groupby_cols if col in df_filtered.columns]
    
    # Aggregate the Time (sec) column across runs.
    # dropna=False keeps queries with a NaN key (initial query) in the result.
    agg_df = df_filtered.groupby(group_cols, dropna=False).agg({
        'Time (sec)': ['mean', 'median', 'std', 'min', 'max', 'count'],
        'run': 'nunique'  # Count number of runs
    }).reset_index()
    
    # Flatten the multi-level column names (grouping columns have an empty second level)
    agg_df.columns = [f"{col[0]}_{col[1]}" if col[1] else col[0] for col in agg_df.columns]
    
    # Rename some columns for clarity
    agg_df = agg_df.rename(columns={
        'Time (sec)_mean': 'Time (sec)', 
        'Time (sec)_count': 'query_count',
        'run_nunique': 'run_count'
    })
    
    return agg_df

# Operation type mapping for better readability
def get_operation_type_mapping():
    """Return the lookup from short query-type codes to readable operation names."""
    codes = ('P', 'ZI', 'ZO', 'R', 'MC', 'PD', 'NaN')
    names = (
        'Pan',
        'Zoom In',
        'Zoom Out',
        'Resize',
        'Measure Change',
        'Pattern Detection',
        'Initial Query',
    )
    return dict(zip(codes, names))

## Find All Available Datasets and Load Data

In [41]:
# Query Data Loading and Aggregation - CALCULATION CELL
# This cell loads raw query data, performs aggregation, and caches the results

def find_datasets(method_info):
    """
    Return the names of all dataset directories available for one method.

    Datasets are the sub-directories of ``path/method/database`` taken from
    `method_info`; plain files at that level are ignored.
    """
    dataset_pattern = os.path.join(
        method_info["path"],
        method_info["method"],
        method_info["database"],
        "*",
    )
    # Keep only directories; the directory name is the dataset name
    return [
        os.path.basename(candidate)
        for candidate in glob.glob(dataset_pattern)
        if os.path.isdir(candidate)
    ]

# Check if query results are already cached
query_cache_file = os.path.join(CACHE_DIR, "query_results.pkl")


def _readable_operation(query_types):
    """
    Translate a Series of raw query-type codes into readable operation names.

    Missing codes become 'Initial Query'; codes without a mapping are kept as-is.
    """
    op_type_map = get_operation_type_mapping()
    # map() yields NaN for unmapped codes -> restore the original code first,
    # then label whatever is still missing as the initial query
    return query_types.map(op_type_map).fillna(query_types).fillna('Initial Query')


if os.path.exists(query_cache_file):
    print("Loading cached query results...")
    # NOTE: pickle can execute arbitrary code on load; only read cache files
    # written by this notebook.
    with open(query_cache_file, 'rb') as f:
        cached_data = pickle.load(f)
        raw_results_by_dataset = cached_data.get('raw_results_by_dataset', {})
        results_by_dataset = cached_data.get('results_by_dataset', {})
        all_combined = cached_data.get('all_combined')
        all_datasets = cached_data.get('all_datasets', [])
    
    if all_combined is not None:
        print(f"Loaded cached data: {len(all_combined)} aggregated queries across {len(all_datasets)} datasets")
        print(f"Datasets: {', '.join(all_datasets)}")
        print(f"Methods: {', '.join(all_combined['method'].unique())}")
    else:
        print("Warning: Cached data appears to be empty")
else:
    print("Computing query results from scratch...")
    
    # Get unique datasets from all methods
    all_datasets = set()
    for method in METHODS:
        all_datasets.update(find_datasets(method))

    all_datasets = sorted(all_datasets)
    print(f"Found {len(all_datasets)} datasets: {all_datasets}")

    # Load data for each method and dataset
    raw_results_by_dataset = {}  # Store raw results
    results_by_dataset = {}      # Store aggregated results
    all_results = []             # Store all aggregated results

    for dataset in all_datasets:
        print(f"\nLoading data for dataset: {dataset}")
        dataset_results = []
        dataset_raw_results = []
        
        for i, method in enumerate(METHODS):
            print(f"  Loading {method['name']}...")
            
            df = load_results(
                base_path=method['path'],
                method=method['method'],
                database_type=method['database'],
                table_name=dataset
            )
            
            if df is None or df.empty:
                print("    No data found")
                continue
            
            # Add method name, its position in METHODS and a readable operation type
            df['method'] = method['name']
            df['method_idx'] = i
            df['operation'] = _readable_operation(df['query_type'])
            
            # Store raw results first
            dataset_raw_results.append(df)
            
            # Aggregate results across runs
            agg_df = aggregate_runs(df)
            if agg_df is None:
                print("    Error aggregating results")
                continue
            
            # Aggregation drops these columns, so add them again
            agg_df['method'] = method['name']
            agg_df['method_idx'] = i
            agg_df['operation'] = _readable_operation(agg_df['query_type'])
            
            dataset_results.append(agg_df)
            all_results.append(agg_df)
            print(f"    Loaded {len(df)} queries from {agg_df['run_count'].iloc[0]} runs, aggregated to {len(agg_df)} unique queries")
        
        if dataset_raw_results:
            raw_results_by_dataset[dataset] = pd.concat(dataset_raw_results, ignore_index=True)
        
        if dataset_results:
            results_by_dataset[dataset] = pd.concat(dataset_results, ignore_index=True)

    # Combine all results into a single dataframe for overall analysis
    if all_results:
        all_combined = pd.concat(all_results, ignore_index=True)
        print(f"\nLoaded a total of {len(all_combined)} aggregated queries across {len(all_datasets)} datasets")
        
        # Print run count information
        for dataset in results_by_dataset:
            for method in METHODS:
                method_name = method['name']
                method_data = results_by_dataset[dataset][results_by_dataset[dataset]['method'] == method_name]
                if not method_data.empty:
                    run_count = method_data['run_count'].iloc[0]
                    print(f"Dataset: {dataset}, Method: {method_name}, Runs: {run_count}")
    else:
        all_combined = None
        print("\nNo data was loaded.")
    
    # Cache the results
    cache_data = {
        'raw_results_by_dataset': raw_results_by_dataset,
        'results_by_dataset': results_by_dataset,
        'all_combined': all_combined,
        'all_datasets': all_datasets
    }
    
    if all_combined is not None:
        with open(query_cache_file, 'wb') as f:
            pickle.dump(cache_data, f)
        print(f"\nCached query results to {query_cache_file}")
    else:
        # Writing an empty result would make every later run load "nothing" from
        # the cache instead of looking for data again, so skip the write.
        print(f"\nNothing to cache; {query_cache_file} was not written")

Computing query results from scratch...
Found 0 datasets: []

No data was loaded.

Cached query results to ./analysis_cache/query_results.pkl


## Performance Comparison by Operation Type

Let's break down the performance by operation type for each dataset using the aggregated results.

In [42]:
# Performance Comparison by Operation Type - PLOTTING CELL
# This cell loads cached query data and generates operation-type performance visualizations

if 'all_combined' in globals() and all_combined is not None and 'results_by_dataset' in globals():
    # Preferred left-to-right order of operations; anything else is placed after these
    op_order = ['Initial Query', 'Pan', 'Zoom In', 'Zoom Out', 'Resize', 'Measure Change', 'Pattern Detection']
    
    # Create publication-ready figures for each dataset separately
    for dataset in all_datasets:
        if dataset not in results_by_dataset:
            continue
        
        dataset_df = results_by_dataset[dataset]
        method_names = dataset_df['method'].unique()
        run_count = dataset_df['run_count'].iloc[0]
        
        # Group data by operation type and method
        op_perf = dataset_df.groupby(['operation', 'method'])['Time (sec)'].mean().reset_index()
        
        # Remove rows with NaN operations to avoid plotting issues
        op_perf = op_perf.dropna(subset=['operation'])
        
        # Get operations for this dataset and sort them in a meaningful order
        operations = sorted(
            op_perf['operation'].unique(),
            key=lambda x: op_order.index(x) if x in op_order else 999
        )
        
        # Create the figure
        fig, ax = plt.subplots(figsize=(12, 7))
        
        # Set bar properties: the methods of one operation share 60% of the slot width
        bar_width = 0.6 / len(method_names)
        opacity = 0.8
        bar_positions = np.arange(len(operations))
        
        method_handles = []  # One bar container per method, used for the legend
        
        for j, method in enumerate(method_names):
            method_data = op_perf[op_perf['method'] == method]
            # Create a lookup dict by operation
            method_by_op = dict(zip(method_data['operation'], method_data['Time (sec)']))
            
            # Extract values in the correct order (0 when the method has no such operation)
            values = [method_by_op.get(op, 0) for op in operations]
            
            # Plot bars with consistent colors; methods without a registered color
            # fall back to matplotlib's default color cycle instead of raising
            offset = (j - len(method_names)/2 + 0.5) * bar_width
            bars = ax.bar(
                bar_positions + offset, 
                values, 
                bar_width,
                color=GLOBAL_METHOD_COLORS.get(method), 
                label=method,
                edgecolor='black',
                linewidth=1,
                alpha=opacity
            )
            method_handles.append(bars)
            
            # Add value labels on top of bars. The gap is given in points so it
            # looks the same whatever the magnitude of the plotted times is.
            for bar in bars:
                height = bar.get_height()
                if height > 0:  # Only add labels for non-zero values
                    ax.annotate(
                        f'{height:.2f}',
                        (bar.get_x() + bar.get_width()/2, height),
                        xytext=(0, 3),
                        textcoords='offset points',
                        ha='center', 
                        va='bottom',
                        rotation=0
                    )
        
        # Set the x-axis labels
        ax.set_xticks(bar_positions)
        ax.set_xticklabels(operations, rotation=45, ha='right')
        
        # Set labels and title
        title = f'Query Performance by Operation Type - {dataset}'
        subtitle = f'Average across {run_count} runs'
        ax.set_title(f'{title}\n{subtitle}', pad=20)
        ax.set_xlabel('Operation Type')
        ax.set_ylabel('Average Time (seconds)')
        
        # Apply publication styling first (no legend exists yet, so it is not touched) ...
        set_publication_style(ax)
        
        # ... then create the legend exactly once, from the collected bar containers,
        # so its handles, title, position and frame are all kept
        ax.legend(
            handles=method_handles, title="Query Method", loc='upper right',
            frameon=True, facecolor='white', framealpha=0.9, edgecolor='black'
        )
        
        # Adjust layout
        plt.tight_layout()
        
        # Save figure
        filename_safe = dataset.replace('/', '_').replace(' ', '_')
        plt.savefig(os.path.join(FIGURES_DIR, f"operations_{filename_safe}.pdf"))
        plt.savefig(os.path.join(FIGURES_DIR, f"operations_{filename_safe}.png"))
        plt.show()
        # Release the figure so memory does not grow with the number of datasets
        plt.close(fig)
else:
    print("No data available for comparison.")

No data available for comparison.


## Query Execution Time Evolution

Let's visualize how query times evolve across the sequence of operations, highlighting pattern detection queries.

In [43]:
if 'all_combined' in globals() and all_combined is not None and 'results_by_dataset' in globals():
    # Plot time series for each dataset
    for dataset in all_datasets:
        if dataset not in results_by_dataset:
            continue
        
        dataset_df = results_by_dataset[dataset]
        run_count = dataset_df['run_count'].iloc[0]
        
        # Create a publication-ready time series plot
        fig, ax = plt.subplots(figsize=(12, 7))
        
        # Add highlighting for pattern detection queries: one dashed vertical line
        # per query; the first line doubles as the legend entry
        highlight_handle = None
        pattern_queries = dataset_df[dataset_df['operation'] == 'Pattern Detection']['query #'].unique()
        for query_num in pattern_queries:
            vline = ax.axvline(
                x=query_num, color='lightgray', linestyle='--', alpha=0.5, zorder=0,
                label='Pattern Detection'
            )
            if highlight_handle is None:
                highlight_handle = vline

        # Plot each method
        method_handles = []
        for method in dataset_df['method'].unique():
            method_data = dataset_df[dataset_df['method'] == method].sort_values('query #')
            
            # Plot mean times with error bars. The std is NaN for a query measured
            # in a single run; draw no error bar (0) there instead of passing NaN.
            line = ax.errorbar(
                method_data['query #'], 
                method_data['Time (sec)'],
                yerr=method_data['Time (sec)_std'].fillna(0),
                label=method,
                marker='o', 
                markersize=7, 
                alpha=0.9,
                # Methods without a registered color use matplotlib's default cycle
                color=GLOBAL_METHOD_COLORS.get(method), 
                linestyle='-', 
                linewidth=2,
                capsize=4,
                capthick=1,
                elinewidth=1
            )
            method_handles.append(line)
       
        # Set proper titles and labels
        title = f'Query Execution Time Evolution - {dataset}'
        subtitle = f'Average of {run_count} runs with standard deviation'
        ax.set_title(f'{title}\n{subtitle}', pad=20)
        ax.set_xlabel('Query Sequence Number')
        ax.set_ylabel('Execution Time (seconds)')
        
        # Format x-axis as integers
        ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
        
        # Apply publication styling first (no legend exists yet, so it is not touched)
        set_publication_style(ax)
        
        # Create the legend exactly once: one entry per method plus, when pattern
        # detection queries exist, one entry for the highlight lines
        legend_handles = list(method_handles)
        if highlight_handle is not None:
            legend_handles.append(highlight_handle)
        ax.legend(
            handles=legend_handles, title="Query Method / Highlight", loc='upper right',
            frameon=True, facecolor='white', framealpha=0.9, edgecolor='black'
        )
        
        plt.tight_layout()
        
        # Save figure
        filename_safe = dataset.replace(' ', '_').lower()
        plt.savefig(os.path.join(FIGURES_DIR, f"time_evolution_{filename_safe}.pdf"))
        plt.savefig(os.path.join(FIGURES_DIR, f"time_evolution_{filename_safe}.png"))
        plt.show()
        # Release the figure so memory does not grow with the number of datasets
        plt.close(fig)
else:
    print("No data available for plotting time evolution.")

No data available for plotting time evolution.


## Performance Distribution Analysis

Compare the distribution of query times between methods for each dataset. The statistics are computed from the raw (non-aggregated) results, using the minimum time of each query across all runs.

In [44]:
if all_combined is not None:
    # Seeded generator so the scatter jitter, and with it the saved figure,
    # is identical on every run.
    jitter_rng = np.random.default_rng(0)

    # Each query is represented by its minimum time across all runs, taken
    # from the per-run rows stored in raw_results_by_dataset.
    for dataset in all_datasets:
        if dataset not in raw_results_by_dataset:
            continue

        dataset_df = raw_results_by_dataset[dataset]
        run_count = len(dataset_df['run'].unique())

        # One figure per dataset, one box per method
        fig, ax = plt.subplots(figsize=(10, 7))

        methods = dataset_df['method'].unique()
        positions = np.arange(len(methods))
        width = 0.6
        whisker_width = width / 4

        for i, method in enumerate(methods):
            method_data = dataset_df[dataset_df['method'] == method]

            # Minimum time of every query across runs (computed once and
            # reused for both the summary statistics and the scatter points)
            min_times_by_query = method_data.groupby(['query #'])['Time (sec)'].min()

            mean_val = min_times_by_query.mean()
            median_val = min_times_by_query.median()
            min_val = min_times_by_query.min()
            max_val = min_times_by_query.max()
            std_val = min_times_by_query.std()
            if pd.isna(std_val):
                # A single query has no sample standard deviation; draw a
                # zero-height box instead of passing NaN to matplotlib.
                std_val = 0.0

            # Same fallback colour as the accuracy plots use for unknown methods
            method_color = GLOBAL_METHOD_COLORS.get(method, '#1f77b4')

            # The box spans median +/- std/2. Only the bottom edge is clipped
            # at zero; the top edge stays at median + std/2.
            box_top = median_val + std_val / 2
            box_bottom = max(0, median_val - std_val / 2)
            ax.add_patch(plt.Rectangle(
                (i - width / 2, box_bottom),
                width, box_top - box_bottom,
                alpha=0.7,
                facecolor=method_color,
                edgecolor='black',
                linewidth=1.5
            ))

            # Median line: a wider white stroke under a black one for contrast
            ax.plot([i - width / 2, i + width / 2], [median_val, median_val],
                    color='white', linewidth=2.5, solid_capstyle='round')
            ax.plot([i - width / 2, i + width / 2], [median_val, median_val],
                    color='black', linewidth=1.5, solid_capstyle='round')

            # Whiskers run from the box edges to min/max (lower end clipped at zero)
            lower_whisker = max(0, min_val)
            ax.plot([i, i], [lower_whisker, box_bottom],
                    color='black', linewidth=1.5, linestyle='-')
            ax.plot([i, i], [box_top, max_val],
                    color='black', linewidth=1.5, linestyle='-')

            # Caps on the whiskers
            ax.plot([i - whisker_width, i + whisker_width], [lower_whisker, lower_whisker],
                    color='black', linewidth=1.5)
            ax.plot([i - whisker_width, i + whisker_width], [max_val, max_val],
                    color='black', linewidth=1.5)

            # Mean marker: black dot with a white halo
            ax.plot(i, mean_val, 'o', color='white', markersize=8)
            ax.plot(i, mean_val, 'o', color='black', markersize=6)

            # Individual per-query minima, jittered horizontally for visibility
            jitter = jitter_rng.uniform(-width / 3, width / 3, size=len(min_times_by_query))
            ax.scatter(
                i + jitter,
                min_times_by_query.values,
                s=30,
                alpha=0.5,
                color=method_color,
                edgecolor='black',
                linewidth=0.5,
                zorder=3
            )

        # Set axis properties
        ax.set_xticks(positions)
        ax.set_xticklabels(methods)

        # Set titles and labels
        title = f'Query Time Distribution - {dataset}'
        subtitle = f'Using minimum query times across {run_count} runs'
        ax.set_title(f'{title}\n{subtitle}', pad=20)
        ax.set_xlabel('Query Method')
        ax.set_ylabel('Execution Time (seconds)')

        # Apply publication styling but without legend
        set_publication_style(ax)

        # Pad the y-axis range by 5% on both sides
        y_min, y_max = ax.get_ylim()
        margin = (y_max - y_min) * 0.05
        ax.set_ylim(y_min - margin, y_max + margin)

        plt.tight_layout()

        # Save figure
        filename_safe = dataset.replace(' ', '_').lower()
        plt.savefig(os.path.join(FIGURES_DIR, f"min_time_distribution_{filename_safe}.pdf"))
        plt.savefig(os.path.join(FIGURES_DIR, f"min_time_distribution_{filename_safe}.png"))
        plt.show()
        # Release the figure so a long dataset list does not accumulate open figures
        plt.close(fig)

else:
    print("No data available for distribution analysis.")

No data available for distribution analysis.


In [45]:
import re
from sklearn.metrics import f1_score, precision_score, recall_score

def extract_patterns(filepath):
    """Collect the ``[start to end]`` interval of every ``Match #N`` entry in a log file.

    Returns a set of ``(start, end)`` integer tuples, so repeated intervals
    collapse into one element. Text that does not match the
    ``Match #<n>: [<start> to <end>]`` shape is ignored.
    """
    with open(filepath, "r") as log_file:
        log_text = log_file.read()
    # findall yields one (start, end) tuple of digit strings per match
    interval_pairs = re.findall(r"Match #\d+: \[(\d+) to (\d+)\]", log_text)
    return {(int(start), int(end)) for start, end in interval_pairs}

def compute_f1(gt_file, pred_file):
    """
    Print precision, recall and F1 of predicted pattern intervals and return the F1.

    An interval counts as correct only when the exact ``(start, end)`` pair
    appears in both files. The scores are computed directly from set sizes and
    follow the same conventions as ``compare_pattern_detection``: when both
    files contain no intervals the prediction is treated as a perfect match
    (all scores 1.0), and a score whose denominator is otherwise zero is 0.0.

    Parameters:
    -----------
    gt_file : str
        Path to the ground truth pattern match log
    pred_file : str
        Path to the pattern match log being evaluated

    Returns:
    --------
    float: F1 score in the range [0, 1]
    """
    gt_patterns = extract_patterns(gt_file)
    pred_patterns = extract_patterns(pred_file)

    true_positives = len(gt_patterns & pred_patterns)
    false_positives = len(pred_patterns - gt_patterns)
    false_negatives = len(gt_patterns - pred_patterns)

    if true_positives + false_positives == 0:
        # Nothing predicted: perfect only if there was nothing to find
        precision = 1.0 if true_positives + false_negatives == 0 else 0.0
    else:
        precision = true_positives / (true_positives + false_positives)

    if true_positives + false_negatives == 0:
        # Nothing to find: perfect only if nothing was predicted
        recall = 1.0 if true_positives + false_positives == 0 else 0.0
    else:
        recall = true_positives / (true_positives + false_negatives)

    if precision + recall == 0:
        f1 = 0.0
    else:
        f1 = 2 * (precision * recall) / (precision + recall)

    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    return f1

## Pattern Detection Accuracy Comparison

Compare the accuracy of pattern detection methods against the ground truth using the F1 score metric.

In [46]:
import os
import re
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score

# Function to find ground truth pattern match files
def find_ground_truth_files(db, dataset):
    """
    Find all ground truth pattern match log files for one database/dataset pair.

    The files are looked up in
    ``../<outFolder>/pattern_matches/ground_truth/<db>/<dataset>/*.log``.
    NOTE(review): the directory is built from ``outFolder`` rather than
    ``groundTruthFolder``; confirm this is intended if the two ever differ.

    Parameters:
    -----------
    db : str
        Database name; used as a path component and copied into each record
    dataset : str
        Dataset name; used as a path component and copied into each record

    Returns:
    --------
    list: One dict per log file whose name has the form
        ``<start_ts>_<end_ts>_<measure>_<time_unit>.log``, with the keys
        'path', 'database', 'dataset', 'start_ts', 'end_ts', 'measure',
        'time_unit' and 'filename'. The list is ordered by file path and is
        empty when the directory is missing or holds no ``.log`` files.
        Files whose name does not match the expected form are skipped.
    """
    # Directory that holds the ground truth logs for this database/dataset
    pattern_dir = os.path.join("..", f"{outFolder}/pattern_matches/ground_truth/{db}/{dataset}")

    # Check if directory exists
    if not os.path.exists(pattern_dir):
        print(f"Directory '{pattern_dir}' does not exist.")
        return []

    # glob returns entries in filesystem order; sort so results are reproducible
    gt_files = sorted(glob.glob(os.path.join(pattern_dir, "*.log")))

    if not gt_files:
        print(f"No ground truth files found in '{pattern_dir}'.")
        return []

    # Filename layout: <start_ts>_<end_ts>_<measure>_<time_unit>.log
    method_pattern = re.compile(r"(.+?)_(\d+)_(\d+)_(.+?)\.log$")

    gt_info = []
    for file_path in gt_files:
        filename = os.path.basename(file_path)
        match = method_pattern.match(filename)
        if not match:
            continue

        start_ts, end_ts, measure, time_unit = match.groups()
        gt_info.append({
            'path': file_path,
            'database': db,
            'dataset': dataset,
            'start_ts': start_ts,
            'end_ts': end_ts,
            'measure': measure,
            'time_unit': time_unit,
            'filename': filename
        })

    print(f"Found {len(gt_info)} ground truth files.")
    return gt_info

# Function to find corresponding method files for a ground truth file
def find_method_file(gt_info, method, base_dir="pattern_matches"):
    """
    Locate the pattern match log a method wrote for the query of a ground truth log.

    The expected location is
    ``../<outFolder>/<base_dir>/<patternMethod>/<database>/<dataset>/<start_ts>_<end_ts>_<measure>_<time_unit>.log``,
    where everything after ``<patternMethod>`` is taken from ``gt_info``.

    Parameters:
    -----------
    gt_info : dict
        Ground truth file information; the keys 'database', 'dataset',
        'start_ts', 'end_ts', 'measure' and 'time_unit' are read
    method : dict
        Method configuration; 'patternMethod' selects the directory and
        'name' is returned as the display name
    base_dir : str
        Directory below the output folder that contains the pattern match logs

    Returns:
    --------
    dict or None: {'path', 'method_name', 'display_name'} when the log exists,
        otherwise None
    """
    pattern_method = method["patternMethod"]

    # Relative location of the log, mirroring the ground truth file name
    relative_log = (
        f"{pattern_method}/{gt_info['database']}/{gt_info['dataset']}/"
        f"{gt_info['start_ts']}_{gt_info['end_ts']}_{gt_info['measure']}_{gt_info['time_unit']}.log"
    )
    candidate_path = os.path.join("..", f"{outFolder}/{base_dir}", relative_log)

    if not os.path.exists(candidate_path):
        return None

    return {
        'path': candidate_path,
        'method_name': pattern_method,
        'display_name': method["name"]
    }

# Compare pattern detection accuracy for all methods against ground truth
def _score_pattern_sets(gt_patterns, pred_patterns):
    """Score predicted intervals against ground truth by exact set membership.

    Returns a dict with 'precision', 'recall', 'f1', 'true_positives',
    'false_positives' and 'false_negatives'. Two empty sets count as a perfect
    match (all scores 1.0); any other zero denominator yields 0.0.
    """
    true_positives = len(gt_patterns & pred_patterns)    # in both sets
    false_positives = len(pred_patterns - gt_patterns)   # predicted only
    false_negatives = len(gt_patterns - pred_patterns)   # ground truth only

    if true_positives + false_positives == 0:
        precision = 1.0 if true_positives + false_negatives == 0 else 0.0
    else:
        precision = true_positives / (true_positives + false_positives)

    if true_positives + false_negatives == 0:
        recall = 1.0 if true_positives + false_positives == 0 else 0.0
    else:
        recall = true_positives / (true_positives + false_negatives)

    if precision + recall == 0:
        f1 = 0.0
    else:
        f1 = 2 * (precision * recall) / (precision + recall)

    return {
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'true_positives': true_positives,
        'false_positives': false_positives,
        'false_negatives': false_negatives
    }


def compare_pattern_detection(methods, base_dir="pattern_matches"):
    """
    Compare pattern detection accuracy for all methods against ground truth

    The first entry of ``methods`` is treated as the baseline and skipped.
    For every remaining method and every dataset in ``all_datasets``, each
    ground truth log is paired with the method's log of the same name and the
    two interval sets are scored. Progress and per-file scores are printed.

    Parameters:
    -----------
    methods : list
        List of method information dictionaries ('name', 'database' and
        'patternMethod' are read)
    base_dir : str
        Base directory containing pattern match files

    Returns:
    --------
    pd.DataFrame or None: One row per (method, dataset, ground truth file)
        that could be compared, or None when nothing was compared
    """
    results = []

    # Iterate the methods passed in by the caller (not the global METHODS),
    # so the function honours its argument.
    for i, method in enumerate(methods):
        if i == 0:
            continue  # Skip the first method (baseline)

        for dataset in all_datasets:
            gt_files = find_ground_truth_files(method['database'], dataset)

            for gt_file in gt_files:
                # Find the method's log that corresponds to this ground truth log
                method_file = find_method_file(gt_file, method, base_dir)
                if not method_file:
                    print(f"No method file found for method {method['name']} and dataset {dataset}")
                    continue

                # Load ground truth patterns
                gt_path = gt_file['path']
                try:
                    gt_patterns = extract_patterns(gt_path)
                    print(f"Ground truth: {os.path.basename(gt_path)}")
                    print(f"Found {len(gt_patterns)} ground truth patterns")
                except Exception as e:
                    print(f"Error loading ground truth file: {str(e)}")
                    continue

                pred_path = method_file['path']
                display_name = method_file['display_name']

                print(f"\nComparing {display_name} vs ground truth:")
                print(f"Method file: {os.path.basename(pred_path)}")

                try:
                    # Load predicted patterns
                    pred_patterns = extract_patterns(pred_path)
                    print(f"Found {len(pred_patterns)} predicted patterns")

                    scores = _score_pattern_sets(gt_patterns, pred_patterns)
                    precision = scores['precision']
                    recall = scores['recall']
                    f1 = scores['f1']
                    true_positives = scores['true_positives']
                    false_positives = scores['false_positives']
                    false_negatives = scores['false_negatives']

                    # Count number of patterns
                    num_gt_patterns = len(gt_patterns)
                    num_pred_patterns = len(pred_patterns)
                    num_correct_patterns = true_positives

                    results.append({
                        'dataset': dataset,
                        'method': display_name,
                        'method_id': method_file['method_name'],
                        'measure': gt_file['measure'],
                        'time_unit': gt_file['time_unit'],
                        'precision': precision,
                        'recall': recall,
                        'f1': f1,
                        'gt_patterns': num_gt_patterns,
                        'pred_patterns': num_pred_patterns,
                        'correct_patterns': num_correct_patterns,
                        'true_positives': true_positives,
                        'false_positives': false_positives,
                        'false_negatives': false_negatives
                    })

                    print(f"True Positives: {true_positives}")
                    print(f"False Positives: {false_positives}")
                    print(f"False Negatives: {false_negatives}")
                    print(f"Precision:  {precision:.4f}")
                    print(f"Recall:     {recall:.4f}")
                    print(f"F1 Score:   {f1:.4f}")
                    print(f"GT Patterns: {num_gt_patterns}, Predicted: {num_pred_patterns}, Correct: {num_correct_patterns}")

                except Exception as e:
                    # Best effort: report the failing file pair and keep going
                    print(f"Error comparing {display_name} to ground truth: {str(e)}")

    # Convert results to DataFrame
    if results:
        results_df = pd.DataFrame(results)
        return results_df
    else:
        print("No comparison results generated.")
        return None

# Run the pattern detection accuracy comparison
print("Starting pattern detection accuracy comparison...")
accuracy_results = compare_pattern_detection(METHODS, "pattern_matches")

if accuracy_results is not None:
    print("\nSummary of pattern detection accuracy:")
    display(accuracy_results)

    # Create visualizations if we have results
    if not accuracy_results.empty:
        # One grouped bar chart per metric, stacked vertically in a single figure
        metrics = ['precision', 'recall', 'f1']
        fig, axes = plt.subplots(len(metrics), 1, figsize=(12, 15))

        for i, metric in enumerate(metrics):
            # Mean of the metric per dataset and method
            summary = accuracy_results.groupby(['dataset', 'method'])[metric].mean().reset_index()

            # Map each method name to its colour explicitly, so the assignment
            # does not depend on the order in which seaborn enumerates hue levels
            method_names = summary['method'].unique()
            palette = {method: GLOBAL_METHOD_COLORS.get(method, '#1f77b4') for method in method_names}
            sns.barplot(data=summary, x='dataset', y=metric, hue='method', ax=axes[i], palette=palette)

            # Set titles and labels
            axes[i].set_title(f"Pattern Detection {metric.capitalize()} by Dataset and Method", pad=10)
            axes[i].set_xlabel('Dataset')
            axes[i].set_ylabel(metric.capitalize())

            # Apply publication styling
            set_publication_style(axes[i], legend_title='Method')

            # Adjust y-axis limits
            axes[i].set_ylim(0, 1.05)

            # Add text labels above bars. Zero-width patches (e.g. legend proxy
            # artists that some seaborn versions add to the axes) and bars with
            # no value (NaN height) are not real bars and get no label.
            for p in axes[i].patches:
                bar_height = p.get_height()
                if p.get_width() == 0 or np.isnan(bar_height):
                    continue
                axes[i].annotate(f"{bar_height:.3f}",
                              (p.get_x() + p.get_width() / 2., bar_height),
                              ha = 'center', va = 'bottom')

        plt.tight_layout()

        # Save figure
        plt.savefig(os.path.join(FIGURES_DIR, "pattern_detection_metrics.pdf"))
        plt.savefig(os.path.join(FIGURES_DIR, "pattern_detection_metrics.png"))
        plt.show()
        plt.close(fig)

        # Create a summary table showing overall metrics by method
        method_summary = accuracy_results.groupby('method').agg({
            'precision': ['mean', 'std'],
            'recall': ['mean', 'std'],
            'f1': ['mean', 'std'],
            'dataset': 'nunique'
        }).reset_index()

        # Flatten the multi-level column names, e.g. ('f1', 'mean') -> 'f1_mean'
        method_summary.columns = ['_'.join(col).strip('_') for col in method_summary.columns]

        # Rename columns for clarity
        method_summary = method_summary.rename(columns={
            'method_': 'method',
            'precision_mean': 'avg_precision',
            'precision_std': 'std_precision',
            'recall_mean': 'avg_recall',
            'recall_std': 'std_recall',
            'f1_mean': 'avg_f1',
            'f1_std': 'std_f1',
            'dataset_nunique': 'num_datasets'
        })
else:
    print("No pattern match accuracy results available.")

Starting pattern detection accuracy comparison...
No comparison results generated.
No pattern match accuracy results available.


## Visual Similarity Comparison between Query Results

Compare the visual similarity of query results between different methods and the ground truth (M4-NoC) using two complementary metrics:

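1. **Structural Similarity Index Measure (SSIM)**: Measures perceptual similarity between images, considering luminance, contrast, and structure. A value of 1 means the images are identical; scores usually fall between 0 and 1, although the index can become negative for strongly dissimilar (anti-correlated) images.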

2. **Pixel Difference Percentage**: Measures the percentage of pixels that differ between two images. Values range from 0% to 100%, where 0% means identical images and 100% means completely different images.

**Note:** The analysis is separated into:
1. **Calculation Cell**: Computes both SSIM scores and pixel difference percentages, caching results to `./analysis_cache/similarity_data.pkl`
2. **Plotting Cell**: Generates visualizations using cached data for both metrics
3. **Cache Management Cell**: Utilities to clear cache and force recalculation

This separation allows you to:
- Run expensive calculations once and reuse results
- Quickly regenerate plots without recalculating similarity metrics
- Force recalculation when needed by setting `force_recalculate = True` or clearing cache

The two metrics provide complementary information:
- **SSIM** is better for understanding perceptual similarity and structural differences
- **Pixel Difference Percentage** provides a straightforward measure of how many pixels actually differ between images

In [13]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import re
import tempfile
import shutil
from PIL import Image

# Import the plotting and image-comparison helpers from the cairo_plot module.
# ".." is appended to sys.path first so the import can resolve against the
# parent of the current working directory.
# NOTE(review): assumes cairo_plot.py lives one level above the notebook and
# that the kernel's working directory is the notebook's folder - confirm.
sys.path.append("..")
from cairo_plot import plot, compute_ssim, compute_pixel_difference_percentage

# Create cache directory for saving computed data
# (exist_ok=True makes this safe to re-run when the directory already exists)
CACHE_DIR = "./analysis_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

In [None]:
# Create cache directory for saving computed data.
# This repeats the CACHE_DIR setup of the preceding cell with the same path;
# exist_ok=True means the second call does nothing if the directory exists.
CACHE_DIR = "./analysis_cache"
os.makedirs(CACHE_DIR, exist_ok=True)

# Function to convert timestamp to epoch milliseconds in UTC
def convert_to_epoch_millis_utc(timestamp_str):
    """
    Convert a timestamp to epoch milliseconds in UTC.

    Numbers (and strings that parse as numbers) below 1e12 are interpreted as
    epoch seconds and scaled to milliseconds, rounded to the nearest
    millisecond so floating-point representation error cannot drop one
    (1.001 s is 1001 ms, not 1000). Larger numbers are taken to be
    milliseconds already. Any other string is parsed with ``dateutil``; a
    result without timezone information is assumed to be UTC.

    Parameters:
    -----------
    timestamp_str : str or int or float
        Timestamp in various formats (ISO string, epoch seconds, epoch milliseconds)

    Returns:
    --------
    int or None: Epoch milliseconds in UTC, or None when the value is missing
        (None/NaN) or cannot be interpreted (a warning is printed in that case)
    """
    from datetime import datetime, timedelta, timezone

    if timestamp_str is None or pd.isna(timestamp_str):
        return None

    def _number_to_millis(value):
        # Heuristic: anything below 1e12 is epoch seconds, otherwise milliseconds
        if value < 1e12:
            return int(round(float(value) * 1000))
        return int(value)

    # If it's already a number, assume it's epoch time
    if isinstance(timestamp_str, (int, float)):
        try:
            return _number_to_millis(timestamp_str)
        except OverflowError:
            # +/- infinity cannot be converted to an integer
            print(f"Warning: Could not parse timestamp: {timestamp_str}")
            return None

    # Convert string to number if possible
    try:
        return _number_to_millis(float(timestamp_str))
    except (ValueError, TypeError, OverflowError):
        # Not a finite number; fall through to date parsing
        pass

    # Try to parse as ISO datetime string. dateutil is imported only here so
    # the numeric paths above do not depend on it.
    try:
        import dateutil.parser

        dt = dateutil.parser.parse(timestamp_str)
        # Convert to UTC if timezone aware, otherwise assume UTC
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        else:
            dt = dt.astimezone(timezone.utc)
        # Dividing the offset from the epoch by one millisecond avoids the
        # lossy float seconds -> milliseconds multiplication.
        epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
        return int(round((dt - epoch) / timedelta(milliseconds=1)))
    except Exception:
        # Unparseable input is reported and mapped to None. Catching Exception
        # (rather than a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        print(f"Warning: Could not parse timestamp: {timestamp_str}")
        return None

# Function to find all query CSV files for a specific dataset and method
def find_query_csvs(base_dir, method, database, dataset):
    """
    Find all query result CSV files for a specific dataset and method.

    The expected layout is
    ``<base_dir>/<method>/<database>/<dataset>/run_*/query_<id>/<measure>.csv``,
    with a ``results.csv`` in every run directory whose 'query #', 'from' and
    'to' columns give the time range of each query. Runs and queries are
    visited in sorted (lexicographic) path order so the result is
    reproducible. Runs without ``results.csv``, ``query_*`` entries whose
    suffix is not an integer, queries missing from ``results.csv`` and queries
    whose time range cannot be parsed are reported and skipped.

    Parameters:
    -----------
    base_dir : str
        Base directory path
    method : str
        Method name (e.g., 'm4', 'm4Inf')
    database : str
        Database name (e.g., 'influx')
    dataset : str
        Dataset name

    Returns:
    --------
    list: One dictionary per measure CSV with the keys 'path', 'run', 'query',
        'query_id', 'measure', 'dataset', 'method', 'start_time', 'end_time'
        (epoch milliseconds), 'duration', 'from_raw' and 'to_raw'
    """
    dataset_dir = os.path.join(base_dir, method, database, dataset)
    query_files = []

    # Check if the directory exists
    if not os.path.exists(dataset_dir):
        print(f"Directory not found: {dataset_dir}")
        return query_files

    # glob returns entries in filesystem order; sort for reproducible output
    for run_dir in sorted(glob.glob(os.path.join(dataset_dir, "run_*"))):
        run_name = os.path.basename(run_dir)
        result_csv = os.path.join(run_dir, "results.csv")

        if not os.path.exists(result_csv):
            print(f"Results CSV not found: {result_csv}")
            continue

        df_result = pd.read_csv(result_csv)
        print(f"Loaded results CSV with {len(df_result)} rows")

        # Find all query directories within this run
        for query_dir in sorted(glob.glob(os.path.join(run_dir, "query_*"))):
            query_name = os.path.basename(query_dir)
            try:
                query_id = int(query_name.split("_")[1])
            except (IndexError, ValueError):
                # e.g. "query_backup": not a numbered query, so skip it
                # instead of aborting the whole scan
                print(f"Skipping entry with unexpected name: {query_dir}")
                continue

            # Get query time range from results CSV
            query_row = df_result[df_result['query #'] == query_id]
            if query_row.empty:
                print(f"Query {query_id} not found in results CSV")
                continue

            query_start_raw = query_row['from'].values[0]
            query_end_raw = query_row['to'].values[0]

            # Convert to epoch milliseconds in UTC
            query_start_millis = convert_to_epoch_millis_utc(query_start_raw)
            query_end_millis = convert_to_epoch_millis_utc(query_end_raw)

            if query_start_millis is None or query_end_millis is None:
                print(f"Could not parse timestamps for query {query_id}: {query_start_raw} -> {query_end_raw}")
                continue

            print(f"Query {query_id}: {query_start_raw} -> {query_start_millis}, {query_end_raw} -> {query_end_millis}")

            # Every CSV in the query directory is one measure; the part of the
            # file name before the first "." is used as the measure id
            for csv_file in sorted(glob.glob(os.path.join(query_dir, "*.csv"))):
                measure_id = os.path.basename(csv_file).split(".")[0]

                query_files.append({
                    'path': csv_file,
                    'run': run_name,
                    'query': query_name,
                    'query_id': query_id,
                    'measure': measure_id,
                    'dataset': dataset,
                    'method': method,
                    'start_time': query_start_millis,
                    'end_time': query_end_millis,
                    'duration': query_end_millis - query_start_millis,
                    'from_raw': query_start_raw,
                    'to_raw': query_end_raw
                })

    return query_files

# Function to generate plot images and compute similarity metrics
def compute_query_similarity_metrics(datasets, methods, temp_dir, img_width=1000, img_height=600):
    """
    Generate plots and compute SSIM scores and pixel difference percentages between methods and ground truth.

    The first entry of ``methods`` is used as the ground truth; every other entry is
    compared against it. Query result CSVs are matched on (query_id, measure), both
    results are rendered with ``plot`` over the ground-truth query's time range, and the
    two images are compared with ``compute_ssim`` and ``compute_pixel_difference_percentage``.

    Parameters:
    -----------
    datasets : list
        List of dataset names
    methods : list
        List of method configurations (dicts with 'name', 'path', 'method' and 'database');
        ``methods[0]`` is the ground truth
    temp_dir : str
        Temporary directory for storing images
    img_width : int, optional
        Width passed to ``plot`` for every rendered image (default 1000)
    img_height : int, optional
        Height passed to ``plot`` for every rendered image (default 600)

    Returns:
    --------
    pd.DataFrame or None: one row per (dataset, query, measure, method) with SSIM score,
        pixel difference percentage and the query time range; None when nothing could be compared
    """
    similarity_results = []

    # Get the first method as ground truth
    gt_method = methods[0]

    for dataset in datasets:
        print(f"Processing dataset: {dataset}")

        # Find all query CSVs for ground truth method
        gt_csvs = find_query_csvs(
            gt_method['path'],
            gt_method['method'],
            gt_method['database'],
            dataset
        )

        if not gt_csvs:
            print(f"No ground truth CSVs found for dataset {dataset}")
            continue

        # Index ground truth files by (query_id, measure); when several entries share a
        # key the last one returned by find_query_csvs wins
        gt_by_query = {(entry['query_id'], entry['measure']): entry for entry in gt_csvs}

        # Ground-truth images already rendered for this dataset. The image depends only on
        # the ground-truth CSV and its query range, so it is rendered once and reused for
        # every comparison method instead of being re-read and re-plotted each time.
        rendered_gt = set()

        # For each comparison method
        for method_idx, method in enumerate(methods[1:], 1):  # Skip the ground truth method
            method_name = method['name']
            print(f"  Comparing {method_name} with ground truth")

            # Find all query CSVs for this method
            method_csvs = find_query_csvs(
                method['path'],
                method['method'],
                method['database'],
                dataset
            )

            if not method_csvs:
                print(f"  No CSVs found for method {method_name}")
                continue

            # Index method files the same way as the ground truth
            method_by_query = {(entry['query_id'], entry['measure']): entry for entry in method_csvs}

            # Find common queries between ground truth and method
            common_keys = set(gt_by_query.keys()) & set(method_by_query.keys())
            print(f"  Found {len(common_keys)} common queries to compare")

            # Sorted so the order of the result rows is the same on every run
            for query_key in sorted(common_keys):
                query_id, measure = query_key

                gt_csv = gt_by_query[query_key]
                method_csv = method_by_query[query_key]

                try:
                    # Use the converted epoch milliseconds for plot range
                    query_start = gt_csv['start_time']
                    query_end = gt_csv['end_time']

                    # plot() receives the path without extension and the '.png' file is read
                    # back afterwards (presumably plot appends '.png' - confirm against plot)
                    gt_img_stem = os.path.join(temp_dir, f"{dataset}_{query_id}_{measure}_gt")
                    method_img_stem = os.path.join(temp_dir, f"{dataset}_{query_id}_{measure}_method{method_idx}")
                    gt_img_path = gt_img_stem + ".png"
                    method_img_path = method_img_stem + ".png"

                    # Render the ground truth only the first time this query/measure is seen;
                    # it is marked as rendered only after plot() succeeded
                    if query_key not in rendered_gt:
                        gt_df = pd.read_csv(gt_csv['path'])
                        plot(gt_df, measure, gt_img_stem, img_width, img_height, query_start, query_end)
                        rendered_gt.add(query_key)

                    method_df = pd.read_csv(method_csv['path'])
                    plot(method_df, measure, method_img_stem, img_width, img_height, query_start, query_end)

                    # Compute SSIM between the two images
                    ssim_score = compute_ssim(gt_img_path, method_img_path)

                    # Compute pixel difference percentage between the two images
                    pixel_diff_percentage = compute_pixel_difference_percentage(gt_img_path, method_img_path)

                    # Save results with time information
                    similarity_results.append({
                        'dataset': dataset,
                        'query_id': query_id,
                        'measure': measure,
                        'method': method_name,
                        'ssim': ssim_score,
                        'pixel_diff_percentage': pixel_diff_percentage,
                        'start_time': query_start,
                        'end_time': query_end,
                        'duration': gt_csv['duration'],
                        'from_raw': gt_csv['from_raw'],
                        'to_raw': gt_csv['to_raw']
                    })

                except Exception as e:
                    # Best effort: a single unreadable or unplottable query must not abort the whole comparison
                    print(f"  Error processing {dataset} query {query_id} measure {measure}: {str(e)}")

    # Convert results to DataFrame
    if similarity_results:
        return pd.DataFrame(similarity_results)

    print("No similarity results generated.")
    return None

# Check if similarity data is already cached
similarity_cache_file = os.path.join(CACHE_DIR, "similarity_data.pkl")
force_recalculate = False  # Set this to True to force recalculation

if os.path.exists(similarity_cache_file) and not force_recalculate:
    print("Loading cached similarity data...")
    # NOTE: pickle.load can execute arbitrary code; only load cache files written by this notebook
    with open(similarity_cache_file, 'rb') as f:
        cached_data = pickle.load(f)
    similarity_df = cached_data.get('similarity_df')
    similarity_summary = cached_data.get('similarity_summary')
    if similarity_df is not None:
        print(f"Loaded cached similarity data with {len(similarity_df)} records")
    else:
        # A cache file without a 'similarity_df' entry would otherwise crash on len(None)
        print("Cached similarity data contains no 'similarity_df'; set force_recalculate = True to recompute")
else:
    print("Computing similarity metrics...")

    # Create temporary directory for images
    temp_dir = tempfile.mkdtemp()

    try:
        print(f"Created temporary directory: {temp_dir}")

        # Compute similarity metrics
        print("Computing SSIM scores and pixel difference percentages between query results...")
        similarity_df = compute_query_similarity_metrics(all_datasets, METHODS, temp_dir)

        if similarity_df is not None and not similarity_df.empty:
            # Display summary of similarity metrics
            print("\nSummary of similarity metrics by dataset and method:")
            similarity_summary = similarity_df.groupby(['dataset', 'method'])[['ssim', 'pixel_diff_percentage']].agg(['mean', 'median', 'std', 'min', 'max', 'count']).reset_index()
            display(similarity_summary)

            # Display time range information if available
            if 'start_time' in similarity_df.columns:
                print("\nQuery time range summary (epoch milliseconds UTC):")
                time_summary = similarity_df.groupby(['dataset', 'method']).agg({
                    'start_time': ['min', 'max'],
                    'end_time': ['min', 'max'],
                    'duration': ['mean', 'median', 'std']
                }).reset_index()
                display(time_summary)

                # Also show some example raw timestamps for verification
                print("\nExample timestamp conversions:")
                sample_data = similarity_df[['dataset', 'method', 'query_id', 'from_raw', 'to_raw', 'start_time', 'end_time', 'duration']].head(3)
                display(sample_data)

            # Cache the computed data
            cache_data = {
                'similarity_df': similarity_df,
                'similarity_summary': similarity_summary
            }
            # The cache directory may have been removed (clear_analysis_cache deletes the
            # directory itself), so make sure it exists before writing into it
            os.makedirs(CACHE_DIR, exist_ok=True)
            with open(similarity_cache_file, 'wb') as f:
                pickle.dump(cache_data, f)
            print(f"Cached similarity data to {similarity_cache_file}")
        else:
            print("No similarity results available.")
            similarity_df = None
            similarity_summary = None

    finally:
        # Clean up the temporary directory; ignore_errors keeps a failed cleanup from
        # masking an exception raised while computing the metrics
        shutil.rmtree(temp_dir, ignore_errors=True)
        print(f"Removed temporary directory: {temp_dir}")

# For backward compatibility, create ssim_df and ssim_summary variables
if similarity_df is not None:
    ssim_df = similarity_df.copy()
    ssim_summary = similarity_df.groupby(['dataset', 'method'])['ssim'].agg(['mean', 'median', 'std', 'min', 'max', 'count']).reset_index()
else:
    ssim_df = None
    ssim_summary = None

In [None]:
# Similarity Metrics Plotting - Run this cell to generate SSIM and pixel difference visualizations
# This cell uses cached data from the previous calculation cell

def _draw_metric_boxes(ax, data, value_col, datasets, methods, colors, box_width, rng,
                       whisker_lw=1.5, mean_marker_sizes=(8, 6), point_size=30, point_edge_lw=0.5):
    """
    Draw one summary box per (dataset, method) group of ``data[value_col]`` on ``ax``.

    Each box spans median - std/2 to median + std/2 (lower edge clamped at 0) and is
    decorated with a median line, whiskers to the group's min and max, a mean marker and
    the individual observations scattered with horizontal jitter. Groups without rows
    are skipped.

    Parameters:
    -----------
    ax : matplotlib Axes
        Axes to draw on
    data : pd.DataFrame
        Frame with 'dataset', 'method' and ``value_col`` columns
    value_col : str
        Name of the metric column to summarise
    datasets, methods : sequence
        Group labels; the dataset index is the x position, methods are offset around it
    colors : dict
        Maps each method to its face colour
    box_width : float
        Width of a single box in x-axis units
    rng : numpy.random.Generator
        Source of the jitter offsets (pass a seeded generator for reproducible figures)
    whisker_lw, mean_marker_sizes, point_size, point_edge_lw
        Line width of whiskers/caps, (outer, inner) mean marker sizes, scatter point size
        and scatter edge width
    """
    n_groups = len(methods)

    for ds_idx, dataset in enumerate(datasets):
        for method_idx, method in enumerate(methods):
            values = data.loc[(data['dataset'] == dataset) & (data['method'] == method), value_col]
            if len(values) == 0:
                continue

            mean_val = values.mean()
            median_val = values.median()
            min_val = values.min()
            max_val = values.max()
            # Series.std() is NaN for a single observation; treat that as zero spread so
            # the box collapses onto the median instead of getting a NaN height
            std_val = values.std()
            if pd.isna(std_val):
                std_val = 0.0

            # Position for this box
            x_pos = ds_idx + (method_idx - n_groups / 2 + 0.5) * box_width
            left = x_pos - box_width / 2
            right = x_pos + box_width / 2
            face_color = colors[method]

            # Box edges: clamping the lower edge at 0 must not push the upper edge up,
            # so the height is derived from both edges rather than fixed to std
            box_bottom = max(0, median_val - std_val / 2)
            box_top = max(box_bottom, median_val + std_val / 2)
            ax.add_patch(plt.Rectangle(
                (left, box_bottom),
                box_width, box_top - box_bottom,
                alpha=0.7,
                facecolor=face_color,
                edgecolor='black',
                linewidth=1.5
            ))

            # Median line (white underlay keeps it visible on dark boxes)
            ax.plot([left, right], [median_val, median_val],
                    color='white', linewidth=2.5, solid_capstyle='round')
            ax.plot([left, right], [median_val, median_val],
                    color='black', linewidth=1.5, solid_capstyle='round')

            # Whiskers
            whisker_low = max(0, min_val)
            ax.plot([x_pos, x_pos], [whisker_low, box_bottom],
                    color='black', linewidth=whisker_lw, linestyle='-')
            ax.plot([x_pos, x_pos], [box_top, max_val],
                    color='black', linewidth=whisker_lw, linestyle='-')

            # Caps on whiskers
            cap_half_width = box_width / 4
            ax.plot([x_pos - cap_half_width, x_pos + cap_half_width], [whisker_low, whisker_low],
                    color='black', linewidth=whisker_lw)
            ax.plot([x_pos - cap_half_width, x_pos + cap_half_width], [max_val, max_val],
                    color='black', linewidth=whisker_lw)

            # Mean point
            outer_size, inner_size = mean_marker_sizes
            ax.plot(x_pos, mean_val, 'o', color='white', markersize=outer_size)
            ax.plot(x_pos, mean_val, 'o', color='black', markersize=inner_size)

            # Individual points with jitter
            jitter = rng.uniform(-box_width / 3, box_width / 3, size=len(values))
            ax.scatter(
                x_pos + jitter,
                values.values,
                s=point_size,
                alpha=0.5,
                color=face_color,
                edgecolor='black',
                linewidth=point_edge_lw,
                zorder=3
            )


def _method_legend_handles(methods, colors):
    """Build one coloured legend patch per method."""
    return [plt.Rectangle((0, 0), 1, 1, facecolor=colors[method],
                          edgecolor='black', alpha=0.7, label=method)
            for method in methods if method in colors]


if 'similarity_df' in globals() and similarity_df is not None and not similarity_df.empty:
    print("Generating similarity metrics visualizations...")

    # Get unique datasets and methods
    datasets = similarity_df['dataset'].unique()
    methods = similarity_df['method'].unique()

    # Create a mapping of methods to colors using the global method color mapping
    # (falls back to blue when a method has no configured color)
    method_color_map = {method: GLOBAL_METHOD_COLORS.get(method, '#1f77b4') for method in methods}

    print(f"Method color mapping: {method_color_map}")

    # Create box plots of both metrics by dataset
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 16))

    n_datasets = len(datasets)
    n_methods = len(methods)
    width = 0.6 / n_methods

    # Fixed seed so the jittered points land in the same place every time the figure is regenerated
    jitter_rng = np.random.default_rng(42)

    # SSIM Box Plot
    _draw_metric_boxes(ax1, similarity_df, 'ssim', datasets, methods, method_color_map, width, jitter_rng,
                       whisker_lw=1.5, mean_marker_sizes=(8, 6), point_size=30, point_edge_lw=0.5)

    # Set SSIM axis properties
    ax1.set_xticks(range(len(datasets)))
    ax1.set_xticklabels(datasets, rotation=45, ha='right')
    ax1.set_ylim(max(0, similarity_df['ssim'].min() - 0.05), min(1.0, similarity_df['ssim'].max() + 0.05))

    # Create legend for SSIM plot
    ax1.legend(handles=_method_legend_handles(methods, method_color_map), loc='upper right')

    # Apply publication styling to SSIM plot
    set_publication_style(ax1,
                         title='SSIM Scores vs Ground Truth (M4-NoC)',
                         xlabel='Dataset',
                         ylabel='SSIM Score',
                        )

    # Pixel Difference Percentage Box Plot (slightly lighter strokes and smaller markers)
    _draw_metric_boxes(ax2, similarity_df, 'pixel_diff_percentage', datasets, methods, method_color_map, width, jitter_rng,
                       whisker_lw=1.2, mean_marker_sizes=(6, 4), point_size=20, point_edge_lw=0.3)

    # Set pixel difference axis properties and apply publication style
    ax2.set_xticks(range(len(datasets)))
    ax2.set_xticklabels(datasets, rotation=45, ha='right')

    # Create legend for pixel difference plot
    ax2.legend(handles=_method_legend_handles(methods, method_color_map), loc='upper right')

    # Apply publication styling to pixel difference plot
    set_publication_style(ax2,
                         title='Pixel Difference % vs Ground Truth (M4-NoC)',
                         xlabel='Dataset',
                         ylabel='Pixel Difference (%)',
                         )

    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_DIR, "similarity_metrics_boxplots_by_dataset.pdf"))
    plt.savefig(os.path.join(FIGURES_DIR, "similarity_metrics_boxplots_by_dataset.png"))
    plt.show()

    # Create heatmaps for both metrics
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

    # SSIM Heatmap - higher values are better (use YlGnBu: yellow-green-blue)
    heatmap_data_ssim = similarity_df.groupby(['dataset', 'method'])['ssim'].mean().reset_index()
    pivot_data_ssim = heatmap_data_ssim.pivot(index='dataset', columns='method', values='ssim')

    sns.heatmap(pivot_data_ssim, annot=True, fmt='.3f', cmap='YlGnBu', vmin=0, vmax=1,
                linewidths=0.5, ax=ax1, cbar_kws={'shrink': 0.8})

    # Apply publication styling to SSIM heatmap
    set_publication_style(ax1,
                         title='Average SSIM Scores by Dataset and Method\n(Higher is Better)',
                         xlabel='Method',
                         ylabel='Dataset')

    # Pixel Difference Percentage Heatmap - lower values are better (use YlGnBu_r: reverse YlGnBu)
    heatmap_data_pixel = similarity_df.groupby(['dataset', 'method'])['pixel_diff_percentage'].mean().reset_index()
    pivot_data_pixel = heatmap_data_pixel.pivot(index='dataset', columns='method', values='pixel_diff_percentage')

    # Use reversed YlGnBu colormap so that lower values (better) are darker blue/green
    sns.heatmap(pivot_data_pixel, annot=True, fmt='.1f', cmap='YlGnBu_r',
                linewidths=0.5, ax=ax2, cbar_kws={'shrink': 0.8})

    # Apply publication styling to pixel difference heatmap
    set_publication_style(ax2,
                         title='Average Pixel Difference % by Dataset and Method\n(Lower is Better)',
                         xlabel='Method',
                         ylabel='Dataset')

    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_DIR, "similarity_metrics_heatmaps.pdf"))
    plt.savefig(os.path.join(FIGURES_DIR, "similarity_metrics_heatmaps.png"))
    plt.show()

    # Display summary statistics
    print("\n=== SIMILARITY METRICS SUMMARY ===")
    print("\nSSIM Scores Summary:")
    print(similarity_df.groupby(['dataset', 'method'])['ssim'].agg(['mean', 'median', 'std', 'min', 'max', 'count']).round(4))

    print("\nPixel Difference Percentage Summary:")
    print(similarity_df.groupby(['dataset', 'method'])['pixel_diff_percentage'].agg(['mean', 'median', 'std', 'min', 'max', 'count']).round(2))

    print("\nSimilarity metrics visualizations completed!")

elif 'ssim_df' in globals() and ssim_df is not None and not ssim_df.empty:
    # Fallback to original SSIM-only visualization for backward compatibility
    print("Generating SSIM visualizations (legacy mode)...")

    # Create box plots of SSIM scores by dataset
    fig, ax = plt.subplots(figsize=(8, 6))  # Smaller figure for double-column

    # Use seaborn for cleaner boxplots with consistent colors
    method_names = ssim_df['method'].unique()
    palette = [GLOBAL_METHOD_COLORS.get(method, '#1f77b4') for method in method_names]
    sns.boxplot(data=ssim_df, x='dataset', y='ssim', hue='method', ax=ax, palette=palette)

    # Apply publication styling
    set_publication_style(ax,
                         title='SSIM Scores vs Ground Truth (M4-NoC)',
                         xlabel='Dataset',
                         ylabel='SSIM Score',
                         legend_title='Method')

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_DIR, "ssim_comparison_by_dataset.pdf"))
    plt.savefig(os.path.join(FIGURES_DIR, "ssim_comparison_by_dataset.png"))
    plt.show()

    # Create heatmap
    fig, ax = plt.subplots(figsize=(10, 8))  # Smaller figure for double-column

    # Create pivot table for heatmap
    ssim_pivot = ssim_df.groupby(['dataset', 'method'])['ssim'].mean().reset_index()
    ssim_pivot = ssim_pivot.pivot(index='dataset', columns='method', values='ssim')

    # Create heatmap
    sns.heatmap(ssim_pivot, annot=True, fmt='.3f', cmap='YlGnBu', vmin=0, vmax=1,
                linewidths=0.5, ax=ax, cbar_kws={'shrink': 0.8})

    # Apply publication styling
    set_publication_style(ax,
                         title='Average SSIM Scores by Dataset and Method',
                         xlabel='Method',
                         ylabel='Dataset')

    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_DIR, "ssim_heatmap_by_dataset.pdf"))
    plt.savefig(os.path.join(FIGURES_DIR, "ssim_heatmap_by_dataset.png"))
    plt.show()

    print("SSIM visualizations completed!")

else:
    print("No similarity data available for plotting.")

In [None]:
# Cache Management Utilities
# Use this cell to manage cached analysis data

def clear_analysis_cache():
    """Remove the cache directory (CACHE_DIR) and everything stored in it."""
    # Nothing to delete: report it and stop
    if not os.path.exists(CACHE_DIR):
        print("Cache directory does not exist")
        return

    # rmtree deletes the directory itself, not just its contents
    shutil.rmtree(CACHE_DIR)
    print(f"Cleared analysis cache directory: {CACHE_DIR}")

def list_cache_contents():
    """Print each entry found in CACHE_DIR together with its size in bytes."""
    if not os.path.exists(CACHE_DIR):
        print("Cache directory does not exist")
        return

    entries = os.listdir(CACHE_DIR)
    if not entries:
        print("Cache directory is empty")
        return

    print(f"Cache directory contents ({CACHE_DIR}):")
    for file in entries:
        size = os.path.getsize(os.path.join(CACHE_DIR, file))
        print(f"  - {file} ({size:,} bytes)")

def get_cache_info():
    """
    Print size, modification time and basic content statistics of the similarity cache.

    The current cache file (similarity_data.pkl) is reported when it exists; the legacy
    SSIM cache file (ssim_data.pkl) is only looked at when the current one is missing.
    """
    similarity_cache_file = os.path.join(CACHE_DIR, "similarity_data.pkl")
    ssim_cache_file = os.path.join(CACHE_DIR, "ssim_data.pkl")  # Legacy file

    def report(cache_path, headline, frame_key):
        # File-level facts first; the modification time is epoch seconds rendered by pandas
        size = os.path.getsize(cache_path)
        mod_time = os.path.getmtime(cache_path)
        mod_time_str = pd.to_datetime(mod_time, unit='s').strftime('%Y-%m-%d %H:%M:%S')

        print(headline)
        print(f"  - Size: {size:,} bytes")
        print(f"  - Modified: {mod_time_str}")

        # Try to load and show basic info; any failure is reported instead of raised
        try:
            with open(cache_path, 'rb') as f:
                cached_data = pickle.load(f)
                cached_frame = cached_data.get(frame_key)
                if cached_frame is None:
                    return
                print(f"  - Records: {len(cached_frame):,}")
                print(f"  - Datasets: {cached_frame['dataset'].nunique()}")
                print(f"  - Methods: {cached_frame['method'].nunique()}")
                print(f"  - Columns: {list(cached_frame.columns)}")
        except Exception as e:
            print(f"  - Error loading cache: {str(e)}")

    # Check new similarity data cache
    if os.path.exists(similarity_cache_file):
        report(similarity_cache_file, "Similarity metrics cache file exists:", 'similarity_df')
        return

    # Check legacy SSIM cache
    if os.path.exists(ssim_cache_file):
        report(ssim_cache_file, "Legacy SSIM cache file exists:", 'ssim_df')
        return

    print("No similarity metrics cache files found")

# Run cache info by default
print("=== CACHE INFORMATION ===")
list_cache_contents()
print()
get_cache_info()

# Clearing the cache is destructive: every similarity metric has to be recomputed on the
# next run. It is therefore opt-in - set CLEAR_CACHE to True to clear the cache if needed.
CLEAR_CACHE = False
if CLEAR_CACHE:
    clear_analysis_cache()

In [32]:
# Spot check of a single query/measure: render ground truth and cached result, then compare them
query_id = 40
measure = 10
dataset = "soccer_exp"
width = 1000
height = 600
# Dedicated name for the folder inspected here, so the notebook-wide `outFolder` setting
# from the configuration cell is not silently overwritten for cells run afterwards
spot_check_folder = "_output_no_allocation"
gt_df = pd.read_csv(f"../{spot_check_folder}/timeQueries/m4/influx/{dataset}/run_0/query_{query_id}/{measure}.csv")
method_df = pd.read_csv(f"../{spot_check_folder}/timeCacheQueries/m4Inf/influx/{dataset}/run_0/query_{query_id}/{measure}.csv")

# Image files are written to the current working directory
gt_img_path = f"{dataset}_{query_id}_{measure}_gt.png"
method_img_path = f"{dataset}_{query_id}_{measure}_method.png"

# Get min and max timestamps to use the same scale for both plots
# (the range is taken from the method result only)
min_ts = method_df['timestamp'].min()
max_ts = method_df['timestamp'].max()
# Generate plots using cairo_plot; plot() receives the path without the '.png' extension
plot(gt_df, str(measure), gt_img_path.replace('.png', ''), width, height, min_ts, max_ts)
plot(method_df, str(measure), method_img_path.replace('.png', ''), width, height, min_ts, max_ts)

# Compute SSIM between the two images
ssim_score = compute_ssim(gt_img_path, method_img_path)

px_diff = compute_pixel_difference_percentage(gt_img_path, method_img_path)

print(ssim_score)
print(f"{px_diff}%")


0.9559743692319546
1.3131666666666666%
