# Visualize Query Results

In [None]:
import matplotlib.pyplot as plt
import json
import re
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib.patches import Patch

In [None]:
# Root directory that holds the benchmark result files.
results_dir = "./results"
# Name of the experiment sub-directory inside results_dir to visualize.
experiment_date = "10_2_2024"
# Dataset identifier; it is part of every result file name and of the plot titles.
dataset_name = "hnm"

Define the color palette

In [None]:
# Apply seaborn's "whitegrid" theme to the plots created below.
sns.set_theme(style="whitegrid")
# Seaborn's "deep" palette; the plot colors are picked from it by index.
palette = sns.color_palette("deep")
# Bare expression as the last line of the cell so the notebook displays the palette.
palette

## Load Visualization Data

In [None]:
# Names of the benchmark result sets to load. Each name is spliced into the
# file name below, so the spelling (including "brutefore_test") has to match
# the result files on disk.
visualizations = {
    "cw_test": None,
    "brutefore_test": None,
    "ratios": None,
    "data_buckets": None,
    "filter_examples": None,
}

# Replace every placeholder with the parsed content of its JSON result file.
# Only existing keys are reassigned, so iterating the dict directly is safe.
for vis_name in visualizations:
    file_path = f"{results_dir}/{experiment_date}/benchmark_{dataset_name}_{vis_name}.json"
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # locale-dependent default encoding.
    with open(file_path, 'r', encoding="utf-8") as file:
        visualizations[vis_name] = json.load(file)

### Visualize Filter Restrictiveness

In [None]:
# Default bar color for the charts below: the first entry of the palette.
blue_color = palette[0]

In [None]:
def visualize_ratios(ratios):
    """
    Plot the distribution of per-query ratios as a bar chart.

    The ratios are grouped into ten equal-width bins covering 0.0 to 0.5, and
    each bar shows the percentage of all supplied queries that fall into its
    bin. Ratios outside that range are not counted in any bar.

    Parameters:
    ratios (sequence of float): One ratio per query; presumably the fraction
        of the dataset left after applying the query conditions (confirm
        against the benchmark that writes the "ratios" file).
    """
    ratios = np.asarray(ratios)
    num_queries = ratios.size

    # Define the intervals (bins): 11 edges for 10 bins
    bins = np.linspace(0.0, 0.5, num=11)

    # Count the number of ratios in each bin
    hist, _ = np.histogram(ratios, bins)

    # The y-axis is labelled as a percentage of queries, so convert the raw
    # counts; with no queries at all the (all-zero) counts are plotted as is.
    if num_queries:
        percentages = hist * 100.0 / num_queries
    else:
        percentages = hist

    # Plotting the bar chart; each bar starts at its bin's left edge and is
    # exactly one bin wide.
    plt.figure(figsize=(10, 6))
    plt.bar(bins[:-1], percentages, width=bins[1] - bins[0], align='edge', color=blue_color)

    # Setting the x-axis limits
    plt.xlim(0, 0.5)

    # Adding labels and title; the query count in the title is taken from the
    # data instead of being hard-coded.
    plt.xlabel('% Of Dataset Left')
    plt.ylabel('% Of Queries')
    plt.title(
        f'% Of Dataset Left After Applying Query Conditions For Dataset '
        f'{dataset_name.capitalize()} With {num_queries} Queries'
    )

    # Show the plot
    plt.show()

In [None]:
# Plot the distribution of the ratios loaded from the "ratios" result file.
visualize_ratios(visualizations["ratios"])

### Visualize Bucket Distribution

In [None]:
def _annotate_bar_heights(bars):
    """Write the integer height slightly above every bar that is taller than zero."""
    for bar in bars:
        bar_height = int(bar.get_height())
        if bar_height > 0:  # Only annotate bars with a height greater than zero
            plt.annotate(f'{bar_height}', (bar.get_x() + bar.get_width() / 2., bar_height),
                         ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                         textcoords='offset points')


def plot_bucket_items(data, highlight_ids=None):
    """
    Plot the number of items in each bucket, with an optional overlay of highlighted items.

    Parameters:
    data (DataFrame): Frame with at least the 'id' and 'bucket_str' columns;
        rows are counted per distinct 'bucket_str' value.
    highlight_ids (list, optional): Ids to highlight. When given (even if
        empty), a second series of bars counts the rows whose 'id' is in this
        list, a legend is added and the title names the highlighted query.
    """
    show_highlight = highlight_ids is not None

    # Set up the figure
    plt.figure(figsize=(10, 6))

    # Count the total number of items in each bucket
    bucket_counts = data.groupby('bucket_str').size()

    # Create the bar plot for total items
    total_bars = plt.bar(bucket_counts.index.tolist(), bucket_counts.tolist(),
                         label='Total Items in Bucket', color=blue_color)
    bar_groups = [total_bars]

    if show_highlight:
        # Count only the rows whose id is highlighted
        highlighted_counts = data[data['id'].isin(highlight_ids)].groupby('bucket_str').size()

        # Take the overlay color straight from the palette: the notebook's
        # `red_color` (also palette[3]) is only assigned in a later cell, so
        # referring to it here fails when the cells are run top to bottom.
        highlighted_bars = plt.bar(highlighted_counts.index.tolist(), highlighted_counts.tolist(),
                                   color=palette[3], label='Items Satisfying Condition in Bucket')
        bar_groups.append(highlighted_bars)

        plt.legend()

    # Use the same condition as the overlay so that title, legend and bars
    # always agree, including for an empty highlight list.
    if show_highlight:
        title = 'Number of Items in Each Bucket For Query With Lowest Restrictiveness'
    else:
        title = 'Number of Items in Each Bucket'
    plt.title(title)
    plt.xlabel('Bucket')
    plt.ylabel('Count')

    # Annotate the total bars first, then the highlighted bars if present
    for bars in bar_groups:
        _annotate_bar_heights(bars)

    plt.show()

In [None]:
# Turn the loaded "data_buckets" records into a frame with named columns
# (assumes every record has exactly these three fields - confirm against the
# benchmark output) and plot the per-bucket totals without any highlight.
data_buckets = pd.DataFrame(visualizations["data_buckets"], columns=['id', 'bucket', 'bucket_str'])
plot_bucket_items(data_buckets)

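Visualize a query filter: overlay the items selected by the first entry of the filter examples on the bucket distribution.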

In [None]:
# Highlight the ids stored under the first entry of "filter_examples" on top
# of the per-bucket totals.
plot_bucket_items(data_buckets, list(visualizations["filter_examples"].values())[0])

### Visualize Constraint Weight Parameter

In [None]:
# Results of the constraint-weight test; printed to inspect the raw values.
cw_test = visualizations["cw_test"]
print(cw_test)

In [None]:
# Reduce every result key to its numeric part (digits, '.' and '-') so it can
# serve as the x-axis label; the values are plotted in the same order.
keys = [re.sub("[^0-9.-]", "", name) for name in cw_test]
values = [*cw_test.values()]

# One bar per constraint-weight setting
plt.figure(figsize=(10, 6))
plt.bar(keys, values, color=blue_color)

# Title and axis labels
plt.title(f'Dataset {dataset_name.capitalize()} With 1000 queries')
plt.xlabel('Constraint Weight Parameter Value')
plt.ylabel('Median Precision')

# Render the figure
plt.show()

### Visualize Brute Force Parameter

In [None]:
# Results of the brute-force test (the key carries the spelling used in the
# result file name); printed to inspect the raw values.
bruteforce_test = visualizations["brutefore_test"]
print(bruteforce_test)

In [None]:
# Line colors for the two series, picked from the shared palette.
green_color, red_color = palette[2], palette[3]

plt.figure(figsize=(8, 6))

# For each series, element 0 is plotted on the x-axis and element 1 on the
# y-axis; entry '0' is labelled LMI and entry '1' Bruteforce.
plt.plot(
    bruteforce_test['0'][0],
    bruteforce_test['0'][1],
    color=green_color,
    marker='o',
    linestyle='-',
    label='LMI',
)
plt.plot(
    bruteforce_test['1'][0],
    bruteforce_test['1'][1],
    color=red_color,
    marker='o',
    linestyle='-',
    label='Bruteforce',
)

plt.title(f'Dataset {dataset_name.capitalize()} With 1000 queries')
plt.xlabel('% of Dataset Left')
plt.ylabel('Median Wall Time In Seconds')
plt.legend()
plt.show()