In [131]:
import matplotlib.pyplot as plt
import pandas as pd

In [132]:
def _draw_intervals(ax, intervals, first_row, limit, color, label):
    """Draw one horizontal bar per row of ``intervals`` and return the row count.

    ``intervals`` must provide ``from`` and ``to`` columns.  Bars are stacked
    upwards starting at y = ``first_row``; starts are clipped to 0 and ends to
    ``limit`` so every bar stays inside the plotted window.
    """
    if intervals.empty:
        # Nothing to draw; also keeps an empty bar container out of the legend.
        return 0

    # Convert to float up front so unsigned integer columns cannot wrap around
    # in the subtraction below.
    start = intervals['from'].clip(lower=0).to_numpy(dtype=float)
    end = intervals['to'].clip(upper=limit).to_numpy(dtype=float)
    rows = [first_row + offset for offset in range(len(intervals))]

    # Each bar spans [start - 0.5, end + 0.5], so an interval with
    # from == to is still drawn one unit wide.
    ax.barh(
        rows,
        end - start + 1.0,
        left=start - 0.5,
        height=0.4,
        align='center',
        color=color,
        label=label,
    )
    return len(intervals)


def plot_input_data(benchmark_name: str, max_rows: int = 25, limit: int = 100) -> None:
    """Save a Gantt-style figure of one benchmark's input intervals.

    Reads ``database.parquet`` and ``query.parquet`` from
    ``../../data/{benchmark_name}/10000/1/``, keeps the rows whose ``from`` is
    below ``limit``, and draws the first ``max_rows`` rows of each file as
    horizontal bars: database rows in green at the bottom, query rows in blue
    stacked directly above them.  The figure is written to
    ``../../docs/figures/test-data-{benchmark_name}.png`` and then closed;
    nothing is displayed or returned.

    Args:
        benchmark_name: Directory name of the benchmark under ``../../data``;
            also used in the figure title and in the output file name.
        max_rows: Maximum number of rows drawn from each of the two files.
        limit: Upper bound of the plotted window.  Rows with
            ``from >= limit`` are dropped and bar ends are clipped to it.
    """
    data_dir = f"../../data/{benchmark_name}/10000/1"
    row_cap = max(max_rows, 0)  # a negative cap means "draw nothing"

    data_database = (
        pd.read_parquet(f"{data_dir}/database.parquet")
        .query(f"`from` < {limit}")
        .head(row_cap)
    )
    data_query = (
        pd.read_parquet(f"{data_dir}/query.parquet")
        .query(f"`from` < {limit}")
        .head(row_cap)
    )

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        # Database intervals occupy rows 0..db_rows-1; query intervals follow.
        db_rows = _draw_intervals(ax, data_database, 0, limit, 'green', 'database')
        q_rows = _draw_intervals(ax, data_query, db_rows, limit, 'blue', 'query')

        ax.set_xlabel('Value')
        ax.set_title(f"Visualization of {benchmark_name}")
        ax.grid(True, alpha=0.3)
        if db_rows or q_rows:
            # Without a legend the two colours cannot be told apart in the
            # saved image.
            ax.legend()

        # Tight layout to prevent label cutoff
        fig.tight_layout()
        fig.savefig(f"../../docs/figures/test-data-{benchmark_name}.png", bbox_inches='tight')
    finally:
        # Always release the figure, even when drawing or saving fails, so a
        # loop over many benchmarks does not accumulate open figures.
        plt.close(fig)
    

In [133]:
# Benchmarks to visualise, grouped by family; the order below is the order
# in which the figures are generated.
benchmark_names = [
    # all-to-* / one-to-* pairings
    "all-to-all",
    "all-to-one",
    "continuous-16",
    "one-to-all",
    "one-to-one",
    # spanning and sparse layouts
    "spanning-4",
    "spanning-16",
    "sparse-16",
]

# Produce one saved figure per benchmark.
for benchmark_name in benchmark_names:
    plot_input_data(benchmark_name)