In [None]:
import matplotlib as mpl

import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
column_names = ["Tag", "TrainTime", "ReadTime", "IndexSize"]

# Load the benchmark results. Because names= is given, pandas reads every
# line of the file as data (no header row is consumed).
df = pd.read_csv("data.dat", names=column_names)

# A tag holds three '_'-separated fields: learned-index name, dataset, workload.
df[['Learned Index', 'Dataset', 'Workload']] = df['Tag'].str.split('_', expand=True)

mpl.rcParams['font.family'] = 'Times New Roman'

num_datasets = df['Dataset'].nunique()

# Rescale the raw timings to the units printed on the axis labels.
# NOTE(review): the divisors are 1e8 (ReadTime) and 1e7 (TrainTime). The raw
# units are not visible here, and the load-time cell converts to seconds with
# 1e6 -- confirm that these factors are intended.
# TrainTime is rescaled here but plotted by the train-time cell further down,
# so this cell has to run before that one.
df['ReadTime'] = df['ReadTime'] / 1e8
df['TrainTime'] = df['TrainTime'] / 1e7

# One facet per dataset in a single row; every facet keeps its own y-range.
g = sns.FacetGrid(df, col="Dataset", sharey=False, col_wrap=num_datasets)

palette = sns.color_palette(['#e76254', '#ef8a47', '#f7aa58', '#ffd06f', '#ffe6b7',
                             '#aadce0', '#72bcd5', '#528fad', '#376795', '#1e466e'])

# Draw the bars in the order the methods first appear in the file.
g.map(sns.barplot, 'Learned Index', 'ReadTime', palette=palette,
      order=df['Learned Index'].unique(), linewidth=1, edgecolor='black')
g.set_xticklabels(rotation=90, size=14)

for ax in g.axes.flat:
    # Draw a full box around every facet.
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(1)
    # Size the y tick labels through tick_params, as the index-size cell does;
    # this leaves matplotlib's automatic tick locator and formatter in place.
    ax.tick_params(axis='y', labelsize=14)

g.set_titles("{col_name}", size=20)
g.set_axis_labels("", "Average Query Time/μs", size=17)

plt.savefig("PointQuery-7A.pdf", format='pdf', bbox_inches='tight')
plt.show()



In [None]:
from matplotlib.ticker import LogFormatterSciNotation
# Index-size figure: one bar chart per dataset, drawn on a logarithmic y-axis.
# `df`, `num_datasets` and `palette` are not defined in this cell; they must
# already exist from an earlier cell.
g = sns.FacetGrid(df, col="Dataset", col_wrap=num_datasets, sharey=False)

bar_style = dict(palette=palette, linewidth=1, edgecolor='black')
g.map(sns.barplot, 'Learned Index', 'IndexSize',
      order=df['Learned Index'].unique(), **bar_style)
g.set_xticklabels(rotation=90, size=14)

for ax in g.axes.flat:
    # Draw a full box around every facet.
    for spine in ax.spines.values():
        spine.set_linewidth(1)
        spine.set_visible(True)

    # Log scale with scientific-notation tick labels.
    ax.set_yscale("log")
    ax.yaxis.set_major_formatter(LogFormatterSciNotation())
    # Only restyle the labels; the formatter above keeps producing their text.
    ax.tick_params(axis='y', labelsize=14)

g.set_axis_labels("", "Index Size/Byte", size=17)
g.set_titles("{col_name}", size=20)

plt.savefig("ModelSize-7A.pdf", format='pdf', bbox_inches='tight')
plt.show()

In [None]:
# Training-time figure: one bar chart per dataset.
# `df` and `num_datasets` come from an earlier cell. TrainTime was already
# rescaled where `df` was loaded, so it must not be rescaled again here
# (doing so would shrink the values on every re-run of this cell).
g = sns.FacetGrid(df, col="Dataset", sharey=False, col_wrap=num_datasets)

palette = sns.color_palette(['#e76254', '#ef8a47', '#f7aa58', '#ffd06f', '#ffe6b7',
                             '#aadce0', '#72bcd5', '#528fad', '#376795', '#1e466e'])

# Draw the bars in the order the methods first appear in the file.
g.map(sns.barplot, 'Learned Index', 'TrainTime', order=df['Learned Index'].unique(),
      palette=palette, linewidth=1, edgecolor='black')
g.set_xticklabels(rotation=90, size=14)

for ax in g.axes.flat:
    # Draw a full box around every facet.
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(1)
    # Size the y tick labels through tick_params, as the index-size cell does;
    # this leaves matplotlib's automatic tick locator and formatter in place.
    ax.tick_params(axis='y', labelsize=14)

g.set_titles("{col_name}", size=20)
g.set_axis_labels("", "TrainTime/s", size=17)

# Save only after all styling has been applied.
plt.savefig("TrainTime-7A.pdf", format='pdf', bbox_inches='tight')
plt.show()

In [None]:
import matplotlib as mpl

import pandas as pd
import seaborn as sns
from matplotlib.ticker import LogLocator, LogFormatter

import matplotlib.pyplot as plt
column_names = ["Tag", "TrainTime", "ReadTime", "IndexSize", "LoadTime"]

# Load the benchmark results. Because names= is given, pandas reads every
# line of the file as data (no header row is consumed).
# NOTE(review): the first cell reads the same path with only four column
# names and three tag fields; presumably data.dat is swapped for a different
# results file before this cell is run -- confirm.
df = pd.read_csv("data.dat", names=column_names)

# A tag holds four '_'-separated fields here: learned-index name, dataset,
# workload and range.
df[['Learned Index', 'Dataset', 'Workload', 'Range']] = df['Tag'].str.split('_', expand=True)

mpl.rcParams['font.family'] = 'Times New Roman'

num_datasets = df['Dataset'].nunique()
# Scale by 1e6 to match the seconds unit on the axis label
# (presumably the raw values are microseconds -- TODO confirm).
df['LoadTime'] = df['LoadTime'] / 1e6

# One facet per dataset in a single row; every facet keeps its own y-range.
g = sns.FacetGrid(df, col="Dataset", sharey=False, col_wrap=num_datasets)

palette = sns.color_palette(['#e76254', '#ef8a47', '#f7aa58', '#ffd06f', '#ffe6b7',
                             '#aadce0', '#72bcd5', '#528fad', '#376795', '#1e466e'])
# Drop the first colour, so this figure starts from the second palette entry.
palette.pop(0)

# Draw the bars in the order the methods first appear in the file.
g.map(sns.barplot, 'Learned Index', 'LoadTime', palette=palette,
      order=df['Learned Index'].unique(), linewidth=1, edgecolor='black')
g.set_xticklabels(rotation=90, size=14)

for ax in g.axes.flat:
    # Draw a full box around every facet.
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(1)
    # Size the y tick labels through tick_params, as the index-size cell does;
    # this leaves matplotlib's automatic tick locator and formatter in place.
    ax.tick_params(axis='y', labelsize=14)

g.set_titles("{col_name}", size=20)
g.set_axis_labels("", "Model Load Time/s", size=17)

plt.savefig("LoadTime-7A.pdf", format='pdf', bbox_inches='tight')
plt.show()


In [None]:
import matplotlib as mpl

import pandas as pd
import seaborn as sns
import numpy as np

import matplotlib.pyplot as plt
column_names = ["Tag", "TrainTime", "ReadTime", "IndexSize"]

# Load the benchmark results. Because names= is given, pandas reads every
# line of the file as data (no header row is consumed).
df = pd.read_csv("data.dat", names=column_names)

# A tag holds three '_'-separated fields: learned-index name, dataset, workload.
df[['Learned Index', 'Dataset', 'Workload']] = df['Tag'].str.split('_', expand=True)

mpl.rcParams['font.family'] = 'Times New Roman'

num_datasets = df['Dataset'].nunique()
# Divide by 1024 twice (presumably bytes -> MiB, since the index-size cell
# labels the raw values as bytes -- TODO confirm).
df['IndexSize'] = df['IndexSize'] / 1024 / 1024

# Fixed memory components that are stacked under the learned-index size.
# Presumably in the same unit as the rescaled IndexSize -- TODO confirm.
FenseSize = 520
BFSize = 76
BlockCache = 8
WriteBuffer = 8

palette = sns.color_palette(['#e76254', '#ef8a47', '#f7aa58', '#ffd06f', '#ffe6b7',
                             '#aadce0', '#72bcd5', '#528fad', '#376795', '#1e466e'])

# X-axis labels, one per bar. They are hard-coded rather than read from the
# data, so the rows of each dataset are assumed to appear in this order.
# NOTE(review): verify the row order against data.dat.
labels = ['WiscKey', 'Piecewise', 'ALEX', 'ALEX*', 'LIPP', 'FITing-Tree', 'PGM', 'RMI', 'RS', 'PLEX', 'DILI']
legend_labels = ['Fence Pointer', 'Bloom Filter', 'Block Cache', 'Write Buffer', 'Learned Index']

# The constant components are identical for every method.
# NOTE(review): the fence-pointer series is plotted as 15 although FenseSize
# is 520 and is not used anywhere in this cell -- confirm which is intended.
first = [15] * len(labels)
second = [BFSize] * len(labels)
third = [BlockCache] * len(labels)
fourth = [WriteBuffer] * len(labels)

x = range(len(labels))
width = 0.75

datasets = df['Dataset'].unique()

# squeeze=False makes subplots() return a 2-D array even for one dataset, so
# indexing by position below also works when there is only a single subplot.
fig, axs = plt.subplots(1, len(datasets), figsize=(10 * len(datasets), 11), squeeze=False)
axs = axs[0]

# Bar containers in drawing order; the first len(legend_labels) entries (the
# first subplot's components) serve as legend handles.
bar_list = []

for i, dataset in enumerate(datasets):
    df_dataset = df[df['Dataset'] == dataset]

    # The learned-index size is the only component that varies per method.
    fifth = df_dataset['IndexSize'].tolist()
    data = [first, second, third, fourth, fifth]

    # Per-method total, then each component as a percentage of that total.
    totals = [sum(parts) for parts in zip(*data)]
    data_percent = [[value / total * 100 for value, total in zip(series, totals)]
                    for series in data]

    # Stack the components: each series starts where the previous one ended.
    bottom_y = [0] * len(labels)
    for count, (y, label) in enumerate(zip(data_percent, legend_labels)):
        # Every second palette entry is used, for clearer contrast between layers.
        bars = axs[i].bar(x, y, width, bottom=bottom_y, color=palette[count * 2], label=label)
        bottom_y = [a + b for a, b in zip(y, bottom_y)]
        bar_list.append(bars)

    axs[i].set_xticks(x)
    axs[i].set_xticklabels(labels, rotation=90, size=45)
    axs[i].tick_params(axis='y', labelsize=40)
    if i == 0:
        # Only the leftmost subplot carries the y-axis label.
        axs[i].set_ylabel('Percentage in Memory Consumption', size=50)
    axs[i].set_title(f'{dataset}', size=50)

# One shared legend above the subplots.
fig.legend(bar_list[:len(legend_labels)], legend_labels, loc='upper center', bbox_to_anchor=(0.5, 1.125),
           fancybox=False, shadow=False, ncol=5, fontsize=50)

plt.tight_layout()
plt.savefig("MemoryPercentage-7A.pdf", format='pdf', bbox_inches='tight')
plt.show()