In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

In [2]:
# Route all figure output through the PGF backend.
matplotlib.use("pgf")

# rc overrides applied in one go: pdflatex as the TeX system, LaTeX text
# rendering switched on, a serif font family, and pgf.rcfonts turned off.
pgf_rc_settings = {
    "pgf.texsystem": "pdflatex",
    "text.usetex": True,
    "font.family": "serif",
    "pgf.rcfonts": False,
}
matplotlib.rcParams.update(pgf_rc_settings)

In [3]:
def read_data(filename):
    """Load a rollup results file into a typed, sorted DataFrame.

    The file is read as header-less CSV and only columns 0, 1, 7 and 8 are
    kept. Column 0 may carry "(" characters and column 8 ")" characters
    (presumably because each row was written as a printed tuple -- confirm
    against the producer); these are removed before the numeric conversion.

    Parameters
    ----------
    filename : str or path-like
        Location of the results file, passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``size`` and ``n_indices`` (integers), ``mean`` and ``stdev``
        (floats), sorted by ``size``, then ``n_indices``, then ``mean``.
        The row index of the file is kept (it is not reset after sorting).

    Raises
    ------
    ValueError
        If a kept cell cannot be converted to the expected numeric type.
    """
    raw = pd.read_csv(filename, header=None)

    # Work on an explicit copy: assigning columns into the bare selection can
    # raise SettingWithCopyWarning on pandas versions without copy-on-write.
    rollup = raw[[0, 1, 7, 8]].copy()

    # Cast to str before stripping so this also works when pandas has already
    # parsed the column as numeric (i.e. the file carries no parentheses).
    rollup[0] = rollup[0].astype(str).str.replace("(", "", regex=False).apply(int)
    rollup[8] = rollup[8].astype(str).str.replace(")", "", regex=False).apply(float)

    # The two middle columns need no clean-up, only a type conversion.
    rollup[1] = rollup[1].apply(int)
    rollup[7] = rollup[7].apply(float)

    # Sort on the positional labels first, then switch to descriptive names.
    rollup = rollup.sort_values(by=[0, 1, 7])
    return rollup.rename(columns={0: "size", 1: "n_indices", 7: "mean", 8: "stdev"})

In [4]:
# Result files of the naive and the optimized rollup runs.
naive_filename, opt_filename = (
    "../rollup/results_rollup_naive_avg.txt",
    "../rollup/results_rollup_opt_avg.txt",
)

# Parse both files with the same loader, naive first.
rollup_naive, rollup_opt = (
    read_data(results_path) for results_path in (naive_filename, opt_filename)
)

In [5]:
# Keep only the rows measured on the 6008-row input, for each variant.
rollup_naive_small = rollup_naive.loc[rollup_naive["size"].eq(6008)]
rollup_opt_small = rollup_opt.loc[rollup_opt["size"].eq(6008)]

In [6]:
# Quick look at the 6008-row subsets with default styling: mean over the
# number of indices, stdev as error bars. Naive first, optimized second.
plt.errorbar(
    rollup_naive_small["n_indices"],
    rollup_naive_small["mean"],
    yerr=rollup_naive_small["stdev"],
)
plt.errorbar(
    rollup_opt_small["n_indices"],
    rollup_opt_small["mean"],
    yerr=rollup_opt_small["stdev"],
)

<ErrorbarContainer object of 3 artists>

In [17]:
# Publication figure for the 6008-row subsets: naive vs. optimized.
label = "lineorder_small"
fig, ax = plt.subplots(figsize=(17, 8))

# Naive variant: blue diamonds, stdev drawn as half-transparent error bars.
markers, caps, bars = ax.errorbar(
    rollup_naive_small["n_indices"],
    rollup_naive_small["mean"],
    yerr=rollup_naive_small["stdev"],
    label="Naive",
    color="blue",
    marker="D",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

# Optimized variant: orange circles, same error-bar treatment.
markers, caps, bars = ax.errorbar(
    rollup_opt_small["n_indices"],
    rollup_opt_small["mean"],
    yerr=rollup_opt_small["stdev"],
    label="Optimized",
    color="orange",
    marker="o",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

ax.grid(linestyle='dotted')

# Axis labels and title use LaTeX markup; with LaTeX rendering switched off
# the \textbf{...} wrappers would be printed literally.
ax.set_xlabel(r'\textbf{Number of indices to group}', fontsize=15)
ax.set_ylabel(r'\textbf{Query execution time [s]}', fontsize=15)
ax.set_title(
    r'\textbf{Execution time change with number of indices to group - ' + label + ' dataset}',
    fontsize=18,
)
ax.legend()
# ax.set_xscale("log")

# Write the PDF, then release the figure.
fig.savefig("plot/rollup/indices_change_lineorder_small.pdf")
plt.close(fig)

In [8]:
# Rows measured on the 600572-row input, for each variant.
rollup_naive_medium = rollup_naive.loc[rollup_naive["size"].eq(600572)]
rollup_opt_medium = rollup_opt.loc[rollup_opt["size"].eq(600572)]

label = "lineorder_medium"
fig, ax = plt.subplots(figsize=(17, 8))

# Naive variant: blue diamonds, stdev drawn as half-transparent error bars.
markers, caps, bars = ax.errorbar(
    rollup_naive_medium["n_indices"],
    rollup_naive_medium["mean"],
    yerr=rollup_naive_medium["stdev"],
    label="Naive",
    color="blue",
    marker="D",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

# Optimized variant: orange circles, same error-bar treatment.
markers, caps, bars = ax.errorbar(
    rollup_opt_medium["n_indices"],
    rollup_opt_medium["mean"],
    yerr=rollup_opt_medium["stdev"],
    label="Optimized",
    color="orange",
    marker="o",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

ax.grid(linestyle='dotted')

# Axis labels and title use LaTeX markup; with LaTeX rendering switched off
# the \textbf{...} wrappers would be printed literally.
ax.set_xlabel(r'\textbf{Number of indices to group}', fontsize=15)
ax.set_ylabel(r'\textbf{Query execution time [s]}', fontsize=15)
ax.set_title(
    r'\textbf{Execution time change with number of indices to group - ' + label + ' dataset}',
    fontsize=18,
)
ax.legend(loc='upper left')
# ax.set_xscale("log")

# Write the PDF, then release the figure.
fig.savefig("plot/rollup/indices_change_lineorder_medium.pdf")
plt.close(fig)

In [10]:
# Rows measured on the 6001171-row input, for each variant.
rollup_naive_big = rollup_naive.loc[rollup_naive["size"].eq(6001171)]
rollup_opt_big = rollup_opt.loc[rollup_opt["size"].eq(6001171)]

label = "lineorder_big"
fig, ax = plt.subplots(figsize=(17, 8))

# Naive variant: blue diamonds, stdev drawn as half-transparent error bars.
markers, caps, bars = ax.errorbar(
    rollup_naive_big["n_indices"],
    rollup_naive_big["mean"],
    yerr=rollup_naive_big["stdev"],
    label="Naive",
    color="blue",
    marker="D",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

# Optimized variant: orange circles, same error-bar treatment.
markers, caps, bars = ax.errorbar(
    rollup_opt_big["n_indices"],
    rollup_opt_big["mean"],
    yerr=rollup_opt_big["stdev"],
    label="Optimized",
    color="orange",
    marker="o",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

ax.grid(linestyle='dotted')

# Axis labels and title use LaTeX markup; with LaTeX rendering switched off
# the \textbf{...} wrappers would be printed literally.
ax.set_xlabel(r'\textbf{Number of indices to group}', fontsize=15)
ax.set_ylabel(r'\textbf{Query execution time [s]}', fontsize=15)
ax.set_title(
    r'\textbf{Execution time change with number of indices to group - ' + label + ' dataset}',
    fontsize=18,
)
ax.legend(loc='upper left')
# ax.set_xscale("log")

# Write the PDF, then release the figure.
fig.savefig("plot/rollup/indices_change_lineorder_big.pdf")
plt.close(fig)

In [16]:
# Rows measured on the 2000390-row input, for each variant.
# NOTE(review): this rebinds rollup_naive_big / rollup_opt_big, which another
# cell uses for the 6001171-row subset, and the title label stays
# "lineorder_big"; only the output file name differs. The file name also says
# "200k" while the filter value is 2000390 -- confirm all of this is intended.
rollup_naive_big = rollup_naive.loc[rollup_naive["size"].eq(2000390)]
rollup_opt_big = rollup_opt.loc[rollup_opt["size"].eq(2000390)]

label = "lineorder_big"
fig, ax = plt.subplots(figsize=(17, 8))

# Naive variant: blue diamonds, stdev drawn as half-transparent error bars.
markers, caps, bars = ax.errorbar(
    rollup_naive_big["n_indices"],
    rollup_naive_big["mean"],
    yerr=rollup_naive_big["stdev"],
    label="Naive",
    color="blue",
    marker="D",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

# Optimized variant: orange circles, same error-bar treatment.
markers, caps, bars = ax.errorbar(
    rollup_opt_big["n_indices"],
    rollup_opt_big["mean"],
    yerr=rollup_opt_big["stdev"],
    label="Optimized",
    color="orange",
    marker="o",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

ax.grid(linestyle='dotted')

# Axis labels and title use LaTeX markup; with LaTeX rendering switched off
# the \textbf{...} wrappers would be printed literally.
ax.set_xlabel(r'\textbf{Number of indices to group}', fontsize=15)
ax.set_ylabel(r'\textbf{Query execution time [s]}', fontsize=15)
ax.set_title(
    r'\textbf{Execution time change with number of indices to group - ' + label + ' dataset}',
    fontsize=18,
)
ax.legend(loc='upper left')
# ax.set_xscale("log")

# Write the PDF, then release the figure.
fig.savefig("plot/rollup/indices_change_lineorder_big_200k.pdf")
plt.close(fig)

In [19]:
# Rows measured on the 300286-row input, for each variant.
# NOTE(review): this rebinds rollup_naive_big / rollup_opt_big, which other
# cells use for different input sizes, and the title label stays
# "lineorder_big"; only the output file name differs -- confirm intended.
rollup_naive_big = rollup_naive.loc[rollup_naive["size"].eq(300286)]
rollup_opt_big = rollup_opt.loc[rollup_opt["size"].eq(300286)]

label = "lineorder_big"
fig, ax = plt.subplots(figsize=(17, 8))

# Naive variant: blue diamonds, stdev drawn as half-transparent error bars.
markers, caps, bars = ax.errorbar(
    rollup_naive_big["n_indices"],
    rollup_naive_big["mean"],
    yerr=rollup_naive_big["stdev"],
    label="Naive",
    color="blue",
    marker="D",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

# Optimized variant: orange circles, same error-bar treatment.
markers, caps, bars = ax.errorbar(
    rollup_opt_big["n_indices"],
    rollup_opt_big["mean"],
    yerr=rollup_opt_big["stdev"],
    label="Optimized",
    color="orange",
    marker="o",
    markersize=7,
)
for errbar_line in bars:
    errbar_line.set_alpha(0.5)

ax.grid(linestyle='dotted')

# Axis labels and title use LaTeX markup; with LaTeX rendering switched off
# the \textbf{...} wrappers would be printed literally.
ax.set_xlabel(r'\textbf{Number of indices to group}', fontsize=15)
ax.set_ylabel(r'\textbf{Query execution time [s]}', fontsize=15)
ax.set_title(
    r'\textbf{Execution time change with number of indices to group - ' + label + ' dataset}',
    fontsize=18,
)
ax.legend(loc='upper left')
# ax.set_xscale("log")

# Write the PDF, then release the figure.
fig.savefig("plot/rollup/indices_change_lineorder_big_300k.pdf")
plt.close(fig)