In [None]:
!pip install matplotlib
!pip install tikzplotlib

## Bar chart (Figure 3):

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tikzplotlib
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)


def bar_plot(ax, data, colors=None, total_width=0.8, single_width=1):
    """Draw a grouped bar plot with one bar per data series at each position.

    Each series in ``data`` contributes one bar to every group; the group for
    the value at index ``k`` is centred on x-coordinate ``k``.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        The axis to draw on. Only its ``bar`` method is used.

    data : dict
        Maps a series name to the list of values of that series. The name is
        used as the label of the series.

        Example:
        data = {
            "x": [1, 2, 3],
            "y": [1, 2, 3],
            "z": [1, 2, 3],
        }

    colors : sequence, optional
        Colors used for the series, cycled if there are more series than
        colors. If None, the current matplotlib color cycle is used.
        (default: None)

    total_width : float, optional, default: 0.8
        The width of a bar group. 0.8 means that 80% of the x-axis is covered
        by bars and 20% is left as space between the groups.

    single_width : float, optional, default: 1
        The relative width of a single bar within a group. 1 means the bars
        touch each other within a group, values less than 1 make the bars
        thinner.

    Returns
    -------
    None
        The bars are added to ``ax`` in place. If ``data`` is empty nothing
        is drawn; a series with no values is skipped.
    """
    # Nothing to draw; returning early also avoids dividing by zero below.
    if not data:
        return

    # Fall back to the default color cycle when no colors were provided.
    if colors is None:
        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Number of bars per group and the width available to each of them.
    n_bars = len(data)
    bar_width = total_width / n_bars

    for i, (name, values) in enumerate(data.items()):
        # Horizontal shift of this series so the whole group is centred on
        # the integer x position.
        x_offset = (i - n_bars / 2) * bar_width + bar_width / 2
        color = colors[i % len(colors)]

        for x, y in enumerate(values):
            # Only the first bar of a series carries the label; the others
            # get an empty label so the series is not labelled repeatedly.
            ax.bar(
                x + x_offset,
                y,
                width=bar_width * single_width,
                color=color,
                label=name if x == 0 else "",
            )

# Load the comparison table. `names=True` takes the field names from the CSV
# header row, so each row can be indexed by column name below.
data_ALL = np.genfromtxt('../results/paper_comparison.csv', delimiter=',', names=True, dtype=None, encoding=None)

# NOTE(review): the datasets are picked by hard-coded row position; confirm
# these indices still match the row order of the CSV.
data_DNA = data_ALL[0]
data_5GRAM = data_ALL[3]
data_URL = data_ALL[6]
data_GOV2 = data_ALL[12]

# Not used in this cell; kept in case other cells rely on it.
barWidth = 0.25

# Range of values substituted into the 'la_vector_<bpc>_segments' column
# names (end is exclusive).
start_bpc = 6
end_bpc = 15
step_bpc = 1


def _segments_per_element(row):
    """Return row['la_vector_<bpc>_segments'] / row['n'] for every bpc in range."""
    return [
        row['la_vector_' + str(bpc) + '_segments'] / row['n']
        for bpc in range(start_bpc, end_bpc, step_bpc)
    ]


def _in_millions(value):
    """Return `value` divided by one million, truncated to an int."""
    return int(value / 1000000)


segments_DNA = _segments_per_element(data_DNA)
segments_5GRAM = _segments_per_element(data_5GRAM)
segments_MYURLS = _segments_per_element(data_URL)
segments_GOV2 = _segments_per_element(data_GOV2)

# Legend label -> plotted series. The irregular spacing inside the labels is
# reproduced exactly as it appears in the generated legend.
data = {
    f"GOV2   $n = {_in_millions(data_GOV2['n'])}$M  $u = {_in_millions(data_GOV2['u'])}$M": segments_GOV2,
    f"URL    $n = {_in_millions(data_URL['n'])}$M  $u = {_in_millions(data_URL['u'])}$M": segments_MYURLS,
    f"5GRAM    $n = {_in_millions(data_5GRAM['n'])}$M  $u = {_in_millions(data_5GRAM['u'])}$M": segments_5GRAM,
    f"DNA    $n = {_in_millions(data_DNA['n'])}$M   $u = {_in_millions(data_DNA['u'])}$M": segments_DNA,
}

fig, ax = plt.subplots()
bar_plot(ax, data, total_width=.8, single_width=.95)

# One tick per bar group, labelled with the corresponding bpc value. The tick
# count is derived from the labels so both stay in sync for any step_bpc.
clabels = list(range(start_bpc, end_bpc, step_bpc))
ax.set_xticks(list(range(len(clabels))))
ax.set_xticklabels(clabels)
ax.grid(True, ls=':')
ax.set_yscale('log')

# Double the figure size in both directions.
F = plt.gcf()
Size = F.get_size_inches()
F.set_size_inches(Size[0]*2, Size[1]*2, forward=True)

plt.legend()

# Export the current figure as a standalone TikZ document. Backslashes are
# escaped explicitly ('\\ell'): '\e' is not a valid Python escape sequence and
# triggers a SyntaxWarning on recent Python versions.
tikz = tikzplotlib.get_tikz_code(
    axis_width='350pt',
    axis_height='250pt',
    standalone=True,
    extra_axis_parameters=['title style={font=\\footnotesize},grid style={dotted},ylabel = $\\ell / n$, xlabel = Correction size $c$,\n'
        'legend style={font=\\footnotesize,}'],
    )

# Rewrite occurrences of '0) rect' to '1e-06) rect' in the generated code
# before saving. NOTE(review): presumably this lifts the bar baselines off
# zero because the y-axis is logarithmic; confirm.
with open('plot_bars.tex', 'w') as f:
    f.write(tikz.replace('0) rect', '1e-06) rect'))

