In [9]:
!pip install matplotlib==3.0.3
!pip install tikzplotlib==0.9.4

Collecting tikzplotlib==0.9.4
  Using cached tikzplotlib-0.9.4-py3-none-any.whl (52 kB)
Installing collected packages: tikzplotlib
  Attempting uninstall: tikzplotlib
    Found existing installation: tikzplotlib 0.9.2
    Uninstalling tikzplotlib-0.9.2:
      Successfully uninstalled tikzplotlib-0.9.2
Successfully installed tikzplotlib-0.9.4


In [10]:
import numpy as np
import matplotlib.pyplot as plt
import tikzplotlib
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

data_ALL = np.genfromtxt('../results/paper_comparison.csv', delimiter=',',
                         names=True, dtype=None, encoding=None)

# Display label of every CSV row, listed in file order (row 0 first).
row_labels = (
    ["DNA"] * 3
    + ["5GRAM"] * 3
    + ["URL"] * 3
    + ["GOV2_avg_+10M", "GOV2_avg_1M-10M", "GOV2_avg_100K-1M"]
)

# Pair each row with its label, then flip the list so that the last CSV row
# is plotted first.
datasets = [{"data": data_ALL[row], "name": label}
            for row, label in enumerate(row_labels)]
datasets.reverse()

# Parameter sweep used by plot_scatter_la_vector:
# range(start_bpc, end_bpc, step_bpc), i.e. 6, 7, ..., 14.
start_bpc = 6
end_bpc = 15
step_bpc = 1

# Exponent sweep used by plot_scatter_coders:
# range(start_e, end_e, step_e), i.e. 3, 4, ..., 8 (shown as "2^e" in labels).
start_e = 3
end_e = 9
step_e = 1

# NOTE(review): start_b / end_b / step_b are not referenced by any code
# visible in this notebook -- confirm whether they are still needed.
start_b = 4
end_b = 17
step_b = 4

def plot_scatter_coders(data, op, database_name,ax):
    """Draw the Elias delta / gamma ``enc_vector`` measurements on ``ax``.

    For every exponent ``e`` in ``range(start_e, end_e, step_e)`` one delta
    point (marker 'h') and one gamma point (marker 'd') are plotted at
    (time for ``op``, bits per key); both use the colour of that exponent.
    Afterwards two thin gray polylines join the delta and the gamma points.

    ``database_name`` is accepted but not used.  Returns the legend labels in
    drawing order (delta, gamma, delta, gamma, ...).
    """
    labels = []
    # Per coder: the (times, spaces) series accumulated while drawing points.
    series = {'delta': ([], []), 'gamma': ([], [])}
    # (column infix, symbol shown in the label, matplotlib marker)
    styles = (('delta', 'δ', 'h'), ('gamma', 'γ', 'd'))

    for colour_idx, e in enumerate(range(start_e, end_e, step_e)):
        colour = 'C' + str(colour_idx)
        for coder, symbol, marker in styles:
            column = 'elias_' + coder + '_' + str(e)
            times, spaces = series[coder]
            times.append(data[column + '_time_' + op])
            spaces.append(data[column + '_bpk'])
            labels.append('sdsl::enc_vector<' + symbol + ', 2^' + str(e) + '>')
            ax.plot(times[-1], spaces[-1], marker=marker, color=colour)

    # Connecting lines are excluded from the legend via the '_nolegend_' label.
    for coder in ('delta', 'gamma'):
        times, spaces = series[coder]
        ax.plot(times, spaces, linewidth=0.75, c='gray', label='_nolegend_')

    return labels


def plot_elias_fano(data, op, database_name,ax):
    """Draw the sd_vector, uniform_partitioned and opt_partitioned points on ``ax``.

    Each structure is plotted as one square marker at
    (time for ``op``, bits per key); a thin gray polyline then joins the
    three points in drawing order.  ``database_name`` is accepted but not
    used.  Returns the three legend labels in drawing order.
    """
    # (time column, space column, colour, legend label), in drawing order.
    variants = (
        ('ef_sd_time_' + op, 'ef_sd_bpk', 'C1', 'sdsl::sd_vector'),
        ('uniform_' + op, 'uniform_sequence_bpk', 'C3', 'ds2i::uniform_partitioned'),
        ('partitioned_' + op, 'partitioned_bpk', 'C4', 'ds2i::opt_partitioned'),
    )

    labels, times, spaces = [], [], []
    for time_col, space_col, colour, label in variants:
        times.append(data[time_col])
        spaces.append(data[space_col])
        ax.plot(times[-1], spaces[-1], marker='s', color=colour)
        labels.append(label)

    # Connecting line, kept out of the legend.
    ax.plot(times, spaces, linewidth=0.75, c='gray', label='_nolegend_')
    return labels

def plot_array_RRR(data, op, database_name,ax):
    """Draw the ``rrr_vector`` points (parameters 15, 31, 63, 127) on ``ax``.

    A configuration is skipped unless its bits-per-key value is below 20.
    Kept configurations are plotted as 'v' markers at
    (time for ``op``, bits per key) and joined by a thin gray polyline.
    ``database_name`` is accepted but not used.  Returns the legend labels
    of the configurations that were drawn.
    """
    labels, times, spaces = [], [], []

    for position, param in enumerate((15, 31, 63, 127)):
        column = 'ef_rrr_' + str(param)
        if not data[column + '_bpk'] < 20:
            continue
        times.append(data[column + '_time_' + op])
        spaces.append(data[column + '_bpk'])
        # The colour follows the position in the full parameter list, so a
        # skipped configuration does not shift the colours of the others.
        ax.plot(times[-1], spaces[-1], marker='v', color='C' + str(position + 2))
        labels.append('sdsl::rrr_vector_<' + str(param) + '>')

    ax.plot(times, spaces, linewidth=0.75, c='gray', label='_nolegend_')
    return labels

def plot_scatter_la_vector(data, op, database_name,ax):
    """Draw the ``la_vector<bpc>`` points on ``ax``.

    ``bpc`` runs over ``range(start_bpc, end_bpc, step_bpc)``; a
    configuration is skipped unless its bits-per-key value is below 20.
    Kept configurations are plotted as 'x' markers at
    (time for ``op``, bits per key) and joined by a thin gray polyline.
    ``database_name`` is accepted but not used.  Returns the legend labels
    of the configurations that were drawn.
    """
    labels, times, spaces = [], [], []

    for position, bpc in enumerate(range(start_bpc, end_bpc, step_bpc)):
        column = 'la_vector_' + str(bpc)
        if not data[column + '_bpk'] < 20:
            continue
        times.append(data[column + '_time_' + op])
        spaces.append(data[column + '_bpk'])
        # Colour index is the position in the full sweep, not among kept points.
        ax.plot(times[-1], spaces[-1], marker='x', color='C' + str(position))
        labels.append('la_vector<' + str(bpc) + '>')

    ax.plot(times, spaces, linewidth=0.75, c='gray', label='_nolegend_')
    return labels

def plot_scatter_la_vector_opt(data, op, database_name,ax):
    """Draw the single ``la_vector_opt`` point on ``ax`` as a '*' marker.

    The point sits at (time for ``op``, bits per key).  ``database_name`` is
    accepted but not used.  Returns the one-element legend label list.
    """
    time_value = data['la_vector_opt_time_' + op]
    space_value = data['la_vector_opt_bpk']
    ax.plot(time_value, space_value, marker='*', color='C0')
    return ['la_vector*']

def rreplace(s, old, new, occurrence):
    """Replace up to ``occurrence`` right-most occurrences of ``old`` in ``s`` by ``new``."""
    # Splitting from the right at most `occurrence` times isolates exactly the
    # trailing matches; joining the pieces with `new` substitutes them.
    return new.join(s.rsplit(old, occurrence))

    
def nth_replace(s, sub, repl, n):
    """Return ``s`` with the ``n``-th (1-based) occurrence of ``sub`` replaced by ``repl``.

    Occurrences are searched left to right and may overlap, because each
    search resumes one character after the start of the previous match.
    ``s`` is returned unchanged when ``n`` is smaller than 1 or when ``s``
    contains fewer than ``n`` occurrences of ``sub``.
    """
    if n < 1:
        return s
    find = -1
    for _ in range(n):
        # find + 1 means we start searching just after the start of the last match
        find = s.find(sub, find + 1)
        if find == -1:
            # Ran out of matches before reaching the n-th one: nothing to replace.
            # (Counting this failed search as a match would splice at index -1.)
            return s
    return s[:find] + repl + s[find+len(sub):]

def print_one(data, op, dataset_name,plot_index):
    """Draw every structure for one dataset into its subplot.

    The subplot is taken from the module-level ``ax`` grid at row
    ``plot_index // 3``, column ``plot_index % 3``.  The plotters run in a
    fixed order (RRR, Elias-Fano, la_vector, la_vector_opt, coders) and
    their legend labels are concatenated in that order.  The subplot title
    is ``dataset_name`` followed by ``data['ratio'] * 100`` cut to its first
    five characters.  Returns the concatenated legend labels.
    """
    row, col = divmod(plot_index, 3)
    target = ax[row, col]

    plotters = (
        plot_array_RRR,
        plot_elias_fano,
        plot_scatter_la_vector,
        plot_scatter_la_vector_opt,
        plot_scatter_coders,
    )
    labels = []
    for plotter in plotters:
        labels = labels + plotter(data, op, dataset_name, target)

    percentage = str(data['ratio']*100)[0:5]
    target.set_title('\\textsc{'+dataset_name+' (' + percentage + '%)}')
    return labels


## Select function benchmark (Figure 4):

In [11]:
# Subplot grid: 4 rows x 3 columns.
nrows, ncolumn = 4, 3

fig, ax = plt.subplots(nrows=nrows, ncols=ncolumn)

# Draw each dataset into its own subplot; forLegend ends up holding the
# labels returned for the last dataset drawn.
forLegend = []
plot_index = 0
for dataset in datasets:
    forLegend = print_one(dataset['data'], "select", dataset['name'], plot_index)
    plot_index += 1

first_bottom = (nrows - 1) * ncolumn
for i, ax1 in enumerate(ax.flat):
    # Only the left-most column carries the y label ...
    if i % ncolumn == 0:
        ax1.set(ylabel='Space (bits per integer)')
    # ... and only the bottom row carries the x label.
    if first_bottom <= i < nrows * ncolumn:
        ax1.set(xlabel='Select time (nanoseconds)')
    for axis_obj in (ax1.xaxis, ax1.yaxis):
        axis_obj.grid(True, which='minor', ls=':')
    ax1.grid(True, ls=':')
    for axis_obj in (ax1.xaxis, ax1.yaxis):
        axis_obj.set_minor_locator(AutoMinorLocator())
    for axis_name, bins in (('x', 6), ('y', 4)):
        ax1.locator_params(nbins=bins, axis=axis_name)


# Scale the current figure to 2.5x its width and 4x its height.
F = plt.gcf()
Size = F.get_size_inches()
F.set_size_inches(Size[0] * 2.5, Size[1] * 4, forward=True)


# Figure-level matplotlib legend built from the labels in forLegend.
lgd = fig.legend(forLegend,scatterpoints=1, loc='center right',  prop={'size': 11}, borderaxespad=0.)

# Export the current figure as a standalone pgfplots document.
# NOTE(review): the size arguments below are disabled; the saved output of
# this cell shows a TypeError about 'axis_width', presumably raised by an
# older tikzplotlib -- confirm before re-enabling them.
tikz = tikzplotlib.get_tikz_code(
    #axis_width='164pt',
    #axis_height='150pt',
    standalone=True,
    extra_axis_parameters=['title style={yshift=-2ex,font=\\footnotesize},minor x tick num=2,minor y tick num=2,grid style={dotted}'],
    )

# Rewrite the exported marker options. The replacements are order-dependent:
# later patterns match text produced by earlier ones.
# Literals containing LaTeX backslashes are raw strings so that sequences such
# as \m, \g, \d and \s are not treated as (invalid) Python escapes.
tikz = tikz.replace('mark=square*, mark size=3', 'mark=square*, only marks, mark size=1.5pt')
tikz = tikz.replace('mark=diamond*', r'mark=text, text mark=$\mathbf{\gamma}$')
tikz = tikz.replace('mark=star', r'mark=text, text mark=$\mathbf{\delta}$')
tikz = tikz.replace('asterisk', 'otimes')
tikz = tikz.replace('mark size=3', 'mark size=2.4pt')
tikz = tikz.replace('mark=x,', 'mark=x, only marks,')
tikz = tikz.replace('mark=otimes,', 'mark=otimes, only marks,')
tikz = tikz.replace('mark=triangle*, mark size=2.4pt', 'mark=triangle*, only marks, mark size=2.2pt')
tikz = tikz.replace(r'mark=$\mathbf{\delta}$, mark size=2.4pt', r'mark=$\mathbf{\delta}$, text mark as node, text mark style={font=\scriptsize}')
tikz = tikz.replace(r'mark=$\mathbf{\gamma}$, mark size=2.4pt', r'mark=$\mathbf{\gamma}$, text mark as node, text mark style={font=\scriptsize}')
tikz = tikz.replace('usetikzlibrary{patterns,shapes.arrows}','usetikzlibrary{patterns,shapes.arrows,matrix}')
tikz = tikz.replace(r'text mark=$\mathbf{\delta}$,', r'text mark=$\mathbf{\delta}$, only marks,')
tikz = tikz.replace(r'text mark=$\mathbf{\gamma}$,', r'text mark=$\mathbf{\gamma}$, only marks,')

# Tag selected \addplot commands with \label{plots:plotN}.
# NOTE(review): the labels are presumably referenced by the legend snippet
# read from legend.txt -- confirm against that file.
# rreplace(..., 1) only touches the LAST matching \addplot in the document.
#
# NOTE(review): several labels are anchored on hard-coded coordinate rows of
# the exported data. str.find/rfind return -1 when a row is absent, in which
# case the text is spliced at a wrong position without any error; the fixed
# offsets presumably skip the rest of the row and the closing "};" of its
# table. Re-check anchors and offsets whenever the input CSV changes.

# plot1-plot3: triangle markers.
index_plot = 1
for color in [3,0,1]:
    tikz = rreplace(tikz, '\\addplot [semithick, color'+str(color)+', mark=triangle*,' ,'\\label{plots:plot'+str(index_plot)+'} \\addplot [semithick, color'+str(color)+', mark=triangle*,', 1)
    index_plot+=1

# plot4: inserted after a fixed coordinate row.
index_label = tikz.find('1191.816 3.011311')
index_label += 20
tikz = tikz[:index_label] + '\\label{plots:plot4} \n' + tikz[index_label:]

# plot5-plot6: square markers.
index_plot = 5
for color in [3,0]:
    tikz = rreplace(tikz, '\\addplot [semithick, color'+str(color)+', mark=square*,' ,'\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=square*,', 1)
    index_plot+=1

# plot7: inserted after a fixed coordinate row.
index_label = tikz.find('471.6388 3.464972')
index_label += 21
tikz = tikz[:index_label] + '\\label{plots:plot7} \n' + tikz[index_label:]

# plot8-plot15: 'x' markers.
index_plot = 8
for color in [2,5,3,0,1,6]:
    tikz = rreplace(tikz, '\\addplot [semithick, color'+str(color)+', mark=x,' ,'\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=x,', 1)
    index_plot+=1

tikz = rreplace(tikz, '\\addplot [semithick, white!49.8039215686275!black, mark=x,' ,'\\label{plots:plot14} \n\\addplot [semithick, white!49.8039215686275!black, mark=x,', 1)
tikz = rreplace(tikz, '\\addplot [semithick, color7, mark=x,' ,'\\label{plots:plot15} \n\\addplot [semithick, color7, mark=x,', 1)

# plot16: inserted after a fixed coordinate row.
index_label = tikz.find('76.85571 14.00053')
index_label += 21
tikz = tikz[:index_label] + '\\label{plots:plot16} \n' + tikz[index_label:]

# plot17-plot28: delta / gamma text markers, alternating per colour.
# Backslashes are escaped in the search strings as well, so no literal relies
# on invalid escape sequences such as \m, \d or \g.
index_plot = 17
for color in [4,2,5,3,0,1]:
    tikz = rreplace(tikz,'\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\delta}$,','\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\delta}$,',1)
    index_plot+=1
    tikz = rreplace(tikz,'\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\gamma}$,','\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\gamma}$,',1)
    index_plot+=1

# plot29: inserted after a fixed coordinate row.
index_label = tikz.find('596.8635 3.393474')
index_label += 21
tikz = tikz[:index_label] + '\\label{plots:plot29} \n' + tikz[index_label:]


# Define the "top" coordinate after the last occurrence of a fixed coordinate row.
index_top = tikz.rfind('198.299553846154 1.56836269230769')
index_top += 36
tikz = tikz[:index_top] + '\n\\coordinate (top) at (rel axis cs:0,5); \n' + tikz[index_top:]

# Read the legend snippet; the context manager closes the handle once read.
with open('legend.txt', mode='r') as file:
    all_legend = file.read()

# Splice the legend in just before the last \end{tikzpicture}.
index_legend = tikz.rfind('\\end{tikzpicture}')
tikz = tikz[:index_legend] + all_legend + tikz[index_legend:]

with open('plot_select.tex', 'w') as f:
    f.write(tikz)


TypeError: get_tikz_code() got an unexpected keyword argument 'axis_width'

## Rank function benchmark (Figure 5):

In [None]:
# Subplot grid: 4 rows x 3 columns.
nrows, ncolumn = 4, 3

fig, ax = plt.subplots(nrows=nrows, ncols=ncolumn)

# Draw each dataset into its own subplot; forLegend ends up holding the
# labels returned for the last dataset drawn.
forLegend = []
plot_index = 0
for dataset in datasets:
    forLegend = print_one(dataset['data'], "rank", dataset['name'], plot_index)
    plot_index += 1

first_bottom = (nrows - 1) * ncolumn
for i, ax1 in enumerate(ax.flat):
    # Only the left-most column carries the y label ...
    if i % ncolumn == 0:
        ax1.set(ylabel='Space (bits per integer)')
    # ... and only the bottom row carries the x label.
    if first_bottom <= i < nrows * ncolumn:
        ax1.set(xlabel='Rank time (nanoseconds)')
    for axis_obj in (ax1.xaxis, ax1.yaxis):
        axis_obj.grid(True, which='minor', ls=':')
    ax1.grid(True, ls=':')
    for axis_obj in (ax1.xaxis, ax1.yaxis):
        axis_obj.set_minor_locator(AutoMinorLocator())
    for axis_name, bins in (('x', 6), ('y', 4)):
        ax1.locator_params(nbins=bins, axis=axis_name)

# Figure-level matplotlib legend built from the labels in forLegend.
fig.legend(forLegend, scatterpoints=1, loc='center right',
           bbox_to_anchor=(1, 0.5), prop={'size': 11})

# Scale the current figure to 2.5x its width and 4x its height.
F = plt.gcf()
Size = F.get_size_inches()
F.set_size_inches(Size[0] * 2.5, Size[1] * 4, forward=True)

# Export the current figure as a standalone pgfplots document
# (the size arguments are left disabled, as in the original cell).
tikz = tikzplotlib.get_tikz_code(
    #axis_width='164pt',
    #axis_height='150pt',
    standalone=True,
    extra_axis_parameters=['title style={yshift=-2ex,font=\\footnotesize},minor x tick num=2,minor y tick num=2,grid style={dotted},'],
    )

# Rewrite the exported marker options. The replacements are order-dependent:
# later patterns match text produced by earlier ones.
# Literals containing LaTeX backslashes are raw strings so that sequences such
# as \m, \g, \d and \s are not treated as (invalid) Python escapes.
tikz = tikz.replace('mark=square*, mark size=3', 'mark=square*, only marks, mark size=1.5pt')
tikz = tikz.replace('mark=diamond*', r'mark=text, text mark=$\mathbf{\gamma}$')
tikz = tikz.replace('mark=star', r'mark=text, text mark=$\mathbf{\delta}$')
tikz = tikz.replace('asterisk', 'otimes')
tikz = tikz.replace('mark size=3', 'mark size=2.4pt')
tikz = tikz.replace('mark=x,', 'mark=x, only marks,')
tikz = tikz.replace('mark=otimes,', 'mark=otimes, only marks,')
tikz = tikz.replace('mark=triangle*, mark size=2.4pt', 'mark=triangle*, only marks, mark size=2.2pt')
tikz = tikz.replace(r'mark=$\mathbf{\delta}$, mark size=2.4pt', r'mark=$\mathbf{\delta}$, text mark as node, text mark style={font=\scriptsize}')
tikz = tikz.replace(r'mark=$\mathbf{\gamma}$, mark size=2.4pt', r'mark=$\mathbf{\gamma}$, text mark as node, text mark style={font=\scriptsize}')
tikz = tikz.replace('usetikzlibrary{patterns,shapes.arrows}','usetikzlibrary{patterns,shapes.arrows,matrix}')
tikz = tikz.replace(r'text mark=$\mathbf{\delta}$,', r'text mark=$\mathbf{\delta}$, only marks,')
tikz = tikz.replace(r'text mark=$\mathbf{\gamma}$,', r'text mark=$\mathbf{\gamma}$, only marks,')

# Tag selected \addplot commands with \label{plots:plotN}.
# NOTE(review): the labels are presumably referenced by the legend snippet
# read from legend.txt -- confirm against that file.
# rreplace(..., 1) only touches the LAST matching \addplot in the document.
#
# NOTE(review): several labels are anchored on hard-coded coordinate rows of
# the exported data. str.find/rfind return -1 when a row is absent, in which
# case the text is spliced at a wrong position without any error; the fixed
# offsets presumably skip the rest of the row and the closing "};" of its
# table. Re-check anchors and offsets whenever the input CSV changes.

# plot1-plot3: triangle markers.
index_plot = 1
for color in [3,0,1]:
    tikz = rreplace(tikz, '\\addplot [semithick, color'+str(color)+', mark=triangle*,' ,'\\label{plots:plot'+str(index_plot)+'} \\addplot [semithick, color'+str(color)+', mark=triangle*,', 1)
    index_plot+=1

# plot4: inserted after a fixed coordinate row.
index_label = tikz.find('686.1192 3.011311')
index_label += 20
tikz = tikz[:index_label] + '\\label{plots:plot4} \n' + tikz[index_label:]

# plot5-plot6: square markers.
index_plot = 5
for color in [3,0]:
    tikz = rreplace(tikz, '\\addplot [semithick, color'+str(color)+', mark=square*,' ,'\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=square*,', 1)
    index_plot+=1

# plot7: inserted after a fixed coordinate row.
index_label = tikz.find('418.8362 3.464972')
index_label += 21
tikz = tikz[:index_label] + '\\label{plots:plot7} \n' + tikz[index_label:]

# plot8-plot15: 'x' markers.
index_plot = 8
for color in [2,5,3,0,1,6]:
    tikz = rreplace(tikz, '\\addplot [semithick, color'+str(color)+', mark=x,' ,'\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=x,', 1)
    index_plot+=1

tikz = rreplace(tikz, '\\addplot [semithick, white!49.8039215686275!black, mark=x,' ,'\\label{plots:plot14} \n\\addplot [semithick, white!49.8039215686275!black, mark=x,', 1)
tikz = rreplace(tikz, '\\addplot [semithick, color7, mark=x,' ,'\\label{plots:plot15} \n\\addplot [semithick, color7, mark=x,', 1)

# plot16: inserted after a fixed coordinate row.
index_label = tikz.find('441.7003 14.00053')
index_label += 21
tikz = tikz[:index_label] + '\\label{plots:plot16} \n' + tikz[index_label:]

# plot17-plot28: delta / gamma text markers, alternating per colour.
# Backslashes are escaped in the search strings as well, so no literal relies
# on invalid escape sequences such as \m, \d or \g.
index_plot = 17
for color in [4,2,5,3,0,1]:
    tikz = rreplace(tikz,'\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\delta}$,','\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\delta}$,',1)
    index_plot+=1
    tikz = rreplace(tikz,'\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\gamma}$,','\\label{plots:plot'+str(index_plot)+'} \n\\addplot [semithick, color'+str(color)+', mark=text, text mark=$\\mathbf{\\gamma}$,',1)
    index_plot+=1

# plot29: inserted after a fixed coordinate row.
index_label = tikz.find('1833.978 3.393474')
index_label += 21
tikz = tikz[:index_label] + '\\label{plots:plot29} \n' + tikz[index_label:]


# Define the "top" coordinate after the last occurrence of a fixed coordinate row.
index_top = tikz.rfind('1220.13361538462 1.56836269230769')
index_top += 36
tikz = tikz[:index_top] + '\n\\coordinate (top) at (rel axis cs:0,5); \n' + tikz[index_top:]

# Read the legend snippet; the context manager closes the handle once read.
with open('legend.txt', mode='r') as file:
    all_legend = file.read()

# Splice the legend in just before the last \end{tikzpicture}.
index_legend = tikz.rfind('\\end{tikzpicture}')
tikz = tikz[:index_legend] + all_legend + tikz[index_legend:]

with open('plot_rank.tex', 'w') as f:
    f.write(tikz)
