In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from export import update_mpl, figsize, save, DPI
from tools import Figure, Table
# NOTE(review): update_mpl comes from the project's export module; presumably it applies
# the matplotlib settings used by the figures in this notebook — confirm in export.py.
update_mpl()

In [None]:
import os
from config import params

def normalize_matplotlib_data():
    """Align the matplotlib binary-size tables of the baseline and Aardwolf runs.

    The Aardwolf frontend generates one more .pyc file compared to the
    non-Aardwolf version. Every row whose 'File' value appears in only one of
    the two tables is removed, so that both tables list the same files.

    The inputs are 'matplotlib_binary_baseline.csv' and
    'matplotlib_binary_aardwolf.csv' in the directory given by
    params['results.scalability']. They are left untouched; the filtered
    tables are written next to them with a '_normalized' suffix inserted
    before the extension.

    Raises:
        AssertionError: if, after filtering, the two tables differ in row
            count or do not list the same files in the same order.
    """
    def _with_normalized_suffix(path):
        # Split off the extension instead of using str.replace, which would
        # also rewrite any '.csv' occurring in the results directory name.
        root, ext = os.path.splitext(path)
        return f'{root}_normalized{ext}'

    results_dir = params['results.scalability']
    baseline_path = os.path.join(results_dir, 'matplotlib_binary_baseline.csv')
    aardwolf_path = os.path.join(results_dir, 'matplotlib_binary_aardwolf.csv')

    baseline = pd.read_csv(baseline_path)
    aardwolf = pd.read_csv(aardwolf_path)

    baseline_files = set(baseline['File'].values)
    aardwolf_files = set(aardwolf['File'].values)

    # Symmetric difference: files listed in exactly one of the two tables.
    mismatch = baseline_files ^ aardwolf_files
    baseline = baseline[~baseline['File'].isin(mismatch)]
    aardwolf = aardwolf[~aardwolf['File'].isin(mismatch)]

    baseline_names = baseline['File'].values
    aardwolf_names = aardwolf['File'].values

    # zip() stops at the shorter input, so the row counts must be compared
    # explicitly; otherwise duplicated file names could slip through.
    assert len(baseline_names) == len(aardwolf_names), \
        'normalized tables differ in row count'
    assert all(lhs == rhs for (lhs, rhs) in zip(baseline_names, aardwolf_names)), \
        'normalized tables list files in a different order'

    baseline.to_csv(_with_normalized_suffix(baseline_path), index=False)
    aardwolf.to_csv(_with_normalized_suffix(aardwolf_path), index=False)

In [None]:
# Write the '_normalized' CSV files; Overhead._load reads them for the
# matplotlib binary-size metric.
normalize_matplotlib_data()

In [None]:
import os
from config import params

class Overhead(Figure):
    """Box plots of the Aardwolf-to-baseline ratio for LibTIFF and matplotlib.

    One subplot is drawn per metric (execution time, peak memory usage,
    binary size); the execution time subplot uses a log10 scale.
    """

    def _load(self, program, metric, variant):
        """Read the measurements of one program/metric/variant combination.

        Args:
            program: 'libtiff' or 'matplotlib'.
            metric: 'time', 'memory' or 'binary'.
            variant: 'baseline' or 'aardwolf'.

        Returns:
            The values of the CSV column that belongs to the metric.

        Raises:
            AssertionError: if any argument is not one of the listed values.
        """
        assert program in ['libtiff', 'matplotlib']
        assert metric in ['time', 'memory', 'binary']
        assert variant in ['baseline', 'aardwolf']

        # Time and memory measurements share one file; binary sizes have their own.
        filename = program
        if metric in ['time', 'memory']:
            filename += '_time_memory'
        elif metric == 'binary':
            filename += '_binary'

        filename += f'_{variant}.csv'

        # Use the normalized file written by normalize_matplotlib_data()
        # (see a cell above), so that both variants list the same files.
        if program == 'matplotlib' and metric == 'binary':
            filename = filename.replace('.csv', '_normalized.csv')

        data = pd.read_csv(os.path.join(params['results.scalability'], filename))

        if metric == 'time':
            return data['Time [s]'].values
        elif metric == 'memory':
            return data['Peak memory usage [kB]'].values
        elif metric == 'binary':
            return data['Binary size [B]'].values

    def _calc_overhead(self, baseline, aardwolf):
        """Return the element-wise ratio aardwolf / baseline.

        Entries whose ratio is not a finite number are dropped: NaN (e.g. 0/0
        or a missing measurement) as well as +/-inf (non-zero value divided by
        a zero baseline), because neither can be drawn in a box plot or passed
        through log10 meaningfully.

        Raises:
            AssertionError: if the two inputs differ in length.
        """
        assert len(baseline) == len(aardwolf)
        x = np.array(aardwolf) / np.array(baseline)
        x = x[np.isfinite(x)]
        return x

    def _human_friendly(self, metric):
        """Return the axis label text used for the given metric."""
        assert metric in ['time', 'memory', 'binary']
        if metric == 'time':
            return 'Execution time'
        elif metric == 'memory':
            return 'Peak memory usage'
        elif metric == 'binary':
            return 'Binaries size'

    def _render(self):
        """Create the figure with one overhead subplot per metric and return it."""
        fig = plt.figure(figsize=figsize(1, 0.4), dpi=DPI)

        metrics = ['time', 'memory', 'binary']
        for idx, metric in enumerate(metrics):
            libtiff = self._calc_overhead(
                self._load('libtiff', metric, 'baseline'),
                self._load('libtiff', metric, 'aardwolf'))

            matplotlib = self._calc_overhead(
                self._load('matplotlib', metric, 'baseline'),
                self._load('matplotlib', metric, 'aardwolf'))

            # Only the execution time subplot is drawn on a logarithmic scale.
            scale = 'linear'
            if metric == 'time':
                scale = 'log'

            ax = fig.add_subplot(1, 3, idx + 1)
            self._render_ax(ax, libtiff, matplotlib, self._human_friendly(metric), scale)

        fig.tight_layout()

        return fig

    def _render_ax(self, ax, libtiff, matplotlib, metric, scale='linear'):
        """Draw the LibTIFF and matplotlib box plots on one axes.

        Args:
            ax: axes to draw on.
            libtiff: overhead ratios for LibTIFF.
            matplotlib: overhead ratios for matplotlib.
            metric: text used as the y-axis label.
            scale: 'linear' or 'log'. With 'log' the ratios are transformed by
                log10 before plotting, a log10 marker is appended to the
                label, and only integer exponents are kept as ticks, each
                labelled with the corresponding power of ten.

        Raises:
            AssertionError: if scale is neither 'linear' nor 'log'.
        """
        def _power_of_ten_label(exponent):
            value = 10.0 ** exponent
            if exponent >= 0:
                # Round rather than truncate so that a result such as
                # 999.9999999 is still labelled 1000.
                return str(int(round(value)))
            # Ratios below 1 have negative exponents; int() would label all of
            # them as 0, so keep the fractional value (0.1, 0.01, ...).
            return f'{value:g}'

        assert scale in ['linear', 'log']
        label = metric
        if scale == 'log':
            libtiff, matplotlib = np.log10(libtiff), np.log10(matplotlib)
            label = f'{metric} ($\\log_{{10}}$)'

        ax.boxplot([libtiff, matplotlib], widths=0.25)

        ax.set_xticklabels(['LibTIFF', 'matplotlib'])
        ax.set_ylabel(label)

        if scale == 'log':
            # The plotted values are exponents; keep the whole-number ones only.
            exponents = [y for y in ax.get_yticks() if int(y) == y]
            ax.set_yticks(exponents)
            ax.set_yticklabels([_power_of_ten_label(y) for y in exponents])

In [None]:
import os

class Components(Figure):
    """Two pie charts (LibTIFF, matplotlib) of the time taken by each component."""

    def _load(self, program):
        """Read '<program>_plugins.csv' from the scalability results directory."""
        csv_path = os.path.join(params['results.scalability'], f'{program}_plugins.csv')
        return pd.read_csv(csv_path)

    def _render(self):
        """Build the figure with both pie charts and a shared legend; return it."""
        fig, (left_ax, right_ax) = plt.subplots(nrows=1, ncols=2, figsize=figsize(1), dpi=DPI)

        # Raw component names mapped to the names shown in the legend.
        # Components missing from this table are left out of the figure.
        display_names = {
            'load data': 'Data loading',
            'init plugins': 'Plugins init.',
            'DStar Spectrum (loc)': 'DStar',
            'Probabilistic Dependence (loc)': 'Prob. Dep.',
            'Likely Invariants (loc)': 'Invariants'
        }
        column_names = {'Time [s]_x': 'LibTIFF', 'Time [s]_y': 'matplotlib'}

        # Outer join keeps components measured for only one program;
        # their missing time is then filled with zero.
        merged = pd.merge(
            self._load('libtiff'),
            self._load('matplotlib'),
            on='Plugin',
            how='outer')
        merged = merged.rename(columns=column_names).fillna(0)

        merged['Plugin'] = merged['Plugin'].map(display_names)
        merged = merged[merged['Plugin'].notna()]

        legend_labels = merged['Plugin'].values
        wedges, _ = self._render_ax(left_ax, merged, 'LibTIFF')
        self._render_ax(right_ax, merged, 'matplotlib')

        # Both pies share the same component order, so the wedges of the
        # first one serve as legend handles for the whole figure.
        legend_options = dict(
            loc='lower center',
            bbox_to_anchor=(0, 0.9, 1, 1),
            ncol=len(legend_labels),
            handlelength=1,
            handletextpad=0.5,
            columnspacing=1)
        fig.legend(wedges, legend_labels, **legend_options)

        fig.tight_layout()
        return fig

    def _render_ax(self, ax, data, column):
        """Draw the pie chart of data[column] on ax.

        Wedges above 10 percent are annotated with their absolute time.
        Returns the wedge handles and the label texts produced by ax.pie.
        """
        times = data[column]

        def _format_duration(seconds):
            # Whole minutes are split off once a full minute is reached.
            if seconds >= 60:
                minutes, remainder = divmod(seconds, 60)
                return f'{minutes}\\,m {remainder}\\,s'
            return f'{seconds}\\,s'

        def _wedge_text(percent):
            if not percent > 10:
                return ''
            # Convert the wedge's share back to a whole number of seconds.
            share = np.round(percent / 100. * times.sum(), 0)
            return _format_duration(int(share))

        wedges, texts, _ = ax.pie(times, startangle=90, autopct=_wedge_text)
        ax.axis('equal')
        ax.set_xlabel(column)

        return wedges, texts

In [None]:
# Build the Overhead figure, then chain show() and save('overhead.pgf').
# NOTE(review): both methods are presumably inherited from tools.Figure; the output
# directory is decided there — confirm.
Overhead().show().save('overhead.pgf')

In [None]:
# Build the Components figure, then chain show() and save('components-times.pgf').
# NOTE(review): both methods are presumably inherited from tools.Figure; the output
# directory is decided there — confirm.
Components().show().save('components-times.pgf')