In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the execution measurements; 'pINDs' and 'threads' are parsed as integers.
df = pd.read_csv(
    'execution_data.csv',
    dtype=dict(pINDs=int, threads=int),
)
# Bare last expression so the notebook renders the loaded frame.
df

In [None]:
# Keep only the columns used by the analysis below and add the summed run time.
# `.assign` returns a new frame instead of writing a column into the result of
# a column selection, which is the pattern that raises SettingWithCopyWarning.
timing_columns = ['enqueue', 'initialization', 'pINDCreation', 'pINDValidation']
df = (
    df[timing_columns + ['spilledFiles', 'threads', 'maxMemory']]
    # total_time = row-wise sum of the four timing columns.
    .assign(total_time=lambda frame: frame[timing_columns].sum(axis=1))
)

In [None]:
# Mean of every column per (threads, maxMemory) combination, flattened back to
# ordinary columns. total_time is divided by 1000 and rounded (the plots in
# this notebook label the raw values as ms and this one as seconds);
# spilledFiles is rounded to a whole number.
grouped = (
    df.groupby(['threads', 'maxMemory'])
    .mean()
    .reset_index()
    .assign(
        total_time=lambda means: (means['total_time'] / 1000).round(),
        spilledFiles=lambda means: means['spilledFiles'].round(),
    )
)
grouped

In [None]:
# Reshape to a threads x maxMemory grid of total_time values for the heatmap.
pivoted = grouped.pivot(
    values='total_time',
    index='threads',
    columns='maxMemory',
)

In [None]:
# Annotated heatmap of the pivoted totals; the title is set on the Axes that
# seaborn returns rather than through the pyplot state machine.
heatmap_ax = sns.heatmap(
    pivoted,
    annot=True,
    fmt='g',
    cmap='crest',
)
heatmap_ax.set_title('Total execution time in seconds (TPCH-1, 4GB RAM)')
plt.show()

In [None]:
# Stacked bar chart of the per-phase execution time: one panel per maxMemory
# value, one bar per thread count.
#
# (column in df, bar colour, legend label), listed bottom-to-top in the stack.
# NOTE(review): 'pINDCreation' is part of total_time but is not drawn here;
# confirm that leaving it out of the stack is intentional.
stacked_phases = [
    ('initialization', '#708B91', 'initialization'),
    ('enqueue', '#E8DDAE', 'enqueue'),
    ('pINDValidation', '#7FD5EB', 'validation'),
]
stacked_columns = [column for column, _, _ in stacked_phases]

# Average repeated runs of the same (maxMemory, threads) configuration.
# Plotting the raw rows would draw several bars on the same x position, each
# hiding the previous one. groupby yields keys in ascending order, so both the
# panels and the bars inside each panel come out sorted.
panels = [
    (maxM, rows.groupby('threads')[stacked_columns].mean())
    for maxM, rows in df.groupby('maxMemory')
]
if not panels:
    raise ValueError('df contains no rows to plot')

# Shared y-limit so the panels stay comparable: the original fixed 67000 ms is
# kept as a floor and only raised when a stack would otherwise be clipped.
tallest_stack = max(means.sum(axis=1).max() for _, means in panels)
y_top = max(67000, tallest_stack * 1.05)

# Panel count follows the data instead of a hard-coded 5; squeeze=False keeps
# the axes indexable even when there is a single memory setting.
fig, axes_grid = plt.subplots(1, len(panels), squeeze=False)
ax = axes_grid[0]
fig.set_size_inches(6 * len(panels), 5)

for panel, (maxM, per_thread) in zip(ax, panels):
    thread_labels = per_thread.index.astype(str).tolist()
    stack_bottom = np.zeros(len(per_thread))
    for column, colour, label in stacked_phases:
        heights = per_thread[column].to_numpy()
        panel.bar(thread_labels, heights, bottom=stack_bottom, color=colour, label=label)
        # The next phase sits on top of everything drawn so far.
        stack_bottom = stack_bottom + heights
    panel.legend()
    panel.set_ylabel('Execution time (ms)')
    panel.set_ylim(top=y_top)
    panel.set_xlabel('Number of threads')
    panel.set_title(f'{maxM}% memory')

fig.suptitle('Execution time distribution under changing memory percentages (TPCH-1, 4GB)', fontsize=16)
plt.show()