In [69]:
from plotly.subplots import make_subplots
import numpy as np
import pandas as pd
import plotly.express as px

# Python Dynamic Array Performance

#### Load the data

In [20]:
# Read the raw benchmark log; the aggregation and plots below derive from it.
df = pd.read_csv("perf_log.csv")

In [43]:
# List the distinct values of the `goal` column.
df['goal'].unique()

array(['intArrayCosts', 'stringCreateCosts', 'stringMatch'], dtype=object)

In [62]:
# Average time_elapsed and max_memory within each (goal, function, arg) group.
# The column subset must be a list (double brackets): selecting with a bare
# tuple of keys is deprecated and raises an error in pandas >= 2.0.
agg_df = (
    df.groupby(['goal', 'function', 'arg'])[['time_elapsed', 'max_memory']]
    .mean()
    .reset_index()
)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



### String Matching
*Lessons learned*
* String appending is terrible for performance. Remarkably, it uses the least memory, but still don't use it.
* Comprehension is better than array_append (~20% faster)

In [75]:
# Rows for the stringMatch goal, leaving out the read_file function.
string_match_rows = agg_df[
    (agg_df['goal'] == 'stringMatch') & (agg_df['function'] != 'read_file')
]

# Elapsed time against arg, one line per function, drawn with point markers.
fig1_time = px.line(string_match_rows, x="arg", y="time_elapsed", color='function')
fig1_time.update_traces(mode='markers+lines')
fig1_time.show()

# Maximum memory against arg for the same rows.
fig1_mem = px.line(string_match_rows, x="arg", y="max_memory", color='function')
fig1_mem.update_traces(mode='markers+lines')
fig1_mem.show()

### String Create Costs
*Lessons Learned*
* Python built-ins (in this case random_choices) work very well!
* String append, once again, is extremely slow

In [76]:
# Rows for the stringCreateCosts goal, leaving out the read_file function.
string_create_rows = agg_df[
    (agg_df['goal'] == 'stringCreateCosts') & (agg_df['function'] != 'read_file')
]

# Elapsed time against arg, one line per function, drawn with point markers.
fig2_time = px.line(string_create_rows, x="arg", y="time_elapsed", color='function')
fig2_time.update_traces(mode='markers+lines')
fig2_time.show()

In [81]:
# Boolean mask: stringCreateCosts rows whose function is not read_file.
is_string_create = (agg_df['goal'] == 'stringCreateCosts') & (agg_df['function'] != 'read_file')

# Maximum memory against arg, one line per function, drawn with point markers.
fig2_mem = px.line(agg_df[is_string_create], x="arg", y="max_memory", color='function')
fig2_mem.update_traces(mode='markers+lines')
fig2_mem.show()

### Int Array Costs
*Lessons Learned*
* Overall, Python optimizes these pretty well
* Zero array is best, but it's all pretty even

In [78]:
# Rows for the intArrayCosts goal, leaving out the read_file function.
int_array_rows = agg_df[
    (agg_df['goal'] == 'intArrayCosts') & (agg_df['function'] != 'read_file')
]

# Elapsed time against arg, one line per function, drawn with point markers.
fig3_time = px.line(int_array_rows, x="arg", y="time_elapsed", color='function')
fig3_time.update_traces(mode='markers+lines')
fig3_time.show()

In [80]:
# Boolean mask: intArrayCosts rows whose function is not read_file.
is_int_array = (agg_df['goal'] == 'intArrayCosts') & (agg_df['function'] != 'read_file')

# Maximum memory against arg, one line per function, drawn with point markers.
fig3_mem = px.line(agg_df[is_int_array], x="arg", y="max_memory", color='function')
fig3_mem.update_traces(mode='markers+lines')
fig3_mem.show()