In [None]:
from glob import glob as glob
from datetime import datetime
import csv
import pandas as pd

def get_date(file, year=2018):
    """Build a datetime from the timestamp embedded in a results-log file name.

    The base name is expected to look like ``results-MMDDhhmm.<ext>``: the
    eight characters that follow the ``results-`` prefix are read as month,
    day, hour and minute (two digits each). Anything after those eight
    characters is ignored. The directory part of ``file`` is ignored, so
    ``logs/results-01021530.txt`` and ``/any/dir/results-01021530.txt`` parse
    the same way.

    Args:
        file: Path (or bare name) of the log file.
        year: Year to use, since the file name does not carry one.

    Returns:
        datetime: ``datetime(year, month, day, hour, minute)``.

    Raises:
        ValueError: If the stamp is missing, non-numeric, or not a valid date.
    """
    import os  # local import so this block does not rely on other cells

    # Work from the base name so the result does not depend on how the
    # directory part of the path happens to be spelled.
    stem = os.path.splitext(os.path.basename(file))[0]
    stamp = stem[len("results-"):]
    try:
        month, day, hour, minute = (int(stamp[i:i + 2]) for i in range(0, 8, 2))
        return datetime(year, month, day, hour, minute)
    except ValueError as exc:
        raise ValueError(
            "cannot parse an MMDDhhmm timestamp from log file name {!r}".format(file)
        ) from exc

In [None]:
# Load every tab-separated log under logs/ into one DataFrame (`results`).
#
# Rows with fewer than four fields are skipped. A row whose 'records' field
# is '0' is remembered; every following non-zero row is emitted together with
# a copy of that remembered row whose 'records' field is set to the non-zero
# row's value.

# Column layout of a log row, in file order.
LOG_COLUMNS = ['run', 'data',
               'instance', 'records',
               'insertion', 'ins/retrieve', 'retrieve']
NUMERIC_COLUMNS = ['records', 'insertion', 'ins/retrieve', 'retrieve']
RECORDS_FIELD = 3  # position of 'records' within a row

dfs_l = []
# Sorted so the concatenation order does not depend on directory listing order.
for file in sorted(glob("logs/*.txt")):
    # Reset per file so a zero row left over from a previous file is never
    # paired with this file's measurements.
    prev = None
    lines_to_df = []
    # newline="" is what the csv module documentation asks for when opening files.
    with open(file, "r", newline="") as r:
        for l in csv.reader(r, delimiter="\t", quotechar='"'):
            if len(l) < 4:
                continue
            if l[RECORDS_FIELD] == '0':
                prev = l
                continue
            if prev is not None:
                # Append a *copy*: appending the same list object and then
                # mutating it would rewrite the 'records' value of every
                # copy appended earlier.
                paired = list(prev)
                paired[RECORDS_FIELD] = l[RECORDS_FIELD]
                lines_to_df.append(paired)
            # A non-zero row with no preceding zero row in this file is kept
            # on its own rather than failing on a missing row.
            lines_to_df.append(l)

    frame = pd.DataFrame(lines_to_df, columns=LOG_COLUMNS)
    frame[NUMERIC_COLUMNS] = frame[NUMERIC_COLUMNS].apply(pd.to_numeric)
    frame['date'] = get_date(file)
    dfs_l.append(frame)

if not dfs_l:
    # pd.concat([]) would fail with a message that does not mention the logs.
    raise FileNotFoundError('no log files matched "logs/*.txt"')

results = pd.concat(dfs_l)
# Preview at most 10 random rows; sample(10) raises on smaller frames.
results.sample(min(10, len(results)))

In [None]:
# Mean of the numeric measurements for each (data, instance, records) group.
# numeric_only=True keeps the non-numeric columns ('run' holds strings, 'date'
# holds datetimes) out of the mean; newer pandas versions no longer drop such
# columns silently and raise instead.
aggregate = results.groupby(['data','instance','records']).mean(numeric_only=True)
# Split by the first index level ('data') and flatten the remaining levels
# back into ordinary columns.
flights = aggregate.loc['flights'].reset_index()
other = aggregate.loc['other'].reset_index()

In [None]:
import matplotlib.pyplot as plt


def _plot_metric_by_instance(ax, frame, instances, column, title):
    """Plot `column` against `records` on `ax`, one labelled line per instance.

    Args:
        ax: Matplotlib axes to draw on.
        frame: DataFrame with 'instance', 'records' and `column` columns.
        instances: Instance values to draw, in legend order.
        column: Name of the column plotted on the y axis.
        title: Title of the panel.
    """
    for instance in instances:
        instance_rows = frame[frame['instance'] == instance]
        ax.plot(instance_rows.records, instance_rows[column], label=instance)
    # One legend per panel, built after all lines exist; skipped when nothing
    # was drawn so matplotlib does not warn about an empty legend.
    if len(instances) > 0:
        ax.legend()
    ax.set_xlabel('Records')
    ax.set_ylabel('Seconds elapsed')
    ax.set_title(title)


instances = flights.instance.unique()

fig, (ax_insertion, ax_retrieval) = plt.subplots(1, 2, figsize=(12, 4))
_plot_metric_by_instance(ax_insertion, flights, instances, 'insertion', 'Insertion')
# NOTE(review): this panel is titled 'Retrieval' but plots the 'ins/retrieve'
# column, not 'retrieve' - confirm which column is intended.
_plot_metric_by_instance(ax_retrieval, flights, instances, 'ins/retrieve', 'Retrieval')
fig.tight_layout()
plt.show()