In [None]:
import os
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from cycler import cycler

In [None]:
time_format = '%d-%m %H:%M:%S'
def extract_startend_times(inputdir, rep, dstat_filename, truncate_sec):
    """Return the truncated (start, end) time window covered by one repetition's dstat log.

    Reads ``<inputdir>/<rep>/<dstat_filename>``, determines the earliest and
    latest entry of its ``time`` column and shrinks that interval by
    ``truncate_sec`` seconds on both sides.

    Args:
        inputdir: Directory that contains one sub-directory per repetition.
        rep: Repetition identifier; ``str(rep)`` is used as the sub-directory name.
        dstat_filename: Name of the dstat CSV file inside the repetition directory.
        truncate_sec: Number of seconds to cut off at the start and at the end.

    Returns:
        Tuple ``(start, end)`` of strings formatted with ``time_format``.
    """
    filepath = os.path.join(inputdir, str(rep), dstat_filename)
    # header=3: the column names are taken from the row with index 3.
    csv_file = pd.read_csv(filepath, header=3)

    # Parse before taking min/max. The format is day-first, so comparing the raw
    # strings is not chronological across a month boundary
    # ('31-10 23:59:59' sorts after '01-11 00:00:01').
    # Entries that do not match the format become NaT and are skipped by min/max.
    # The format carries no year, so a log spanning New Year is still not
    # ordered correctly.
    times = pd.to_datetime(csv_file['time'], format=time_format, errors='coerce')

    margin = datetime.timedelta(seconds=truncate_sec)
    truncated_start_time = times.min() + margin
    truncated_end_time = times.max() - margin
    return (truncated_start_time.strftime(time_format),
            truncated_end_time.strftime(time_format))

In [None]:
def read_dstat_from_server(experiment_dir, server_dstat_filename, startend_times_tuple, time_fmt=None):
    """Average a server's dstat metrics over a given time window.

    Reads ``<experiment_dir>/<server_dstat_filename>``, keeps the rows whose
    ``time`` lies inside the inclusive window and returns the column means of
    ``idl``, ``recv`` and ``send`` plus a derived ``load`` entry.

    Args:
        experiment_dir: Directory containing the server dstat file.
        server_dstat_filename: Name of the server's dstat CSV file.
        startend_times_tuple: ``(start, end)`` time strings; index 0 is the
            start and index 1 the end of the window.
        time_fmt: ``strptime`` format of the time strings. Defaults to the
            module-level ``time_format``.

    Returns:
        pandas Series indexed by ``idl``, ``recv``, ``send`` and ``load``
        (``load`` is ``100 - idl``). All entries are NaN when no row falls
        inside the window.
    """
    if time_fmt is None:
        time_fmt = time_format

    window_start = pd.to_datetime(startend_times_tuple[0], format=time_fmt)
    window_end = pd.to_datetime(startend_times_tuple[1], format=time_fmt)

    filepath = os.path.join(experiment_dir, server_dstat_filename)
    # header=3: the column names are taken from the row with index 3.
    csv_file = pd.read_csv(filepath, header=3)

    # Compare parsed timestamps instead of raw strings: with a day-first format
    # string comparison is not chronological across a month boundary, which
    # would make the filter select nothing. Unparseable entries become NaT and
    # never match the window.
    sample_times = pd.to_datetime(csv_file['time'], format=time_fmt, errors='coerce')
    in_window = (sample_times >= window_start) & (sample_times <= window_end)

    excerpt = csv_file.loc[in_window, ['idl', 'recv', 'send']]
    means = excerpt.mean()
    means['load'] = 100 - means['idl']

    return means

In [None]:
def read_server_dstat_all_reps(experiment_dir, reps, client_dstat_filename, server_dstat_filename, truncate_sec, client_dir=None):
    """Average one server's dstat metrics over all repetitions.

    For every repetition ``1..reps`` the time window is taken from the client
    dstat log of that repetition, the server log is averaged inside that
    window, and the per-repetition results are averaged again.

    Args:
        experiment_dir: Directory containing the server dstat file.
        reps: Number of repetitions; repetitions are numbered from 1.
        client_dstat_filename: Client dstat file name inside each repetition directory.
        server_dstat_filename: Server dstat file name inside ``experiment_dir``.
        truncate_sec: Seconds cut off at both ends of each client window.
        client_dir: Directory holding the per-repetition client logs. When
            omitted, the module-level name ``inputdir`` is used, which is what
            this function always relied on; callers that do not pass it must
            assign ``inputdir`` before calling.

    Returns:
        pandas Series with the metrics averaged over all repetitions.
    """
    if client_dir is None:
        # Legacy behaviour: fall back to the notebook-global `inputdir`.
        client_dir = inputdir
    rep_time_tuples = [
        extract_startend_times(client_dir, rep, client_dstat_filename, truncate_sec)
        for rep in range(1, reps + 1)
    ]
    all_reps = [
        read_dstat_from_server(experiment_dir, server_dstat_filename, tup)
        for tup in rep_time_tuples
    ]
    result = pd.concat(all_reps, axis='columns').mean(axis='columns')
    return result

In [None]:
def read_server_dstat_all_reps_noavg(experiment_dir, reps, client_dstat_filename, server_dstat_filename, truncate_sec, client_dir=None):
    """Collect one server's dstat metrics for every repetition without averaging them.

    For every repetition ``1..reps`` the time window is taken from the client
    dstat log of that repetition and the server log is averaged inside that
    window; the per-repetition results are returned side by side.

    Args:
        experiment_dir: Directory containing the server dstat file.
        reps: Number of repetitions; repetitions are numbered from 1.
        client_dstat_filename: Client dstat file name inside each repetition directory.
        server_dstat_filename: Server dstat file name inside ``experiment_dir``.
        truncate_sec: Seconds cut off at both ends of each client window.
        client_dir: Directory holding the per-repetition client logs. When
            omitted, the module-level name ``inputdir`` is used, which is what
            this function always relied on; callers that do not pass it must
            assign ``inputdir`` before calling.

    Returns:
        pandas DataFrame with one column per repetition and one row per metric.
    """
    if client_dir is None:
        # Legacy behaviour: fall back to the notebook-global `inputdir`.
        client_dir = inputdir
    rep_time_tuples = [
        extract_startend_times(client_dir, rep, client_dstat_filename, truncate_sec)
        for rep in range(1, reps + 1)
    ]
    all_reps = [
        read_dstat_from_server(experiment_dir, server_dstat_filename, tup)
        for tup in rep_time_tuples
    ]
    result = pd.concat(all_reps, axis='columns')
    return result

In [None]:
def read_dstat_all_servers(experiment_dir, reps, client_dstat_filename, server_dstat_filenames, truncate_sec):
    """Gather the repetition-averaged dstat metrics of several servers.

    Calls ``read_server_dstat_all_reps`` once per entry of
    ``server_dstat_filenames`` and places the resulting Series next to each
    other, one column per server file (in the given order).
    """
    per_server = []
    for server_file in server_dstat_filenames:
        server_metrics = read_server_dstat_all_reps(
            experiment_dir, reps, client_dstat_filename, server_file, truncate_sec
        )
        per_server.append(server_metrics)
    return pd.concat(per_server, axis='columns')

In [None]:
def mean_dstat_servers(data, mean_columns):
    """Average the rows labelled ``mean_columns`` of ``data`` across all of its columns.

    Returns a Series indexed by the selected row labels.
    """
    selected_rows = data.loc[mean_columns, :]
    return selected_rows.mean(axis=1)

def sum_dstat_servers(data, sum_columns):
    """Add up the rows labelled ``sum_columns`` of ``data`` across all of its columns.

    Returns a Series indexed by the selected row labels.
    """
    selected_rows = data.loc[sum_columns, :]
    return selected_rows.sum(axis=1)

def aggregate_dstat_all_servers(experiment_dir, reps, dstat_filename, server_dstat_filenames, truncate_sec):
    """Reduce the per-server dstat metrics to a single Series.

    ``load`` is averaged over the servers, while ``recv`` and ``send`` are
    summed over the servers; the two partial results are concatenated in that
    order.
    """
    per_server = read_dstat_all_servers(
        experiment_dir, reps, dstat_filename, server_dstat_filenames, truncate_sec
    )
    load_mean = mean_dstat_servers(per_server, ['load'])
    traffic_sum = sum_dstat_servers(per_server, ['recv', 'send'])
    combined = pd.concat([load_mean, traffic_sum])
    return combined

In [None]:
# Driver cell: build `agg_table` with one row of aggregated server metrics per
# multiget size.
experiment_dir = "" # path of the 5.2 nonsharded experiment dir (left empty here; must be set before running)
sharded = "nonsharded"
# Alternative configuration for the sharded experiment:
#experiment_dir = "/home/flo/Documents/eth-asl-final-experiment-data/exp5/5_1_multigets_sharded_2017-11-24_085912"
#sharded = "sharded"

# Multiget sizes; each one has its own "<sharded>_<n>multiget" sub-directory.
multigets = [1, 3, 6, 9]

dstat_filename = "client_dstat_01.log"
server_dstat_filenames = ["server_dstat_06.log", "server_dstat_07.log"]
reps = 3
# Seconds cut off at both ends of every client time window.
truncate_sec = 5

all_metrics = []
for multiget in multigets:
    # `inputdir` must be assigned at module level before the calls below:
    # read_server_dstat_all_reps reads it as a global, not as a parameter.
    inputdir = os.path.join(experiment_dir, "{}_{}multiget".format(sharded, multiget))
    # NOTE(review): `single_metrics` is not used in any cell visible here, and
    # aggregate_dstat_all_servers reads the same files again - candidate for removal.
    single_metrics = read_dstat_all_servers(experiment_dir, reps, dstat_filename, server_dstat_filenames, truncate_sec)
    metrics = aggregate_dstat_all_servers(experiment_dir, reps, dstat_filename, server_dstat_filenames, truncate_sec)
    metrics['multigets'] = multiget
    all_metrics.append(metrics)
# One column per multiget after concat; transpose so that each multiget is a row.
agg_table = pd.concat(all_metrics, axis=1).transpose()
# Scale traffic by 1e6 (presumably bytes -> MB; confirm against the dstat units).
agg_table['recv'] = agg_table['recv'] / 1000000
agg_table['send'] = agg_table['send'] / 1000000


In [None]:
# Display the aggregated table (one row per multiget size: load, recv, send, multigets).
agg_table

In [None]:

# Plot the server load from `agg_table` against the multiget size.
fig, ax = plt.subplots()
color_cycler = cycler('color', ['#ccece6', '#66c2a4', '#238b45', '#00441b'])
ax.set_ylim([0, 30])
ax.set_prop_cycle(color_cycler)
agg_table.plot(ax=ax, x='multigets', y='load', marker='o')
ax.legend(loc="best", fontsize="small")
# The labels describe what is actually plotted: x is the 'multigets' column and
# y is 'load' (100 - idl, averaged over the servers). The previous labels
# ("Number of clients" / "Throughput (MB/s)") did not match this data.
ax.set_xlabel("Multiget size")
ax.set_ylabel("Average server CPU load (%)")

plt.show()