In [None]:
import os
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from cycler import cycler

In [None]:
time_format = '%d-%m %H:%M:%S'


def read_single_dstat(inputdir, rep, dstat_filename, truncate_sec):
    """Return the mean idle/recv/send/load of one dstat log, ignoring both ends.

    The file ``<inputdir>/<rep>/<dstat_filename>`` is read as CSV, the first
    and last ``truncate_sec`` seconds of the recording are discarded, and the
    remaining rows are averaged.

    Parameters
    ----------
    inputdir : str
        Directory that contains one sub-directory per repetition.
    rep : int or str
        Repetition identifier; ``str(rep)`` is used as the sub-directory name.
    dstat_filename : str
        Name of the dstat CSV file inside the repetition directory.
    truncate_sec : int or float
        Number of seconds to cut off after the earliest and before the latest
        timestamp found in the ``time`` column.

    Returns
    -------
    pandas.Series
        Means of the ``idl``, ``recv`` and ``send`` columns over the kept
        rows, plus ``load`` computed as ``100 - idl``. All values are NaN
        when the truncation leaves no rows.

    Raises
    ------
    ValueError
        If a value in the ``time`` column does not match ``time_format``.
    """
    filepath = os.path.join(inputdir, str(rep), dstat_filename)
    # header=3: the column names sit on row index 3 as counted by pandas.
    csv_file = pd.read_csv(filepath, header=3)

    # Compare real timestamps, not strings: the format is day-first, so a
    # lexicographic comparison orders "01-02 ..." before "31-01 ..." and picks
    # the wrong window whenever a log crosses a month boundary.
    # NOTE: the format has no year, so a log spanning New Year is still not
    # ordered correctly.
    timestamps = pd.to_datetime(csv_file['time'], format=time_format)

    margin = pd.Timedelta(seconds=truncate_sec)
    window_start = timestamps.min() + margin
    window_end = timestamps.max() - margin
    inside_window = (timestamps >= window_start) & (timestamps <= window_end)
    truncated_data = csv_file[inside_window]

    excerp = truncated_data.loc[:, ['idl', 'recv', 'send']]
    means = excerp.mean()
    # dstat reports idle percentage; load is its complement.
    means['load'] = 100 - means['idl']

    return means

In [None]:
def read_dstat_all_reps(inputdir, reps, dstat_filename, truncate_sec):
    """Average one dstat file's metrics over repetitions 1..reps.

    ``read_single_dstat`` is called once per repetition number (starting at
    1, ending at ``reps`` inclusive). The resulting Series are placed side by
    side and averaged metric by metric.

    Returns a pandas Series indexed by metric name.
    """
    per_rep_means = []
    for rep_number in range(1, reps + 1):
        per_rep_means.append(
            read_single_dstat(inputdir, rep_number, dstat_filename, truncate_sec)
        )
    # One column per repetition, one row per metric.
    side_by_side = pd.concat(per_rep_means, axis=1)
    return side_by_side.mean(axis=1)

In [None]:
def read_dstat_all_clients(inputdir, reps, dstat_filenames, truncate_sec):
    """Collect the repetition-averaged metrics of several dstat files.

    For every name in ``dstat_filenames`` the averages from
    ``read_dstat_all_reps`` are computed; the results are joined into a
    DataFrame with one column per file (in the given order) and one row per
    metric.
    """
    per_file_means = []
    for log_name in dstat_filenames:
        per_file_means.append(
            read_dstat_all_reps(inputdir, reps, log_name, truncate_sec)
        )
    return pd.concat(per_file_means, axis=1)

In [None]:
def mean_dstat_clients(data, mean_columns):
    """Return the row-wise mean of the rows of ``data`` labelled in ``mean_columns``.

    Despite the parameter name, ``mean_columns`` holds row (index) labels;
    the mean is taken across all columns of each selected row.
    """
    selected_rows = data.loc[mean_columns]
    return selected_rows.mean(axis=1)

def sum_dstat_clients(data, sum_columns):
    """Return the row-wise sum of the rows of ``data`` labelled in ``sum_columns``.

    Despite the parameter name, ``sum_columns`` holds row (index) labels;
    the sum is taken across all columns of each selected row.
    """
    selected_rows = data.loc[sum_columns]
    return selected_rows.sum(axis=1)

def aggregate_dstat_all_clients(inputdir, reps, dstat_filenames, truncate_sec):
    """Combine the metrics of all given dstat files into one Series.

    ``load`` is averaged across the files, while ``recv`` and ``send`` are
    summed across them. The returned Series lists ``load`` first, followed by
    ``recv`` and ``send``.
    """
    per_file = read_dstat_all_clients(inputdir, reps, dstat_filenames, truncate_sec)
    return pd.concat([
        mean_dstat_clients(per_file, ['load']),
        sum_dstat_clients(per_file, ['recv', 'send']),
    ])

In [None]:
# --- Experiment configuration -------------------------------------------
experiment_dir = "" # 5.2 nonsharded experiment dir
multigets = [1, 3, 6, 9]
sharded = "nonsharded"
dstat_filenames = ["client_dstat_01.log", "client_dstat_02.log", "client_dstat_03.log"]
reps = 3
truncate_sec = 10
num_threads = 2

# --- Aggregate the dstat metrics for every multiget setting --------------
# One Series per setting: mean load, summed recv/send, plus the setting itself.
all_metrics = []
for multiget in multigets:
    inputdir = os.path.join(experiment_dir, "{}_{}multiget".format(sharded, multiget))
    metrics = aggregate_dstat_all_clients(inputdir, reps, dstat_filenames, truncate_sec)
    metrics['multigets'] = multiget
    all_metrics.append(metrics)
# Series become columns in concat; transpose so each setting is one row.
agg_table = pd.concat(all_metrics, axis=1).transpose()
# Scale the network counters by 1e6 (the plot labels them as MB/s).
agg_table['recv'] = agg_table['recv'] / 1000000
agg_table['send'] = agg_table['send'] / 1000000

# --- Plot send throughput against the multiget setting -------------------
fig, ax = plt.subplots()
color_cycler = cycler('color', ['#ccece6', '#66c2a4', '#238b45', '#00441b'])
ax.set_ylim([0, 30])
#ax.set_xlim([0, 350])
ax.set_prop_cycle(color_cycler)
agg_table.plot(ax=ax, x='multigets', y='send', marker='o')
#plt.axhline(y=25.38,hold=None, color='r')
ax.legend(loc="best", fontsize="small")
ax.set_title("Send network activity of the memtier VM")
# The x values are taken from `multigets`, not from a client count.
ax.set_xlabel("Multiget size")
ax.set_ylabel("Send Throughput (MB/s)")

# Create the output directory first; savefig fails if it does not exist.
output_path = "./graphs/exp5/memtier_Send.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Save before showing so the file is written regardless of how the active
# backend treats the figure once it has been displayed.
fig.savefig(output_path, dpi=300)
plt.show()

