In [None]:
%matplotlib inline

import glob
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Root directory that holds one sub-directory per test case.
TEST_CASES_PATH = 'results_remote_lb'

# Display name of each test case -> name of its sub-directory under TEST_CASES_PATH.
# Insertion order is the order in which test cases are iterated for the cost plot.
# NOTE(review): the directory names look like run timestamps -- confirm.
TEST_CASES_DICT = dict([
    ('Baseline', '2019_04_04_161815'),
    ('Default Scaling', '2019_04_04_203319'),
    ('Low Threshold Scaling', '2019_04_04_233925'),
    ('Aggressive Scaling', '2019_04_05_145551'),
])

# File-name templates. {0} is filled with the run index; {1} (user responses only)
# with the user name. The AWS metrics template keeps a '*' wildcard for glob.
FILE_TYPE_AWS_METRICS = 'aws_metrics_{0}_*.csv'
FILE_TYPE_USER_RESPONSE = 'users{1}_response_{0}.csv'

# Seconds between two consecutive AWS metric samples; used to convert sample
# positions into minutes on the plots.
SAMPLE_PERIOD_SECONDS = 30

def read_file(test_case_name, file_type, *args):
    """Load one result CSV of a test case together with its run's time window.

    The file is found by formatting ``file_type`` with ``args`` and globbing for
    it inside ``TEST_CASES_PATH/<TEST_CASES_DICT[test_case_name]>``. The start and
    end times are parsed from the name of the run's AWS metrics file, whose
    underscore-separated fields after the ``aws_metrics_`` prefix are read as
    ``<args[0]>_<start>_<end>...``.

    :param test_case_name: key of ``TEST_CASES_DICT``
    :param file_type: file-name template, e.g. ``FILE_TYPE_AWS_METRICS`` or
        ``FILE_TYPE_USER_RESPONSE``
    :param args: values substituted into ``file_type``; ``args[0]`` is also used
        to locate the matching AWS metrics file
    :return: tuple ``(DataFrame, params_dict)`` where ``params_dict`` holds the
        integer ``start_time_seconds`` and ``end_time_seconds``
    :raises KeyError: if ``test_case_name`` is not in ``TEST_CASES_DICT``
    :raises FileNotFoundError: if no file matches the requested pattern
    """
    test_case_dir = os.path.join(TEST_CASES_PATH, TEST_CASES_DICT[test_case_name])

    def find_first_match(file_name):
        # glob returns matches in arbitrary order; sorting makes the pick
        # deterministic when a wildcard matches more than one file.
        search_path = os.path.join(test_case_dir, file_name)
        matches = sorted(glob.glob(search_path))
        if not matches:
            raise FileNotFoundError(
                "No file matching '{}' for test case '{}'".format(search_path, test_case_name))
        return matches[0]

    data_file = find_first_match(file_type.format(*args))

    # Get test file with test params if not searching aws metrics
    if file_type != FILE_TYPE_AWS_METRICS:
        info_file = find_first_match(FILE_TYPE_AWS_METRICS.format(args[0]))
    else:
        info_file = data_file

    # Skip the constant template prefix (everything before the first placeholder)
    # and drop the extension so a trailing time field still parses as an integer.
    prefix_length = FILE_TYPE_AWS_METRICS.index('{')
    file_stem = os.path.splitext(os.path.basename(info_file))[0]
    params_list = file_stem[prefix_length:].split('_')

    params_dict = {
        'start_time_seconds': int(params_list[1]),
        'end_time_seconds': int(params_list[2])
    }

    return pd.read_csv(data_file), params_dict

def get_aws_metrics_for_test_case(test_case_name, metrics='cpu0_util'):
    """Return one pivoted AWS metrics table per run (three runs) of a test case.

    Each table has one row per ``timepoint`` and one column per ``instance_id``
    holding the requested metric, restricted to rows whose integer timepoint
    lies below ``1800 / SAMPLE_PERIOD_SECONDS``.

    :param test_case_name: key of ``TEST_CASES_DICT``
    :param metrics: name of the CSV column to pivot
    :return: list of three pivoted DataFrames, in run order
    """
    # NOTE(review): 1800 is presumably the test duration in seconds (30 min) -- confirm.
    max_timepoint = int(1800 / SAMPLE_PERIOD_SECONDS)

    pivoted_runs = []
    for run_index in range(3):
        raw_metrics, _ = read_file(test_case_name, FILE_TYPE_AWS_METRICS, run_index)
        pivoted = raw_metrics.pivot_table(values=metrics, index=['timepoint'], columns='instance_id')
        inside_window = pivoted.index.astype(int) < max_timepoint
        pivoted_runs.append(pivoted[inside_window])
    return pivoted_runs


def get_user_responses_for_test_case(test_case_name, user='A'):
    """Load the response logs of one user for the three runs of a test case.

    Every returned frame gets a ``timepoint`` column: ``timeStamp / 1000`` minus
    the run's ``start_time_seconds``.

    :param test_case_name: key of ``TEST_CASES_DICT``
    :param user: user name substituted into ``FILE_TYPE_USER_RESPONSE``
    :return: list of three DataFrames, in run order
    """
    def load_run(run_index):
        responses, run_params = read_file(test_case_name, FILE_TYPE_USER_RESPONSE, run_index, user)
        run_start = run_params['start_time_seconds']
        # NOTE(review): assumes timeStamp is in epoch milliseconds -- confirm.
        responses.loc[:, 'timepoint'] = (responses.loc[:, 'timeStamp'] / 1000) - run_start
        return responses

    return [load_run(run_index) for run_index in range(3)]

def init_figure(figsize, xlabel, ylabel, ylim):
    """Open a new figure and set its axis labels and y-axis limits.

    :param figsize: figure size passed to ``plt.figure``
    :param xlabel: label of the x axis
    :param ylabel: label of the y axis
    :param ylim: y-axis limits, passed on as a single argument
    """
    axes = plt.figure(figsize=figsize).gca()
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_ylim(ylim)

def plot_cpu0_util_for_test_case(test_case_name, case_num='all', show='all', new_figure=True):
    """Plot CPU utilization over time for one test case.

    :param test_case_name: key of ``TEST_CASES_DICT``
    :param case_num: ``'all'`` or the index of a single run; a single run is only
        drawn when ``show == 'each'``
    :param show: ``'all'`` draws every run plus the average, ``'avg'`` only the
        average, ``'each'`` only the individual run(s)
    :param new_figure: open a new figure first; pass ``False`` to overlay on the
        current one
    """
    def minutes_axis(frame):
        # Scale integer sample positions instead of using a float step in
        # np.arange, whose output length is not guaranteed for non-integer steps.
        return np.arange(len(frame)) * SAMPLE_PERIOD_SECONDS / 60

    if new_figure:
        init_figure(figsize=(10,10), xlabel='Time (Minutes)', ylabel='CPU Utilization', ylim=(0,1))

    aws_metrics_list_pivoted = get_aws_metrics_for_test_case(test_case_name)
    if show != 'avg' and case_num == 'all':
        for aws_metrics in aws_metrics_list_pivoted:
            plt.plot(minutes_axis(aws_metrics), aws_metrics)
    if show != 'each' or case_num != 'all':
        if show != 'each':
            # Average each instance column over the runs per timepoint, then
            # average across instances, ignoring zero and missing readings.
            df_concat = pd.concat(aws_metrics_list_pivoted, sort=False)
            df_means = df_concat.groupby(df_concat.index).mean()
            aws_metrics = df_means.apply(lambda row: row[row!=0].dropna().mean(), axis=1)
            label = '{} Avg'.format(test_case_name)
        else:
            aws_metrics = aws_metrics_list_pivoted[case_num]
            # A single run is not an average, so do not label it as one.
            label = '{} Run {}'.format(test_case_name, case_num)

        lines = plt.plot(minutes_axis(aws_metrics), aws_metrics, label=label)
        lines[0].set_dashes([2, 2, 10, 2])

    # Only draw a legend when at least one artist carries a label; otherwise
    # matplotlib warns and adds an empty legend.
    handles, _ = plt.gca().get_legend_handles_labels()
    if handles:
        plt.legend(fontsize=16)

def plot_user_response_time_for_test_case(test_case_name, case_num='all', user='A', show='all', new_figure=True):
    """Plot binned mean response time over time for one user of a test case.

    Responses are grouped into 49 equal-width time bins (50 bin edges); each bin
    is drawn at its mean ``timepoint`` (in minutes) against its mean ``elapsed``.
    Bins whose mean time lies beyond 30 minutes are not drawn.

    :param test_case_name: key of ``TEST_CASES_DICT``
    :param case_num: ``'all'`` or the index of a single run; a single run is only
        drawn when ``show == 'each'``
    :param user: user whose response log is plotted
    :param show: ``'all'`` draws every run plus the pooled average, ``'avg'`` only
        the pooled average, ``'each'`` only the individual run(s)
    :param new_figure: open a new figure first; pass ``False`` to overlay on the
        current one
    """
    def binned_means(responses, bins):
        # include_lowest keeps the sample sitting exactly on the first bin edge,
        # which a right-closed first interval would otherwise drop.
        bin_of_row = pd.cut(responses['timepoint'], bins, include_lowest=True).rename('binnedTimepoint')
        # Average only the two plotted columns: a mean over every column fails
        # on non-numeric columns in recent pandas. Empty bins are skipped
        # (they would be filtered out below anyway because their mean is NaN).
        means = responses.groupby(bin_of_row, observed=True)[['timepoint', 'elapsed']].mean()
        return means[means['timepoint'] / 60 <= 30]

    if new_figure:
        # NOTE(review): the y range suggests elapsed is in milliseconds -- confirm.
        init_figure(figsize=(10,10), xlabel='Time (Minutes)', ylabel='Response Time', ylim=(0,42000))

    user_responses_list = get_user_responses_for_test_case(test_case_name, user)
    if show != 'avg' and case_num == 'all':
        # Use the same bin edges for every run so their curves are comparable.
        shared_min = min(df.loc[:,'timepoint'].min() for df in user_responses_list)
        shared_max = max(df.loc[:,'timepoint'].max() for df in user_responses_list)
        shared_bins = np.linspace(shared_min, shared_max, num=50)

        for df in user_responses_list:
            binned_df = binned_means(df, shared_bins)
            plt.plot(binned_df['timepoint'] / 60, binned_df.loc[:,'elapsed'].values)
    if show != 'each' or case_num != 'all':
        if show != 'each':
            # Pool the responses of all runs before binning.
            df = pd.concat(user_responses_list).sort_values(by='timepoint').reset_index(drop=True)
            label = '{} Avg'.format(test_case_name)
        else:
            df = user_responses_list[case_num]
            # A single run is not an average, so do not label it as one.
            label = '{} Run {}'.format(test_case_name, case_num)

        bins = np.linspace(df.loc[:,'timepoint'].min(), df.loc[:,'timepoint'].max(), num=50)
        binned_df = binned_means(df, bins)

        lines = plt.plot(binned_df['timepoint'] / 60, binned_df.loc[:,'elapsed'].values,
                         label=label)
        lines[0].set_dashes([2, 2, 10, 2])

    # Only draw a legend when at least one artist carries a label; otherwise
    # matplotlib warns and adds an empty legend.
    handles, _ = plt.gca().get_legend_handles_labels()
    if handles:
        plt.legend(fontsize=16)


## Test plot functions and params below

In [None]:
# Baseline: per-instance CPU traces of every run plus the cross-run average.
plot_cpu0_util_for_test_case(
    test_case_name='Baseline',
    case_num='all',
    show='all',
    new_figure=True,
)

In [None]:
# Default Scaling: all runs plus the average (function defaults).
plot_cpu0_util_for_test_case(test_case_name='Default Scaling')

In [None]:
# Low Threshold Scaling: all runs plus the average (function defaults).
plot_cpu0_util_for_test_case(test_case_name='Low Threshold Scaling')

In [None]:
# Aggressive Scaling: all runs plus the average (function defaults).
plot_cpu0_util_for_test_case(test_case_name='Aggressive Scaling')

In [None]:
# Baseline, user A: binned response time of every run plus the pooled average.
plot_user_response_time_for_test_case(
    test_case_name='Baseline',
    case_num='all',
    user='A',
    show='all',
    new_figure=True,
)

In [None]:
# Default Scaling: user A, all runs plus the pooled average (function defaults).
plot_user_response_time_for_test_case(test_case_name='Default Scaling')

In [None]:
# Low Threshold Scaling: user A, all runs plus the pooled average (function defaults).
plot_user_response_time_for_test_case(test_case_name='Low Threshold Scaling')

In [None]:
# Aggressive Scaling: user A, all runs plus the pooled average (function defaults).
plot_user_response_time_for_test_case(test_case_name='Aggressive Scaling')

## Plots below are used in the Final Report

In [None]:
# User A: average response time of every test case overlaid on one figure.
# Only the first test case opens a new figure; the rest draw onto it.
for position, test_case in enumerate(TEST_CASES_DICT):
    plot_user_response_time_for_test_case(test_case, user='A', show='avg', new_figure=(position == 0))

In [None]:
# User B: average response time of every test case overlaid on one figure.
# Only the first test case opens a new figure; the rest draw onto it.
for position, test_case in enumerate(TEST_CASES_DICT):
    plot_user_response_time_for_test_case(test_case, user='B', show='avg', new_figure=(position == 0))

In [None]:
# User C: average response time of every test case overlaid on one figure.
# Only the first test case opens a new figure; the rest draw onto it.
for position, test_case in enumerate(TEST_CASES_DICT):
    plot_user_response_time_for_test_case(test_case, user='C', show='avg', new_figure=(position == 0))

In [None]:
# Average CPU utilization of every test case overlaid on one figure.
# Only the first test case opens a new figure; the rest draw onto it.
for position, test_case in enumerate(TEST_CASES_DICT):
    plot_cpu0_util_for_test_case(test_case, show='avg', new_figure=(position == 0))

## Avg Cost Plot

In [None]:
# Average cost per test case, taken as the mean over its runs.
# NOTE(review): 0.1 / 3600 looks like an hourly price of $0.10 converted to a
# per-second rate, yet the column is named running_time_ms -- confirm the units.
cost_per_running_time_unit = 0.1 / 3600

avg_test_case_costs = []
avg_test_case_labels = []

for test_case_name in TEST_CASES_DICT:
    # Per run: sum the first row of the pivoted table across all instance columns.
    # NOTE(review): only the first timepoint row is used -- confirm this is intended.
    run_costs = [
        cost_per_running_time_unit * sum(run_frame.iloc[0,:])
        for run_frame in get_aws_metrics_for_test_case(test_case_name, metrics='running_time_ms')
    ]
    avg_test_case_costs.append(np.mean(run_costs))
    avg_test_case_labels.append(test_case_name)

bar_positions = range(len(avg_test_case_costs))

plt.figure(figsize=(10,10))
plt.xlabel('Test Case', fontsize=16)
plt.ylabel('Avg Cost Over Test Duration ($)', fontsize=16)

plt.bar(bar_positions, avg_test_case_costs, tick_label=avg_test_case_labels)