In [None]:
%matplotlib inline

import glob
import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Seconds between consecutive AWS metric rows; the CPU plot uses it to turn
# row positions into minutes.
SAMPLE_PERIOD_SECONDS = 30

# Directory that holds one sub-directory of result files per test case.
TEST_CASES_PATH = "results_remote_lb"

# Display name of each test case -> name of its results sub-directory.
TEST_CASES_DICT = dict([
    ('Baseline', '2019_04_04_161815'),
    ('Default Scaling', '2019_04_04_203319'),
    ('Low Threshold Scaling', '2019_04_04_233925'),
    ('Aggressive Scaling', '2019_04_05_145551'),
])

# File-name templates (glob patterns) passed to read_file. The loaders fill
# {0} with the loop index 0-2 and {1} with the user label.
FILE_TYPE_USER_RESPONSE = 'users{1}_response_{0}.csv'
FILE_TYPE_AWS_METRICS = 'aws_metrics_{0}_*.csv'

def read_file(test_case_name, file_type, *args):
    """Load one results CSV for a test case together with that run's time window.

    The CSV is searched for in ``TEST_CASES_PATH/<test case directory>`` using
    ``file_type.format(*args)`` as a glob pattern. The time window is always
    parsed from the *name* of the AWS metrics file: the requested file itself
    when ``file_type`` is ``FILE_TYPE_AWS_METRICS``, otherwise the AWS metrics
    file found by formatting that template with ``args[0]``.

    Args:
        test_case_name: Key of ``TEST_CASES_DICT`` selecting the results directory.
        file_type: File-name template (``FILE_TYPE_AWS_METRICS`` or
            ``FILE_TYPE_USER_RESPONSE``).
        *args: Values substituted into the template; ``args[0]`` is also used
            to locate the matching AWS metrics file.

    Returns:
        A ``(DataFrame, dict)`` tuple. The dict holds the integer entries
        ``'start_time_seconds'`` and ``'end_time_seconds'``.

    Raises:
        KeyError: ``test_case_name`` is not in ``TEST_CASES_DICT``.
        FileNotFoundError: no file matches one of the glob patterns.
        ValueError: the start/end fields of the file name are not integers.
    """
    test_case_dir = os.path.join(TEST_CASES_PATH, TEST_CASES_DICT[test_case_name])

    def first_match(file_name_pattern):
        search_path = os.path.join(test_case_dir, file_name_pattern)
        # glob returns matches in arbitrary order; sort so the same file is
        # chosen on every run when several match.
        matches = sorted(glob.glob(search_path))
        if not matches:
            raise FileNotFoundError(
                "No file matching '{0}' for test case '{1}'".format(search_path, test_case_name)
            )
        return matches[0]

    data_file = first_match(file_type.format(*args))

    # Get test file with test params if not searching aws metrics
    if file_type != FILE_TYPE_AWS_METRICS:
        info_file = first_match(FILE_TYPE_AWS_METRICS.format(args[0]))
    else:
        info_file = data_file

    # Everything in the template before the first placeholder ('aws_metrics_')
    # is a fixed prefix; the remainder is a list of '_'-separated fields where
    # field 1 is the start time and field 2 the end time.
    prefix_length = len(FILE_TYPE_AWS_METRICS.split('{', 1)[0])
    # Dropping the extension first lets a name that ends right after the end
    # time ('..._<end>.csv') parse as well as one with further fields.
    stem = os.path.splitext(os.path.basename(info_file))[0]
    params_list = stem[prefix_length:].split('_')

    params_dict = {
        'start_time_seconds': int(params_list[1]),
        'end_time_seconds': int(params_list[2])
    }

    return pd.read_csv(data_file), params_dict

def get_aws_metrics_for_test_case(test_case_name):
    """Return one pivoted CPU table per AWS metrics file (indices 0-2) of a test case.

    Each table is indexed by 'timepoint', has one column per 'instance_id' and
    contains the 'cpu0_util' values (aggregated with pivot_table's default).
    """
    pivoted_tables = []
    for run_index in range(3):
        # read_file also returns the parsed time window, which is not needed here.
        metrics_df, _time_window = read_file(test_case_name, FILE_TYPE_AWS_METRICS, run_index)
        pivoted_tables.append(
            metrics_df.pivot_table(values='cpu0_util', index=['timepoint'], columns='instance_id')
        )
    return pivoted_tables

def get_user_responses_for_test_case(test_case_name, user='A'):
    """Load the response CSVs (indices 0-2) of one user for a test case.

    Every returned DataFrame gets an extra 'timepoint' column: 'timeStamp'
    divided by 1000, minus the start time parsed from the matching AWS
    metrics file name.
    """
    def load_responses(run_index):
        responses, time_window = read_file(test_case_name, FILE_TYPE_USER_RESPONSE, run_index, user)
        # presumably 'timeStamp' is in milliseconds, making this seconds - TODO confirm
        scaled_timestamp = responses.loc[:, 'timeStamp'] / 1000
        responses.loc[:, 'timepoint'] = scaled_timestamp - time_window['start_time_seconds']
        return responses

    return [load_responses(run_index) for run_index in range(3)]

def plot_cpu0_util_for_test_case(test_case_name, case_num='all', show='all'):
    """Plot CPU utilization over time for the runs of one test case.

    Args:
        test_case_name: Key of ``TEST_CASES_DICT``.
        case_num: ``'all'`` to use every run, or the integer index of a single
            run. The selection applies to both the per-instance lines and the
            average.
        show: ``'each'`` draws one line per instance of every selected run,
            ``'avg'`` draws only the dashed average line, any other value
            (default ``'all'``) draws both.

    The average is computed per timepoint: instance columns are first averaged
    across the selected runs, then across instances, skipping zero and missing
    values.

    Raises:
        IndexError: ``case_num`` is an integer outside the available runs.
    """
    # Load first so a missing file does not leave an empty figure behind.
    runs = get_aws_metrics_for_test_case(test_case_name)
    selected_runs = runs if case_num == 'all' else [runs[case_num]]

    plt.figure(figsize=(20, 10))
    plt.title('CPU Utilization - {0}'.format(test_case_name))
    plt.xlabel('Minutes')
    plt.ylabel('CPU Utilization')
    plt.ylim(0, 1)

    def minutes_axis(num_samples):
        # Scaling an integer range always yields exactly num_samples points; a
        # float-step arange can come out one element long or short.
        return np.arange(num_samples) * SAMPLE_PERIOD_SECONDS / 60

    if show != 'avg':
        for run in selected_runs:
            plt.plot(minutes_axis(len(run)), run)

    if show != 'each':
        combined = pd.concat(selected_runs)
        # Rows of different runs that share a timepoint are averaged together.
        per_timepoint = combined.groupby(combined.index).mean()
        average = per_timepoint.apply(lambda row: row[row != 0].dropna().mean(), axis=1)

        average_lines = plt.plot(minutes_axis(len(average)), average)
        # Dash pattern to tell the average apart from the per-instance lines.
        average_lines[0].set_dashes([2, 2, 10, 2])

def plot_user_response_time_for_test_case(test_case_name, case_num='all', user='A', avg=False):
    """Plot the binned mean response time of one user for a test case.

    The 'timepoint' range of the selected runs is split into equal-width bins
    and the mean of the 'elapsed' column is plotted per bin, one line per run.
    When several runs are plotted they share the same bins, so their lines are
    directly comparable.

    Args:
        test_case_name: Key of ``TEST_CASES_DICT``.
        case_num: ``'all'`` to plot every run, or the integer index of a
            single run.
        user: User label substituted into the response file name.
        avg: Currently ignored; kept so existing calls remain valid.

    Raises:
        IndexError: ``case_num`` is an integer outside the available runs.
    """
    num_bin_edges = 50  # 50 edges -> 49 bins

    runs = get_user_responses_for_test_case(test_case_name, user)
    selected_runs = runs if case_num == 'all' else [runs[case_num]]

    plt.figure(figsize=(20, 10))
    plt.xlabel('Timestamp')
    plt.ylabel('Response Time')
    plt.ylim(0, 50000)

    range_start = min(df.loc[:, 'timepoint'].min() for df in selected_runs)
    range_end = max(df.loc[:, 'timepoint'].max() for df in selected_runs)
    bins = np.linspace(range_start, range_end, num=num_bin_edges)
    # Plot against the centre of each bin so the x axis is in 'timepoint'
    # units rather than bin numbers.
    bin_centers = (bins[:-1] + bins[1:]) / 2

    for df in selected_runs:
        # include_lowest keeps the earliest sample, which sits exactly on the
        # first bin edge and would otherwise fall outside every bin.
        bin_of_sample = pd.cut(df['timepoint'], bins, include_lowest=True)
        # Aggregate only 'elapsed': averaging the whole frame fails on
        # pandas >= 2.0 if it contains non-numeric columns. observed=False
        # keeps empty bins (as NaN) so every run yields one value per bin.
        mean_elapsed = df['elapsed'].groupby(bin_of_sample, observed=False).mean()
        plt.plot(bin_centers, mean_elapsed.values)


In [None]:
# CPU utilization of every run of the 'Baseline' test case: per-instance lines plus the dashed average.
plot_cpu0_util_for_test_case(test_case_name='Baseline')

In [None]:
# CPU utilization of every run of the 'Default Scaling' test case: per-instance lines plus the dashed average.
plot_cpu0_util_for_test_case(test_case_name='Default Scaling')

In [None]:
# CPU utilization of every run of the 'Low Threshold Scaling' test case: per-instance lines plus the dashed average.
plot_cpu0_util_for_test_case(test_case_name='Low Threshold Scaling')

In [None]:
# CPU utilization of every run of the 'Aggressive Scaling' test case: per-instance lines plus the dashed average.
plot_cpu0_util_for_test_case(test_case_name='Aggressive Scaling')

In [None]:
# Binned mean response time of user 'A' for every run of the 'Baseline' test case.
plot_user_response_time_for_test_case(test_case_name='Baseline')

In [None]:
# Binned mean response time of user 'A' for every run of the 'Default Scaling' test case.
plot_user_response_time_for_test_case(test_case_name='Default Scaling')

In [None]:
# Binned mean response time of user 'A' for every run of the 'Low Threshold Scaling' test case.
plot_user_response_time_for_test_case(test_case_name='Low Threshold Scaling')

In [None]:
# Binned mean response time of user 'A' for every run of the 'Aggressive Scaling' test case.
plot_user_response_time_for_test_case(test_case_name='Aggressive Scaling')