# Graphs

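This is the base version of the notebook we used to generate the quality-of-service (QoS) graphs. Comments are provided throughout to aid its use. You will likely need to adapt the notebook before it runs in your environment: at minimum, set the input path in the CSV-loading cell, adjust the `graphs` configuration to the scenario you are plotting, and make sure a LaTeX installation is available (the styling cell enables `text.usetex`).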

In [None]:
import pandas as pd
import numpy as np

from scipy import stats

import matplotlib.pyplot as plt 
import matplotlib.patches as mpatches
import matplotlib.ticker as ticker

import glob
import os
import re

import ast

In [None]:
# Global styling for every generated figure. 'text.usetex' is switched on, so
# the label strings used elsewhere in this notebook are written as LaTeX.

plt.style.use("default")

params = {
    # text
    'text.usetex': True,
    'font.family': "serif",
    'font.serif': ["Computer Modern Serif"],
    'font.weight': "bold",
    'font.size': 24,
    # axes and ticks
    'axes.labelcolor': "black",
    'axes.edgecolor': "black",
    'axes.linewidth': 1,
    'xtick.color': "black",
    'ytick.color': "black",
}
plt.rcParams.update(params)

# One common text size for the body font, axis labels and tick labels.
plt.rcParams.update(dict.fromkeys(
    ('font.size', 'axes.labelsize', 'xtick.labelsize', 'ytick.labelsize'), 24))

# Turn off the pandas chained-assignment warning for the whole notebook.
pd.set_option('mode.chained_assignment', None)

In [None]:
# Read every CSV file in the results directory and combine them into a single
# dataframe.

path = r'analysed_results' # ! CHANGE TO CORRECT PATH 

# glob returns matches in an arbitrary, filesystem-dependent order. Sorting
# keeps the row order of combined_data (and therefore the order in which
# experiments/permutations are first seen downstream) reproducible.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))

if not csv_files:
    # Without this check pd.concat fails with "No objects to concatenate",
    # which does not point at the real problem (a wrong or empty path).
    raise FileNotFoundError(
        f"No CSV files found in '{path}'. Set `path` to the directory that "
        "holds the analysed results."
    )

temp_list = [pd.read_csv(filename, index_col=None, header=0) for filename in csv_files]

combined_data = pd.concat(temp_list, axis=0, ignore_index=True)

In [None]:
# Dictionary/JSON-like structure describing the figures to create.
#
# Each top-level entry is one scenario:
#   "name"              - scenario name; used in label generation and in the
#                         output file name
#   "data"              - substrings matched against the 'experiment' column
#   "permutation_order" - bar ordering handed to x_posns_rearranger
#   "plots"             - one figure per entry
#
# Each figure holds layout/legend settings plus "graphs", the subplots keyed
# "graph0", "graph1", ... Getting "nice" looking figures requires tuning the
# individual subplot parameters (axis limits, tick marks, legend anchors).
# Note: "height_ratios" and "graph_index" are stored here but are not
# forwarded to plt.subplots / used for drawing by the plotting cell.

def _subplot_spec(name, results_key, graph_index, subplot_loc, y_axis_lims, y_tick_marks):
    """Build the settings dict of one subplot that is actually drawn.

    y_axis_lims is [low, high]; y_tick_marks is [start, stop, step] as
    consumed by np.arange in the plotting cell.
    """
    return {
        "name": name,
        "results_key": results_key,
        "graph_index": graph_index,
        "subplot_loc": subplot_loc,
        "filler": False,  # a True value makes the plotting cell skip the subplot
        "y_axis_lims": y_axis_lims,
        "y_tick_marks": y_tick_marks,
    }


# (y-label, results key, grid location, y limits, y tick arange args)
_hybrid_subplots = [
    ("FPS", "fps", (0, 0), [0, 32], [0, 31, 10]),
    (r"\begin{center}E2E Lat. (ms)\end{center}", "e2e", (1, 0), [0, 60], [0, 61, 15]),
    (r"\begin{center}Service Lat. (ms)\end{center}", "services", (2, 0), [0, 55], [0, 51, 15]),
    (r"\begin{center}Mem. (GB)\end{center}", "memory", (0, 1), [0, 10], [0, 10, 2]),
    (r"\begin{center}CPU Util. (\%)\end{center}", "cpu", (1, 1), [0, 10], [0, 9, 2]),
    (r"\begin{center}GPU Util. (\%)\end{center}", "gpu_util", (2, 1), [0, 35], [0, 31, 10]),
]

graphs = {
    "graph3": {
        "name": "hybrid",
        "data": ["scatter-cloud"],
        "permutation_order": [0],
        "plots": {
            "plot1": {
                # bbox_to_anchor boxes of the experiment and services legends
                "legends": {
                    "exp": [0, 1.2, 1, 0.2],
                    "services": [0, 1.2, 1, 0.2],
                },
                "legend_cols": 2,
                "legend_size": 20,
                "figsize": [15, 10],
                "height_ratios": [1, 1, 1, 1],
                "subplots": (3, 2),  # rows, columns
                "graphs": {
                    f"graph{position}": _subplot_spec(
                        label, key, position, location, limits, ticks)
                    for position, (label, key, location, limits, ticks)
                    in enumerate(_hybrid_subplots)
                },
            },
        },
    }
}


In [None]:
# Shared plotting constants.

# Bar palette for the experiment permutations: hex codes WITHOUT the leading
# '#', which is added where the colours are assigned to the bars.
colours = [
    '3a77b2',
    'f3812d',
    '459e32',
    'ca2e33',
    '9169bb',
    '86574d',
    'da7ac2',
    'babc34',
    '4ebdcd',
]

# Per-service fill colour and 'loc', the service's position inside a
# permutation list. The dict order is also the stacking order of the bars.
service_dict = {
    service: {'colour': colour, 'loc': position}
    for position, (service, colour) in enumerate([
        ('primary', '#ef476f'),
        ('sift', '#ffd166'),
        ('encoding', '#06d6a0'),
        ('lsh', '#7bbee9'),
        ('matching', '#576aa8'),
    ])
}

# Width of a single bar, in x-axis units.
bar_width = 0.2

In [None]:
# Server lookup table. It mixes two kinds of keys:
#   * host names (as they appear in a permutation) -> display label
#   * short server codes (as they appear in generated labels) -> hatch style
#     and the CPU / CUDA core counts used for normalising the data
# 'shorthand' and 'hatch' are stored but not read by any code visible in this
# notebook.

_host_labels = {
    'gpu02': {'label': 'Edge2 (E2)', 'shorthand': '(E2)'},
    'cm-01-05-061': {'label': 'Edge1 (E1)'},
    'ip-172-31-28-206': {'label': 'Cloud (C)'},
}

_hardware_specs = {
    code: {'hatch': hatch, 'cores': cores, 'cuda_cores': cuda_cores}
    for code, hatch, cores, cuda_cores in [
        ('E1', 'xxx', 10, 4352),
        ('E2', '..', 32, 10752),
        ('C', '..', 18, 5120),
    ]
}

server_dict = {**_host_labels, **_hardware_specs}

In [None]:
# Functions to help in graph plotting

def client_bar_locs(dataframe, width=None):
    """Generate the x locations of the bars in the multi-bar plots.

    Parameters
    ----------
    dataframe : iterable of numbers
        Client number of every bar, in plotting order. Bars sharing a client
        number form one group; the group for client ``c`` is centred on
        ``c + 1``.
    width : float, optional
        Width of a single bar. Defaults to the module-level ``bar_width``.

    Returns
    -------
    numpy.ndarray
        One x location per input entry, in input order.

    Notes
    -----
    The number of bars per group is taken from how many entries belong to
    client 0, so the centring assumes client 0 is present and that every
    group has the same size. Locations are rounded to one decimal place
    before centring.
    """
    if width is None:
        width = bar_width

    last_loc = {}      # client -> location of the most recent bar in its group
    locs_list = []
    count_bars = 0     # bars per group, counted on client 0
    for client in dataframe:
        if client == 0:
            count_bars += 1

        # The first bar of a group lands on client + 1; each further bar is
        # placed one bar width to the right of the previous one.
        previous = last_loc.get(client, client + 1 - width)
        new_loc = np.around(previous + width, 1)
        last_loc[client] = new_loc
        locs_list.append(new_loc)

    # Shift left by half the group width so the group is centred on its tick.
    return np.array(locs_list) - (count_bars / 2) * width + width / 2

def _server_shorthand(server_name):
    """Return the short code at the end of a server's label ('Edge1 (E1)' -> 'E1')."""
    label = server_dict[server_name]['label']
    short = label[-3:-1]
    if 'C' in short:
        # one-letter code: 'Cloud (C)'[-3:-1] is '(C', so keep only the letter
        short = label[-2:-1]
    return short


def graph_labels(graph_name, experiment_name, curr_deploy_setup):
    """Generate the legend label of one experiment permutation.

    Parameters
    ----------
    graph_name : str
        Scenario name. 'loss', 'latency' and names containing 'scale' get
        dedicated label formats; everything else is labelled by the servers
        in the permutation.
    experiment_name : str
        Experiment identifier. Names containing 'concurrent' treat the last
        permutation entry as the concurrent workload's server and show it as
        a prefix ("E1: [...]") instead of inside the list.
    curr_deploy_setup : str
        Python-literal representation of the deployment dict (parsed with
        ast.literal_eval). Keys read, depending on the branch:
        'packet_loss', 'latencies', 'replicas', 'permutation'.

    Returns
    -------
    str
        The label; may contain LaTeX escapes (e.g. ``\\%``).
    """
    curr_deploy_setup = ast.literal_eval(curr_deploy_setup)

    if graph_name == 'loss':
        packet_loss = curr_deploy_setup['packet_loss']
        if 'e-' in str(packet_loss):
            # avoid scientific notation for very small loss rates
            packet_loss = f'{packet_loss:.5f}'
        # '\\%' is an escaped percent sign for LaTeX; a bare '\%' in a
        # non-raw string is an invalid escape sequence
        return f'Packet Loss: {packet_loss}\\%'

    if graph_name == 'latency':
        latencies = np.array(curr_deploy_setup['latencies'])
        # the label uses the third entry of the latency list
        return f'Latency: {latencies[2]} ms'

    if 'scale' in graph_name:
        return str(curr_deploy_setup['replicas'])

    # Remaining scenarios: label by the servers the services run on.
    permutation = curr_deploy_setup['permutation']
    if permutation.count(permutation[0]) == len(permutation):
        # every service on the same server -> use that server's full label
        label = server_dict[permutation[0]]['label']
    else:
        servers = list(permutation)
        is_concurrent = 'concurrent' in experiment_name
        concurrent_server = ""
        if is_concurrent:
            # the final entry is shown as a prefix, not as part of the list
            concurrent_server = _server_shorthand(servers.pop())
        label = "[" + ",".join(_server_shorthand(server) for server in servers) + "]"
        if is_concurrent:
            label = f'{concurrent_server}: {label}'

    if experiment_name == "2.1.2023-scale":
        label = label + " " + str(curr_deploy_setup['replicas'])
    return label

def permutation_analysis(graph_name, df_exp, df_perm):
    """Create the legend label and bar colour of every result row.

    Parameters
    ----------
    graph_name : str
        Scenario name, forwarded to graph_labels.
    df_exp, df_perm : iterable of str
        Experiment names and permutation strings, paired element by element
        (e.g. the 'experiment' and 'permutation' columns).

    Returns
    -------
    (labels, colours_list)
        ``labels`` holds one legend label per row. ``colours_list`` holds the
        matching '#rrggbb' colour; rows with the same label share a colour.
        Colours are assigned to the sorted unique labels in palette order and
        wrap around when there are more labels than palette entries.
    """
    labels = [graph_labels(graph_name, exp, perm) for exp, perm in zip(df_exp, df_perm)]

    # np.unique sorts, so the label -> colour mapping does not depend on row order
    labels_unique = np.unique(labels)
    colours_dict = {
        label: colours[position % len(colours)]
        for position, label in enumerate(labels_unique)
    }
    colours_list = [f'#{colours_dict[label]}' for label in labels]

    return labels, colours_list

def column_types(df_input):
    """Group the column names of a dataframe by dtype.

    Returns a dict mapping each dtype name found in ``df_input`` (for example
    'float64', 'int64', 'object') to an array of the column names that have
    that dtype.
    """
    dtype_per_column = df_input.dtypes
    return {
        kind.name: dtype_per_column[dtype_per_column == kind].index.values
        for kind in dtype_per_column.unique()
    }
        
def results_analyser(df_results, df_storage):
    """Collapse the raw results to one row per (experiment, permutation, client).

    For every group the float64 columns are reduced to their median, the
    'deployment_latencies' / 'deployment_clients' values are gathered into
    lists, and every '*std*' float column is overwritten with the standard
    error of the mean of the group's '*avg*' float columns.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Raw results. Must contain 'experiment', 'permutation', 'client',
        'deployment_latencies', 'deployment_clients' and 'fps_avg'.
    df_storage : pandas.DataFrame
        Frame the summary rows are appended to (typically empty).

    Returns
    -------
    pandas.DataFrame
        ``df_storage`` followed by one summary row per group.

    Notes
    -----
    The '*std*' and '*avg*' columns are paired purely by their order, so they
    must appear in matching order in ``df_results``.
    """
    # +/-inf are treated as missing; they become 0 with the fillna below
    df_results = df_results.replace([np.inf, -np.inf], np.nan)

    summaries = []
    for experiment in df_results['experiment'].unique():
        experiment_results = df_results[df_results['experiment'] == experiment]

        columns_numeric = column_types(experiment_results)['float64']
        columns_avg = [x for x in columns_numeric if 'avg' in x]
        columns_std = [x for x in columns_numeric if 'std' in x]

        for permutation in experiment_results['permutation'].unique():
            permutation_results = experiment_results[experiment_results['permutation'] == permutation]
            for client in permutation_results['client'].unique():
                curr_client_results = permutation_results[permutation_results['client'] == client].fillna(0)
                curr_client_med = curr_client_results[columns_numeric].median().to_frame().T
                cc_obj_int = curr_client_results.groupby(
                    ['experiment', 'permutation', 'client'], as_index=False
                )[['deployment_latencies', 'deployment_clients']].agg(list)

                curr_client_res = pd.concat([cc_obj_int, curr_client_med], axis=1)

                # Clamp negative FPS before computing the standard error.
                # .loc is required here: the chained form df[col][mask] = 0
                # silently does nothing under pandas Copy-on-Write.
                # NOTE(review): the median above is taken before this clamp,
                # as in the original code - confirm that is intended.
                curr_client_results.loc[curr_client_results['fps_avg'] < 0, 'fps_avg'] = 0

                # recalculating the standard deviation
                curr_client_res[columns_std] = stats.sem(curr_client_results[columns_avg])

                summaries.append(curr_client_res)

    if not summaries:
        return df_storage
    # single concat instead of growing the frame inside the loop
    return pd.concat([df_storage, *summaries], axis=0, ignore_index=True)

def _normalise_by_cores(permutation, service, results, core_key):
    """Divide each result by the `core_key` count of the server running `service`.

    The server is derived from the legend label of each row:
      * "[E1,C,...]" (optionally prefixed "X: ") - the entry at the service's
        'loc' position in the list;
      * a label containing an upper-case 'L' (the 'Latency: ...' and
        'Packet Loss: ...' labels) - always 'E1';
      * anything else - the text between '(' and ')', e.g. 'Edge2 (E2)' -> 'E2'.
    """
    labels = np.array(permutation)
    values = np.array(results)
    service_loc = service_dict[service]['loc']

    normalised = []
    for label, value in zip(labels, values):
        if '[' in label:
            # services are spread over several servers
            placement = label.split(':')[1] if ':' in label else label
            servers = placement.replace('[', '').replace(']', '').split(',')
            server = servers[service_loc].strip()
        elif 'L' in label:
            server = 'E1'
        else:
            # deployed entirely on one server
            server = label[label.find("(") + 1:label.find(")")]
        normalised.append(value / server_dict[server][core_key])
    return np.array(normalised, dtype=float)


def cpu_normaliser(permutation, service, results):
    """Normalise CPU results by the number of CPU cores of the hosting server.

    Parameters
    ----------
    permutation : sequence of str
        Legend label of every row (see graph_labels).
    service : str
        Service name; must be a key of service_dict.
    results : sequence of numbers
        One result per label.

    Returns
    -------
    numpy.ndarray
        ``results[i] / cores`` for every row.

    Raises
    ------
    KeyError
        If the service, or the server derived from a label, is unknown.
    """
    return _normalise_by_cores(permutation, service, results, 'cores')


def gpu_normaliser(permutation, service, results):
    """Normalise GPU results by the number of CUDA cores of the hosting server.

    Takes the same arguments as cpu_normaliser and returns
    ``results[i] / cuda_cores`` for every row as a numpy.ndarray.

    Raises
    ------
    KeyError
        If the service, or the server derived from a label, is unknown.
    """
    return _normalise_by_cores(permutation, service, results, 'cuda_cores')

def x_posn_changer(graph_name, x, x_decimals, correct_order):  
    """Adjust the x location of a single bar so the bars appear in a chosen order.

    The fractional part of ``x`` (the bar's offset inside its group) is looked
    up in ``x_decimals``; ``correct_order`` maps that slot to another slot, and
    the bar is moved to the same group (integer part of ``x``) with the
    fractional offset of the mapped slot.

    Parameters
    ----------
    graph_name : str
        Scenario name; 'loss', 'cloud' and names containing 'concurrent'
        apply an extra whole-unit shift for particular offsets.
    x : float
        Current x location of the bar.
    x_decimals : numpy.ndarray
        The distinct fractional offsets in use (rounded to 2 decimals).
    correct_order : sequence of int
        For slot ``i`` of ``x_decimals``, the index of the slot to move to.

    Returns
    -------
    float
        The new x location.

    Raises
    ------
    IndexError
        If the fractional part of ``x`` is not present in ``x_decimals``
        (the ``np.where`` result is empty), or ``correct_order`` is too short.
    """
    
    # NOTE(review): x is rounded to ONE decimal here, while the branches at
    # the bottom test two-decimal offsets (0.05, 0.85, ...). Confirm the
    # lookup below can still match when x_decimals holds such offsets.
    x = np.around(x, 1)
    
    # split into group (integer part) and offset inside the group
    x_as_index = int(x)
    x_as_decimal = np.around(np.modf(x)[0], 2)

    # slot of the current offset, then the slot it should be swapped to
    # (exact float comparison against the rounded offsets)
    x_decimal_posn = np.where(x_decimals == x_as_decimal)[0][0]
    correct_order_posn = correct_order[x_decimal_posn]
    
    x_decimal = x_decimals[correct_order_posn]
    new_x = float(x_as_index + x_decimal)
    
    # Scenario-specific corrections: for these offsets the bar is moved one
    # whole unit to the right (+1) or left (-1).
    # NOTE(review): presumably this compensates for offsets that wrap across
    # an integer boundary - verify against the bar layout of each scenario.
    if graph_name == 'loss':
        if x_decimal == 0.05:
            new_x += 1 
        elif x_decimal == 0.85:
            new_x -= 1
    elif graph_name == 'cloud':
        if x_decimal == 0.95:
            new_x -= 1
        elif x_decimal == 0.05:
            new_x += 1
    elif 'concurrent' in graph_name:
        if x_decimal == 0.95:
            new_x -= 1
        elif x_decimal == 0.25:
            new_x += 1
    return new_x
    
def x_posns_rearranger(graph_name, input_x, input_y, correct_order):
    """Generate the new x-axis bar locations for a whole column of bars.

    Parameters
    ----------
    graph_name : str
        Scenario name, forwarded to x_posn_changer.
    input_x : pandas.Series
        Current x location of every bar.
    input_y : object
        Not used; kept so existing calls keep working.
    correct_order : sequence of int
        Slot mapping, forwarded to x_posn_changer.

    Returns
    -------
    pandas.Series
        The rearranged x locations, indexed like ``input_x``.
    """
    # distinct within-group offsets (fractional parts, rounded to 2 decimals)
    x_decimals = np.around(np.modf(input_x)[0], 2).unique()
    return input_x.apply(
        lambda x: x_posn_changer(graph_name, x, x_decimals, correct_order))

In [None]:
# Looping through the top-level graphs in the "graphs" variable. This code
# needs adjusting based on which scenario you are considering.
#
# For every scenario: select the matching experiments, summarise them per
# (experiment, permutation, client), derive labels / colours / bar positions,
# then draw one figure per entry of the scenario's "plots" and save it as
# graphs_conext/<scenario name>_<plot key>.pdf.

def _stacked_service_bar(ax, x_posns, heights, bottom, service, width):
    """Draw one service's segment of a stacked bar; return the new stack top."""
    ax.bar(x_posns, heights,
           bottom=bottom, width=width,
           color=service_dict[service]['colour'],
           label=r'$\texttt{{{}}}$'.format(service),
           edgecolor='black', zorder=2, linewidth=0.7,
           hatch='//')
    # build a new object instead of using "+=" so plotted inputs are never mutated
    return bottom + heights


# savefig does not create missing directories
os.makedirs('graphs_conext', exist_ok=True)

for graph_name in graphs:
    bar_width = 0.2  # also read (as a global) by client_bar_locs
    relevant_data_files = graphs[graph_name]['data']
    curr_graph_name = graphs[graph_name]["name"]
    print(f'Current graph is {curr_graph_name} and uses {relevant_data_files}')
    data_mask = combined_data['experiment'].apply(
        lambda x: any(item in x for item in relevant_data_files))
    relevant_results = combined_data[data_mask]

    # drop rows whose client number is greater than 3
    relevant_results = relevant_results.drop(relevant_results[relevant_results.client > 3].index)

    if relevant_results.empty:
        print(f'No results found for {curr_graph_name}; skipping')
        continue

    # calculate median of results across the permutations
    df_combined = results_analyser(relevant_results, pd.DataFrame())

    # analyse permutations to create legends and assign bar colours
    experiments = df_combined['experiment']
    permutations = df_combined['permutation']

    legend_labels, bar_colours = permutation_analysis(curr_graph_name, experiments, permutations)
    df_combined['legend_labels'] = legend_labels
    df_combined['bar_colours'] = bar_colours

    # find how many clients, i.e., locations of the groups of bars
    clients_no = len(df_combined['client'].unique())

    # prepare x-axis data for plotting, i.e., bar locations
    clients = df_combined['client']
    bar_locs = client_bar_locs(clients)
    df_combined['bar_locs'] = bar_locs

    print(f'There are {len(permutations.unique())} permutations and {clients_no} clients')

    cg_rel_x_posns = df_combined['bar_locs']

    # rearrange data in specified permutation order
    correct_order = graphs[graph_name]['permutation_order']
    cg_x_posns = x_posns_rearranger(curr_graph_name, cg_rel_x_posns, df_combined, correct_order)

    plots = graphs[graph_name]['plots']

    for plot_key, plot_item in plots.items():
        graphs_to_be_made = plot_item['graphs']
        subplots = plot_item['subplots']
        figsize = plot_item['figsize']
        legends = plot_item['legends']

        # optional settings with their defaults
        # (plot_item['height_ratios'] is not applied to the figure)
        legend_size = plot_item.get('legend_size', 9)
        legend_cols = plot_item.get('legend_cols', 3)
        gridspec_config = plot_item.get('gridspec', {'hspace': 0})

        fig, axs = plt.subplots(subplots[0], subplots[1],
                                figsize=figsize, sharex=True,
                                gridspec_kw=gridspec_config)
        # plt.subplots returns a 2-D array for a rows x columns grid and a
        # 1-D array for a single row or column
        is_grid = np.ndim(axs) == 2

        for j in range(len(graphs_to_be_made)):
            curr_graph = graphs_to_be_made[f'graph{j}']
            if curr_graph['filler']:
                continue

            ax = axs[curr_graph['subplot_loc']]
            cg_key = curr_graph['results_key']  # curr_graph_key
            ax.set_ylabel(r'{{{}}}'.format(curr_graph['name']))

            if cg_key in ['fps', 'success_rate', 'e2e', 'jitter']:
                # one bar per row, coloured by permutation, with error bars
                cg_rel_results = df_combined[f'{cg_key}_avg']
                cg_rel_err = df_combined[f'{cg_key}_std']

                if cg_key == 'jitter' and 'concurrent' not in curr_graph_name:
                    # scale into new objects; an in-place "/=" would also
                    # rescale the columns stored in df_combined
                    cg_rel_results = cg_rel_results / 1000
                    cg_rel_err = cg_rel_err / 1000

                ax.bar(cg_x_posns, cg_rel_results, yerr=cg_rel_err,
                       label=df_combined['legend_labels'],
                       color=df_combined['bar_colours'],
                       capsize=3, edgecolor='black', zorder=2,
                       linewidth=0.7, width=bar_width,
                       error_kw={'linewidth': 0.7, 'capthick': 0.7})
            elif cg_key in ['cpu', 'memory', 'services']:
                # stacked bars, one segment per service
                is_queue = 'queue' in curr_graph_name
                prev_ser_res = 0
                for service in service_dict:
                    if is_queue:
                        if cg_key == 'cpu':
                            curr_ser_res = df_combined[f'gpu_{service}_cpu_percent_avg']
                            curr_ser_res = cpu_normaliser(df_combined['legend_labels'], service, curr_ser_res)
                        elif cg_key == 'memory':
                            curr_ser_res = df_combined[f'gpu_{service}_memory.used_avg'] / 1000
                        else:
                            # previously this fell through and plotted whatever
                            # an earlier subplot had left in curr_ser_res
                            raise ValueError(
                                f"results_key '{cg_key}' is not supported for queue graphs")
                    else:
                        curr_ser_res = df_combined[f'{cg_key}_{service}_avg']
                        if cg_key == 'cpu' and curr_graph_name != "scale":
                            curr_ser_res = cpu_normaliser(df_combined['legend_labels'], service, curr_ser_res)

                    prev_ser_res = _stacked_service_bar(
                        ax, cg_x_posns, curr_ser_res, prev_ser_res, service, bar_width)
            elif 'gpu' in cg_key:
                prev_ser_res = 0
                for service in service_dict:
                    curr_ser_res = None
                    if 'memory' in cg_key:
                        curr_ser_res = df_combined[f'gpu_{service}_gpu_memory_usage_avg'] / 1000
                    elif 'util' in cg_key:
                        try:
                            curr_ser_res = df_combined[f'gpu_{service}_utilization.gpu_avg']
                        except KeyError:
                            # no GPU utilisation recorded for this service
                            print(f'No GPU utilisation column for {service}; skipping it')
                        else:
                            try:
                                curr_ser_res = gpu_normaliser(df_combined['legend_labels'], service, curr_ser_res)
                            except (KeyError, IndexError):
                                # best effort, as before: labels that cannot be
                                # mapped to a server are plotted un-normalised
                                pass
                    if curr_ser_res is None:
                        # never draw stale data left over from another subplot
                        continue

                    prev_ser_res = _stacked_service_bar(
                        ax, cg_x_posns, curr_ser_res, prev_ser_res, service, bar_width)

            ax.yaxis.grid(True, which='major', zorder=0)
            ax.tick_params(axis='both')

            ax.set_ylim(curr_graph['y_axis_lims'])

            yat = curr_graph['y_tick_marks']  # [start, stop, step]
            ax.set_yticks(np.arange(yat[0], yat[1], yat[2]))

        # one x tick per client, numbered from 1
        x_vals = np.arange(0, clients_no, 1) + 1
        if 'queue' in curr_graph_name:
            for k in range(4):
                axs[k].set_xticks(x_vals)
                axs[k].set_xlabel(r'{Number of Clients}')
        else:
            # label only the bottom row of the grid
            bottom_row = subplots[0] - 1
            for k in range(subplots[1]):
                axs[bottom_row, k].set_xticks(x_vals)
                axs[bottom_row, k].set_xlabel(r'{Number of Clients}')

        services_legend_list = [
            mpatches.Patch(facecolor=legend_item['colour'], hatch='//',
                           label=r'$\texttt{{{}}}$'.format(service_name))
            for service_name, legend_item in service_dict.items()
        ]

        # which axes carry the legends depends on the layout of `axs`
        if is_grid:
            services_ax, services_ncol, exp_ax = axs[0, 1], 3, axs[0, 0]
        else:
            services_ax, services_ncol, exp_ax = axs[1], 5, axs[0]

        service_legend = services_ax.legend(
            handles=services_legend_list, prop={'size': 20},
            bbox_to_anchor=legends['services'], loc='upper left',
            borderaxespad=0, ncol=services_ncol, framealpha=1, mode='expand')

        # de-duplicate the experiment labels (dict keeps first-seen order)
        handles, labels = exp_ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))

        # legend entry order: scenario-specific overrides, otherwise the
        # scenario's permutation order
        legend_order = correct_order
        if curr_graph_name == 'loss':
            legend_order = [0, 1, 2]
        elif curr_graph_name == 'cloud':
            legend_order = [1, 2, 0, 3]
        elif 'concurrent_cpu' in curr_graph_name:
            legend_order = [1, 0, 3, 2]
        elif 'concurrent_gpu' in curr_graph_name:
            legend_order = [0, 3, 1, 2]

        legend_handles = list(by_label.values())
        legend_names = list(by_label.keys())
        exp_legend = exp_ax.legend(
            [legend_handles[idx] for idx in legend_order],
            [legend_names[idx] for idx in legend_order],
            prop={'size': legend_size}, bbox_to_anchor=legends['exp'],
            loc='upper left', borderaxespad=0, ncol=legend_cols, mode='expand')

        fig.align_ylabels(axs)
        plt.tight_layout()
        plt.savefig(f'graphs_conext/{curr_graph_name}_{plot_key}.pdf', bbox_inches='tight', pad_inches=0.01)
        plt.show()
        plt.close(fig)  # release the figure once it has been saved and shown