# Comparing the performance of the Express implementation with and without the Node.js cluster module

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns

In [None]:
# Load the benchmark measurements. The path is relative to the notebook's
# working directory. The cells below read the columns "framework", "endpoint",
# "concurrency", "requests_per_second" and "avg_response_time".
results = pd.read_csv("../results.csv")

In [None]:
# Order rows by ascending concurrency and, within a concurrency level, by
# descending framework name.
# NOTE(review): presumably this fixes the order in which frameworks appear in
# the plot legends below — confirm before changing.
results = results.sort_values(["concurrency", "framework"], ascending=[True, False])

In [None]:
# Legend text shown for each framework identifier; identifiers that are not
# listed here are displayed unchanged.
_FRAMEWORK_LEGEND_LABELS = {
    "express_without_cluster": "Without Node cluster",
    "express_with_cluster": "With Node cluster",
    "express_big_instance_without_cluster": "Without Node cluster (larger instance)",
    "express_big_instance_with_cluster": "With Node cluster (larger instance)",
}


def _concurrency_line_plot(results, endpoint, framework1, framework2, is_larger_instance,
                           *, metric, y_label, file_suffix):
    """Draw, save and show one metric against concurrency for two frameworks.

    Args:
        results: DataFrame with the columns "framework", "endpoint",
            "concurrency" and the column named by ``metric``.
        endpoint: Value of the "endpoint" column to plot.
        framework1: First value of the "framework" column to include.
        framework2: Second value of the "framework" column to include.
        is_larger_instance: Selects the output sub-directory
            ("larger_instance" when true, "normal_instance" otherwise).
        metric: Name of the column plotted on the y axis.
        y_label: Text of the y-axis label.
        file_suffix: Last part of the PNG file name,
            ``express_<endpoint>_<file_suffix>.png``.
    """
    selected = results[
        results["framework"].isin([framework1, framework2])
        & (results["endpoint"] == endpoint)
    ]

    fig = plt.figure(figsize=(4, 3), dpi=200)
    ax = sns.lineplot(data=selected, x="concurrency", y=metric, hue="framework")

    # Rebuild the legend with readable names. Every available handle is used
    # instead of indexing entries 0 and 1, so a framework without rows for
    # this endpoint no longer raises IndexError.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, [_FRAMEWORK_LEGEND_LABELS.get(text, text) for text in labels])

    plt.xscale("log", base=2)
    plt.ylim(bottom=0)
    # One tick per power of two from 1 to 256, labelled with the plain number.
    powers_of_two = [2**exponent for exponent in range(9)]
    plt.xticks(powers_of_two, powers_of_two)
    plt.xlabel("Concurrency", fontsize=11)
    plt.ylabel(y_label, fontsize=11)

    # Build the directory once and reuse it for both makedirs and savefig.
    fig_dir = os.path.join(
        "../graphs/node_cluster_vs_non_cluster",
        "larger_instance" if is_larger_instance else "normal_instance",
    )
    os.makedirs(fig_dir, exist_ok=True)
    plt.savefig(
        os.path.join(fig_dir, f"express_{endpoint}_{file_suffix}.png"),
        dpi=fig.dpi,
        bbox_inches="tight",
    )
    plt.show()
    plt.close(fig)


def request_per_second_plot(results, endpoint, framework1, framework2, is_larger_instance):
    """Plot requests per second against concurrency for two frameworks.

    The figure is shown and saved as
    ``../graphs/node_cluster_vs_non_cluster/<instance>/express_<endpoint>_requests_per_second.png``,
    where ``<instance>`` is "larger_instance" or "normal_instance".

    Args:
        results: DataFrame with the columns "framework", "endpoint",
            "concurrency" and "requests_per_second".
        endpoint: Value of the "endpoint" column to plot.
        framework1: First framework identifier to include.
        framework2: Second framework identifier to include.
        is_larger_instance: True to store the figure under "larger_instance",
            False to store it under "normal_instance".
    """
    _concurrency_line_plot(
        results, endpoint, framework1, framework2, is_larger_instance,
        metric="requests_per_second",
        y_label="Requests/s",
        file_suffix="requests_per_second",
    )


def response_time_plot(results, endpoint, framework1, framework2, is_larger_instance):
    """Plot the average response time against concurrency for two frameworks.

    The y axis is labelled "Mean response time (ms)". The figure is shown and
    saved as
    ``../graphs/node_cluster_vs_non_cluster/<instance>/express_<endpoint>_response_time.png``,
    where ``<instance>`` is "larger_instance" or "normal_instance".

    Args:
        results: DataFrame with the columns "framework", "endpoint",
            "concurrency" and "avg_response_time".
        endpoint: Value of the "endpoint" column to plot.
        framework1: First framework identifier to include.
        framework2: Second framework identifier to include.
        is_larger_instance: True to store the figure under "larger_instance",
            False to store it under "normal_instance".
    """
    _concurrency_line_plot(
        results, endpoint, framework1, framework2, is_larger_instance,
        metric="avg_response_time",
        y_label="Mean response time (ms)",
        file_suffix="response_time",
    )

In [None]:
# Requests/s vs. concurrency for the "echo" endpoint, normal instance.
request_per_second_plot(results, "echo", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Mean response time vs. concurrency for the "echo" endpoint, normal instance.
response_time_plot(results, "echo", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Requests/s vs. concurrency for the "get_price" endpoint, normal instance.
request_per_second_plot(results, "get_price", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Mean response time vs. concurrency for the "get_price" endpoint, normal instance.
response_time_plot(results, "get_price", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Requests/s vs. concurrency for the "compute" endpoint, normal instance.
request_per_second_plot(results, "compute", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Mean response time vs. concurrency for the "compute" endpoint, normal instance.
response_time_plot(results, "compute", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Requests/s vs. concurrency for the "parse" endpoint, normal instance.
request_per_second_plot(results, "parse", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Mean response time vs. concurrency for the "parse" endpoint, normal instance.
response_time_plot(results, "parse", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Requests/s vs. concurrency for the "query" endpoint, normal instance.
request_per_second_plot(results, "query", "express_without_cluster", "express_with_cluster", False)

In [None]:
# Mean response time vs. concurrency for the "query" endpoint, normal instance.
response_time_plot(results, "query", "express_without_cluster", "express_with_cluster", False)

In [None]:
def _peak_requests_per_second(results, framework, endpoint_order):
    """Return the highest requests/s per endpoint for one framework.

    The "get_price" endpoint is renamed to "getPrice" for display, and the
    result is arranged in ``endpoint_order``; endpoints without data are NaN.
    """
    peaks = results[results["framework"] == framework].groupby("endpoint")["requests_per_second"].max()
    return peaks.rename(index={"get_price": "getPrice"}).reindex(endpoint_order)


def relative_performance_plot(results, without_cluster_framework, with_cluster_framework, is_larger_instance):
    """Plot peak throughput with the cluster module relative to without it.

    For every endpoint the maximum "requests_per_second" of each framework is
    taken and divided by the maximum of ``without_cluster_framework``, so the
    bars without the cluster module are 1. An extra "average" group holds the
    mean of the per-endpoint ratios. Each bar with the cluster module is
    annotated with its percentage change. The figure is shown and saved as
    ``../graphs/node_cluster_vs_non_cluster/<instance>/express_relative_performance.png``.

    Args:
        results: DataFrame with the columns "framework", "endpoint" and
            "requests_per_second".
        without_cluster_framework: Framework identifier used as the baseline.
        with_cluster_framework: Framework identifier compared to the baseline.
        is_larger_instance: Selects the legend labels, the upper y limit
            (3 when true, 1.7 otherwise) and the output sub-directory
            ("larger_instance" or "normal_instance").
    """
    endpoint_order = ["echo", "getPrice", "compute", "parse", "query"]
    baseline = _peak_requests_per_second(results, without_cluster_framework, endpoint_order)
    clustered = _peak_requests_per_second(results, with_cluster_framework, endpoint_order)

    relative_with = clustered / baseline
    relative_without = baseline / baseline
    # The average is added after normalising, so it is the mean of the ratios.
    relative_without["average"] = relative_without.mean()
    relative_with["average"] = relative_with.mean()

    fig = plt.figure(figsize=(6, 3), dpi=200)
    bar_width = 0.35
    x_without = np.arange(len(relative_without))
    x_with = x_without + bar_width
    instance_suffix = " (larger instance)" if is_larger_instance else ""
    plt.bar(x_without, relative_without, width=bar_width, label="Without Node cluster" + instance_suffix)
    plt.bar(x_with, relative_with, width=bar_width, label="With Node cluster" + instance_suffix)
    # Centre each tick between the two bars of its group.
    plt.xticks(x_without + bar_width / 2, relative_without.index)
    plt.xlabel("Endpoint", fontsize=11)
    plt.ylabel("Relative requests/s", fontsize=11)
    # Fixed upper limits; bars or annotations above them would be cut off.
    plt.ylim(top=3 if is_larger_instance else 1.7)

    # List the "with cluster" entry first in the legend.
    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles[::-1], labels[::-1])

    # Iterate over the values directly: integer keys on this string-indexed
    # Series rely on a positional fallback that pandas has deprecated.
    for x_position, ratio in zip(x_with, relative_with):
        if np.isnan(ratio):
            # No data for this endpoint, so there is no bar to annotate.
            continue
        change = (ratio - 1) * 100
        sign = "+" if change > 0 else ""
        plt.text(x_position + 0.05, ratio, f"{sign}{change:.1f}%", ha="center", va="bottom", fontsize=8)

    # Build the directory once and reuse it for both makedirs and savefig.
    fig_dir = os.path.join(
        "../graphs/node_cluster_vs_non_cluster",
        "larger_instance" if is_larger_instance else "normal_instance",
    )
    os.makedirs(fig_dir, exist_ok=True)
    plt.savefig(os.path.join(fig_dir, "express_relative_performance.png"), dpi=fig.dpi, bbox_inches="tight")
    plt.show()
    plt.close(fig)

In [None]:
# Relative peak requests/s per endpoint with vs. without the cluster module, normal instance.
relative_performance_plot(results, "express_without_cluster", "express_with_cluster", False)

In [None]:
# Now we do the same for the larger instance

In [None]:
# Requests/s vs. concurrency for the "echo" endpoint, larger instance.
request_per_second_plot(results, "echo", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Mean response time vs. concurrency for the "echo" endpoint, larger instance.
response_time_plot(results, "echo", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Requests/s vs. concurrency for the "get_price" endpoint, larger instance.
request_per_second_plot(results, "get_price", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Mean response time vs. concurrency for the "get_price" endpoint, larger instance.
response_time_plot(results, "get_price", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Requests/s vs. concurrency for the "compute" endpoint, larger instance.
request_per_second_plot(results, "compute", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Mean response time vs. concurrency for the "compute" endpoint, larger instance.
response_time_plot(results, "compute", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Requests/s vs. concurrency for the "parse" endpoint, larger instance.
request_per_second_plot(results, "parse", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Mean response time vs. concurrency for the "parse" endpoint, larger instance.
response_time_plot(results, "parse", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Requests/s vs. concurrency for the "query" endpoint, larger instance.
request_per_second_plot(results, "query", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Mean response time vs. concurrency for the "query" endpoint, larger instance.
response_time_plot(results, "query", "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)

In [None]:
# Relative peak requests/s per endpoint with vs. without the cluster module, larger instance.
relative_performance_plot(results, "express_big_instance_without_cluster", "express_big_instance_with_cluster", True)