In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import math

In [None]:
def get_mean_function_duration(endpoint_name, logs_dir="../cloudwatch_logs"):
    """Return the mean ``Duration`` value (ms) from an endpoint's CloudWatch log export.

    Reads ``{logs_dir}/{endpoint_name}_cloudwatch_logs.json``, keeps the rows
    whose ``@message`` starts with ``REPORT`` and averages the number captured
    by ``Duration: <float> ms`` in each of them.

    Args:
        endpoint_name: Prefix of the log file name, e.g. ``"echo"``.
        logs_dir: Directory that holds the exported log files.

    Returns:
        The mean duration as a float; NaN if no REPORT row carries a duration.
    """
    logs = pd.read_json(f"{logs_dir}/{endpoint_name}_cloudwatch_logs.json")
    messages = logs["@message"]
    # na=False: a row without a message is simply not a REPORT row (a NaN in
    # the mask would make the boolean indexing below raise).
    report_messages = messages[messages.str.startswith("REPORT", na=False)]
    # The integer-only "Billed Duration: N ms" field cannot match this pattern,
    # and extract() keeps the first match, so a later "Init Duration" is ignored.
    durations = report_messages.str.extract(r"Duration: (\d+\.\d+) ms", expand=False)
    return pd.to_numeric(durations).mean()

In [None]:
# Quick check: show the mean reported duration for the "echo" endpoint.
get_mean_function_duration("echo")

In [None]:
def get_mean_billed_duration(endpoint_name, logs_dir="../cloudwatch_logs"):
    """Return the mean ``Billed Duration`` value (ms) from an endpoint's CloudWatch log export.

    Reads ``{logs_dir}/{endpoint_name}_cloudwatch_logs.json``, keeps the rows
    whose ``@message`` starts with ``REPORT`` and averages the integer captured
    by ``Billed Duration: <int> ms`` in each of them.

    Args:
        endpoint_name: Prefix of the log file name, e.g. ``"echo"``.
        logs_dir: Directory that holds the exported log files.

    Returns:
        The mean billed duration as a float; NaN if no REPORT row carries one.
    """
    logs = pd.read_json(f"{logs_dir}/{endpoint_name}_cloudwatch_logs.json")
    messages = logs["@message"]
    # na=False: a row without a message is simply not a REPORT row (a NaN in
    # the mask would make the boolean indexing below raise).
    report_messages = messages[messages.str.startswith("REPORT", na=False)]
    billed = report_messages.str.extract(r"Billed Duration: (\d+) ms", expand=False)
    return pd.to_numeric(billed).mean()

In [None]:
# One row per serverless endpoint, holding the mean reported and mean billed
# duration (both in ms) parsed from that endpoint's log export.
endpoints = ["echo", "getPrice", "compute", "parse", "query"]
df = pd.DataFrame(
    {
        "mean_duration": [get_mean_function_duration(name) for name in endpoints],
        "mean_billed_duration": [get_mean_billed_duration(name) for name in endpoints],
    },
    index=endpoints,
)

In [None]:
# Convert the mean billed duration (ms) into billed GB-seconds.
# AWS Lambda counts memory in GB of 1024 MB, so a 128 MB function is billed as
# 128 / 1024 = 0.125 GB; the previous factor of 0.128 overstated it by ~2.4 %.
# NOTE(review): 128 MB is inferred from the old 0.128 factor - confirm it
# against the deployed function configuration.
lambda_memory_mb = 128
df["billed_gb_seconds"] = (df["mean_billed_duration"] / 1000) * (lambda_memory_mb / 1024)

In [None]:
# Compute charge of a single invocation: billed GB-seconds multiplied by the
# price of one GB-second.
price_per_gb_second = 0.0000166667
df["duration_cost"] = df["billed_gb_seconds"].mul(price_per_gb_second)

In [None]:
# Flat fees charged once per request, independent of how long the function runs.
price_per_function_request = 0.0000002
price_per_api_gateway_request = 0.0000012
# Total cost of one request = compute charge + function request fee + API gateway fee.
df["request_cost"] = (
    df["duration_cost"]
    .add(price_per_function_request)
    .add(price_per_api_gateway_request)
)

In [None]:
# Look the value up by label: `series[4]` on a string-labelled index relied on
# the positional fallback that pandas deprecated in 2.1. "query" is the row
# that sat at position 4.
print(df.loc["query", "billed_gb_seconds"])

In [None]:
# Load the benchmark results, order them by concurrency and framework, and drop
# the framework variants that are not part of this comparison.
excluded_frameworks = [
    "express_without_cluster",
    "express_big_instance_without_cluster",
    "express_big_instance_with_cluster",
    "spring_large_instance",
]
results = pd.read_csv("../results.csv").sort_values(by=["concurrency", "framework"])
results = results[~results["framework"].isin(excluded_frameworks)]

In [None]:
# Hourly EC2 instance price converted to a per-second rate (3600 s per hour).
# NOTE(review): the instance type/region behind 0.12075 is not recorded here.
ec2_instance_price_per_hour = 0.12075
ec2_instance_price_per_second = ec2_instance_price_per_hour / 3600

In [None]:
def price_per_request_chart(serverless_df, results, endpoint):
    """Plot EC2 versus serverless cost per request for one endpoint and save the figure.

    For every request rate from 1 to 75 requests/s the EC2 cost per request is
    ``ec2_instance_price_per_second * ceil(rate / max_rps) / rate``, where
    ``max_rps`` is the largest ``requests_per_second`` value in ``results`` for
    the framework on this endpoint. The serverless cost is the constant
    ``request_cost`` of the endpoint. The three EC2 curves share the single
    legend entry "All frameworks". A dashed vertical line marks the request
    rate at which one EC2 instance costs as much per request as serverless.

    The figure is written to
    ``../graphs/framework_serverless_price_comparison/{endpoint}_price_per_request.png``
    and shown inline. Reads the module-level ``ec2_instance_price_per_second``.

    Args:
        serverless_df: DataFrame indexed by endpoint name with a
            ``request_cost`` column.
        results: DataFrame with ``framework``, ``endpoint`` and
            ``requests_per_second`` columns.
        endpoint: Endpoint name as it appears in ``results``; ``"get_price"``
            is looked up as ``"getPrice"`` in ``serverless_df``.

    Raises:
        KeyError: If the endpoint is missing for a framework in ``results`` or
            missing from ``serverless_df``.
    """

    def max_rps(framework):
        # Highest requests_per_second recorded for this framework on `endpoint`.
        rows = results[results["framework"] == framework]
        return rows.groupby("endpoint")["requests_per_second"].max()[endpoint]

    def ec2_cost_per_request(rate, framework_max_rps):
        # Instances are added in whole units once the rate exceeds what one can serve.
        instances_needed = math.ceil(rate / framework_max_rps)
        return (ec2_instance_price_per_second * instances_needed) / rate

    spring_max_rps = max_rps("spring")
    express_max_rps = max_rps("express_with_cluster")
    django_max_rps = max_rps("django")

    serverless_key = "getPrice" if endpoint == "get_price" else endpoint
    serverless_cost_per_request = serverless_df.loc[serverless_key, "request_cost"]

    fig, ax = plt.subplots(figsize=(6, 3), dpi=200)

    request_rates = range(1, 76)
    # Labels starting with "_" are skipped by the legend, so only the last EC2
    # curve contributes an entry.
    ec2_curves = [
        (django_max_rps, "_nolegend_"),
        (express_max_rps, "_nolegend_"),
        (spring_max_rps, "All frameworks"),
    ]
    for framework_max_rps, label in ec2_curves:
        costs = [ec2_cost_per_request(rate, framework_max_rps) for rate in request_rates]
        ax.plot(request_rates, costs, label=label)
    ax.plot(request_rates, [serverless_cost_per_request] * len(request_rates), label="Serverless")

    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[:2], labels[:2])

    ax.set_ylim(bottom=0, top=0.02 / 3600)
    ax.set_xlabel("Requests per second")
    ax.set_ylabel("Cost per request (USD)")

    # Break-even rate with a single EC2 instance. The former
    # math.ceil(max_rps / max_rps) factor always evaluated to 1.
    break_even_rate = ec2_instance_price_per_second / serverless_cost_per_request
    # Vertical marker that stops at the height of the intersection.
    ax.plot(
        [break_even_rate, break_even_rate],
        [0, serverless_cost_per_request],
        linestyle="--",
        color="black",
    )
    ax.text(
        break_even_rate + 1,
        0.0000004,
        f"{break_even_rate:.2f} requests/s",
        verticalalignment="bottom",
    )

    fig_dir = "../graphs/framework_serverless_price_comparison/"
    os.makedirs(fig_dir, exist_ok=True)
    # os.path.join avoids the "//" the old string concatenation put in the path.
    fig.savefig(
        os.path.join(fig_dir, f"{endpoint}_price_per_request.png"),
        dpi=fig.dpi,
        bbox_inches="tight",
    )
    plt.show()
    plt.close(fig)

In [None]:
# Render and save the cost-per-request comparison for the "get_price" endpoint.
price_per_request_chart(df, results, "get_price")

In [None]:
def price_per_request_chart_query(serverless_df, results):
    """Plot EC2 versus serverless cost per request for the "query" endpoint and save the figure.

    For every request rate from 1 to 35 requests/s the EC2 cost per request is
    ``ec2_instance_price_per_second * ceil(rate / max_rps) / rate``, where
    ``max_rps`` is the largest ``requests_per_second`` value in ``results`` for
    the framework on the "query" endpoint. Django, Express and Spring each get
    their own curve and legend entry; the serverless cost is the constant
    ``request_cost`` of the endpoint. A dashed vertical line marks the request
    rate at which one EC2 instance costs as much per request as serverless.

    The figure is written to
    ``../graphs/framework_serverless_price_comparison/query_price_per_request.png``
    and shown inline. Reads the module-level ``ec2_instance_price_per_second``.

    Args:
        serverless_df: DataFrame indexed by endpoint name with a
            ``request_cost`` column.
        results: DataFrame with ``framework``, ``endpoint`` and
            ``requests_per_second`` columns.

    Raises:
        KeyError: If "query" is missing for a framework in ``results`` or
            missing from ``serverless_df``.
    """
    endpoint = "query"

    def max_rps(framework):
        # Highest requests_per_second recorded for this framework on `endpoint`.
        rows = results[results["framework"] == framework]
        return rows.groupby("endpoint")["requests_per_second"].max()[endpoint]

    def ec2_cost_per_request(rate, framework_max_rps):
        # Instances are added in whole units once the rate exceeds what one can serve.
        instances_needed = math.ceil(rate / framework_max_rps)
        return (ec2_instance_price_per_second * instances_needed) / rate

    spring_max_rps = max_rps("spring")
    express_max_rps = max_rps("express_with_cluster")
    django_max_rps = max_rps("django")

    # `endpoint` is always "query" here, so the former "get_price" -> "getPrice"
    # mapping was unreachable and has been dropped.
    serverless_cost_per_request = serverless_df.loc[endpoint, "request_cost"]

    fig, ax = plt.subplots(figsize=(6, 3), dpi=200)

    request_rates = range(1, 36)
    ec2_curves = [
        (django_max_rps, "Django"),
        (express_max_rps, "Express"),
        (spring_max_rps, "Spring"),
    ]
    for framework_max_rps, label in ec2_curves:
        costs = [ec2_cost_per_request(rate, framework_max_rps) for rate in request_rates]
        ax.plot(request_rates, costs, label=label)
    ax.plot(request_rates, [serverless_cost_per_request] * len(request_rates), label="Serverless")

    # Legend order: Spring, Serverless, Django, Express (two columns).
    handles, labels = ax.get_legend_handles_labels()
    order = [2, 3, 0, 1]
    ax.legend(
        [handles[idx] for idx in order],
        [labels[idx] for idx in order],
        loc="upper right",
        ncol=2,
    )

    ax.set_ylim(bottom=0, top=0.045 / 3600)
    ax.set_xlabel("Requests per second")
    ax.set_ylabel("Cost per request (USD)")

    # Break-even rate with a single EC2 instance. The former
    # math.ceil(max_rps / max_rps) factor always evaluated to 1.
    break_even_rate = ec2_instance_price_per_second / serverless_cost_per_request
    # Vertical marker that stops at the height of the intersection.
    ax.plot(
        [break_even_rate, break_even_rate],
        [0, serverless_cost_per_request],
        linestyle="--",
        color="black",
    )
    ax.text(
        break_even_rate + 1,
        0.0000004,
        f"{break_even_rate:.2f} requests/s",
        verticalalignment="bottom",
    )

    fig_dir = "../graphs/framework_serverless_price_comparison/"
    os.makedirs(fig_dir, exist_ok=True)
    # os.path.join avoids the "//" the old string concatenation put in the path.
    fig.savefig(
        os.path.join(fig_dir, f"{endpoint}_price_per_request.png"),
        dpi=fig.dpi,
        bbox_inches="tight",
    )
    plt.show()
    plt.close(fig)

In [None]:
# Render and save the cost-per-request comparison for the "query" endpoint.
price_per_request_chart_query(df, results)