In [None]:
import os, re, sys, argparse, math, random
import numpy as np
from collections import OrderedDict
import matplotlib.pyplot as plt
from matplotlib.ticker import *
from matplotlib_helper import *

In [None]:
def kB(num):
    """Convert a size given in kB to bytes, using 1 kB = 1024 bytes."""
    bytes_per_kb = 1024
    return num * bytes_per_kb

def MB(num):
    """Convert a size given in MB to bytes, using 1 MB = 1024 * 1024 bytes."""
    size = num
    # One factor of 1024 per unit step: MB -> kB -> bytes.
    for _ in range(2):
        size = size * 1024
    return size

def GB(num):
    """Convert a size given in GB to bytes, using 1 GB = 1024 ** 3 bytes."""
    size = num
    # One factor of 1024 per unit step: GB -> MB -> kB -> bytes.
    for _ in range(3):
        size = size * 1024
    return size

def TB(num):
    """Convert a size given in TB to bytes, using 1 TB = 1024 ** 4 bytes."""
    size = num
    # One factor of 1024 per unit step: TB -> GB -> MB -> kB -> bytes.
    for _ in range(4):
        size = size * 1024
    return size

In [None]:
def convert_to_percentage_string(num):
    """Format a fraction as a percentage string with two decimals.

    The returned text carries no '%' sign; for example 0.05 becomes '5.00'.
    """
    percent = num * 100
    return '%.2f' % (percent,)

In [None]:
# Workload assumptions read by the cost functions below.
# Disabled sweep over dataset sizes, 1kB to 1TB:
# l_dataset_size = [int(pow(2, n)) for n in np.linspace(10, 40, 31)]
# print(l_dataset_size)

storage_size = 1000           # dataset size, in GB
transaction_count = 10 ** 6   # transactions charged for each simulated day

def get_storage_cost(storage_size):
    """Return the cost in dollars of keeping `storage_size` GB stored for one day.

    The monthly price of $0.023 per GB is spread evenly over 30 days.
    """
    monthly_price_per_gb = 0.023
    daily_price_per_gb = monthly_price_per_gb / 30
    return daily_price_per_gb * storage_size

def get_transaction_cost(transaction_count, write_percentage):
    """Return the combined read and write cost in dollars for a batch of transactions.

    Args:
        transaction_count: total number of transactions.
        write_percentage: fraction (0..1) of the transactions that are writes;
            the remainder are treated as reads.

    Reads are priced at $0.0004 per 1k, writes at $0.005 per 1k.
    """
    read_fraction = 1 - write_percentage
    reads = 0.0004 * transaction_count / 1e3 * read_fraction
    writes = 0.005 * transaction_count / 1e3 * write_percentage
    return reads + writes

def get_read_transaction_cost(transaction_count, write_percentage):
    """Return the cost in dollars of the read share of a batch of transactions.

    Args:
        transaction_count: total number of transactions.
        write_percentage: fraction (0..1) that are writes; reads are the rest.

    Reads are priced at $0.0004 per 1k.
    """
    read_fraction = 1 - write_percentage
    return 0.0004 * transaction_count / 1e3 * read_fraction

def get_write_transaction_cost(transaction_count, write_percentage):
    """Return the cost in dollars of the write share of a batch of transactions.

    Args:
        transaction_count: total number of transactions.
        write_percentage: fraction (0..1) of the transactions that are writes.

    Writes are priced at $0.005 per 1k.
    """
    write_price_per_1k = 0.005
    return write_price_per_1k * transaction_count / 1e3 * write_percentage

def get_replication_cost(replication_size):
    """Return the traffic cost in dollars of replicating `replication_size` GB.

    Traffic is priced at $0.02 per GB.
    """
    price_per_gb_traffic = 0.02
    return price_per_gb_traffic * replication_size

In [None]:
# Parameter sweeps, both expressed as fractions (1 means 100%).
l_write_percentage = [0.01, 0.05, 0.1, 0.5]
l_working_set_percentage = [0.01, 0.05, 0.1, 0.5, 1]

# Width of a single bar inside each group of the bar charts.
width = 0.2

# Fixed upper y-axis limit in dollars, keyed by the number of days plotted.
d_y_max = {1: 40, 7: 300, 30: 1200}

def get_cost_for_days(write_percentage, working_set_percentage, days):
    """Accumulate storage, transaction and replication cost for four deployment options.

    The options, in the order they appear in the returned list, are:
      0. baseline: single-site costs only, no replication cost is ever added,
      1. always replicated,
      2. on-demand replication,
      3. only copy during remote run, and delete afterwards.

    Reads the module-level `storage_size` and `transaction_count`.

    Args:
        write_percentage: fraction (0..1) of transactions that are writes.
        working_set_percentage: fraction (0..1) of the dataset in the working set.
        days: iterable of day indices; one day of cost is added per element.
            The element equal to 0 is the day on which option 2 pays the full
            outgoing replication cost.

    Returns:
        A list of four (storage, transaction, replication) tuples, one per option.
    """
    # Per-day amounts that are the same on every day.
    single_site_storage = get_storage_cost(storage_size)
    replicated_storage = single_site_storage * (1 + working_set_percentage)
    # Option 3 is charged for a quarter of the working set as extra storage.
    transient_storage = single_site_storage * (1 + working_set_percentage / 4)

    single_site_transaction = get_transaction_cost(transaction_count, write_percentage)
    # Option 1 is charged the write transactions a second time.
    mirrored_transaction = single_site_transaction + get_write_transaction_cost(transaction_count, write_percentage)

    outgoing = get_replication_cost(storage_size * working_set_percentage)
    incoming = get_replication_cost(storage_size * working_set_percentage * write_percentage)
    round_trip = outgoing + incoming

    # totals[option] == [storage, transaction, replication]
    totals = [[0, 0, 0] for _ in range(4)]
    for day in days:
        # Option 2 pays the full outgoing cost on day 0 and only the
        # write_percentage share of it on every other day.
        on_demand_outgoing = outgoing * (1 if day == 0 else write_percentage)
        daily_costs = (
            (single_site_storage, single_site_transaction, 0),
            (replicated_storage, mirrored_transaction, round_trip),
            (replicated_storage, single_site_transaction, on_demand_outgoing + incoming),
            (transient_storage, single_site_transaction, round_trip),
        )
        for running, increments in zip(totals, daily_costs):
            for column, amount in enumerate(increments):
                running[column] += amount
    return [tuple(running) for running in totals]

def plot_cost_for_days(days):
    """Plot and save one grouped, stacked bar chart per write percentage.

    For every entry of `l_write_percentage` a new figure is created. Each
    x position is one entry of `l_working_set_percentage` and holds four bars
    side by side, one per option returned by `get_cost_for_days` (in that
    order, left to right). Every bar stacks storage, transaction and
    replication cost. The figure is written to
    'cost.days=<n>.write=<pct>%.png' in the current working directory.

    Args:
        days: sized sequence of day indices, passed through to
            `get_cost_for_days`; its length is used for the title, the file
            name and the `d_y_max` lookup.
    """
    num_days = len(days)
    colors = [get_next_color() for n in range(3)]
    component_labels = ('Storage', 'Transaction', 'Replication')
    num_options = 4

    x = np.arange(len(l_working_set_percentage))
    # Round before converting: 100 * 0.29 is 28.999999999999996, which a bare
    # int() would truncate to a tick label of 28.
    tick_labels = [int(round(percent)) for percent in 100 * np.array(l_working_set_percentage)]

    for write_percentage in l_write_percentage:
        fig, ax = plt.subplots()

        # costs[option][component] is a list with one value per working set percentage.
        costs = [([], [], []) for _ in range(num_options)]
        for working_set_percentage in l_working_set_percentage:
            l_cost = get_cost_for_days(write_percentage, working_set_percentage, days)
            for option in range(num_options):
                for component in range(3):
                    costs[option][component].append(l_cost[option][component])

        for option, series in enumerate(costs):
            # Centre the group of bars on the tick: offsets are -1.5, -0.5, +0.5, +1.5 widths.
            offset = (option - (num_options - 1) / 2.0) * width
            bottom = np.zeros(len(x))
            for component, values in enumerate(series):
                ax.bar(
                    x + offset, values, width,
                    # Only the first option carries labels so the legend lists each component once.
                    label=component_labels[component] if option == 0 else None,
                    color=colors[component], edgecolor='black', bottom=bottom,
                )
                bottom = bottom + np.array(values)

        ax.set_xticks(x)
        ax.set_xticklabels(tick_labels)
        ax.set_xlabel('Working set percentage (%)')
        ax.set_ylabel('Cost ($)')
        if num_days in d_y_max:
            ax.set_ylim(0, d_y_max[num_days])
        ax.set_title('Total cost of %d day deployment with %s%% writes' % (num_days, convert_to_percentage_string(write_percentage)))
        ax.yaxis.grid(linestyle = 'dotted')
        ax.legend()
        fig.savefig('cost.days=%d.write=%s%%.png' % (num_days, convert_to_percentage_string(write_percentage)))

In [None]:
# Produce the charts for 1-day, 7-day and 30-day deployments; day indices start at 0.
for deployment_length in (1, 7, 30):
    plot_cost_for_days(list(range(deployment_length)))