# In [1]:
def cloud_storage_cost():
    """
    Prompt for a monthly storage amount and report the Google Cloud storage cost.

    Pricing is the Google Cloud rate for the Los Angeles area in December 2020:
    $0.023 (2.3 cents) per GB per month, with the first 10 GB of each month free.

    The prompt is repeated, with an error message, until the user enters a
    positive, finite number.

    Returns:
        str: A message of the form
        'Total storage cost in one month is: $<cost>'.
    """
    s_price = 0.023  # dollars per GB per month
    s_free = 10      # GB that are not billed each month

    while True:
        storage_amount = input("Enter the storage amount in one month(in GB): ")
        # The chained comparison rejects zero, negatives, "inf" and "nan"
        # (every comparison with NaN is False), so each bad entry reaches the
        # error message instead of silently re-prompting.
        if check_number(storage_amount) and 0 < float(storage_amount) < float("inf"):
            break
        print("Storage amount must be a positive number. Please try again")

    s_amount = float(storage_amount)
    s_cost = 0
    # Only the portion above the free allowance is charged.
    if s_amount > s_free:
        s_cost = (s_amount - s_free) * s_price
    return f'Total storage cost in one month is: ${s_cost}'

def cloud_storage_cost1(c_amount):
    """
    Non-interactive counterpart of cloud_storage_cost().

    Takes the monthly storage amount in GB and returns the cloud storage cost
    in dollars: $0.023 per GB for everything above the first 10 GB, and 0 when
    the amount does not exceed 10 GB.
    """
    free_allowance_gb = 10
    rate_per_gb = 0.023

    # Nothing is billed unless the amount is strictly above the allowance.
    if not c_amount > free_allowance_gb:
        return 0
    return (c_amount - free_allowance_gb) * rate_per_gb
    

def premises_storage_cost():
    """
    Prompt for a monthly storage amount and report the on-premises storage cost.

    The on-premises price is $0.0427 per GB per month, with no free allowance.

    The prompt is repeated, with an error message, until the user enters a
    positive, finite number.

    Returns:
        str: A message of the form
        'Total storage cost for one month is: $<cost>'.
    """
    s_price1 = 0.0427  # dollars per GB per month

    while True:
        storage_amount1 = input("Enter the storage amount for on-premises in one month(in GB): ")
        # The chained comparison rejects zero, negatives, "inf" and "nan"
        # (every comparison with NaN is False), so each bad entry reaches the
        # error message instead of silently re-prompting.
        if check_number(storage_amount1) and 0 < float(storage_amount1) < float("inf"):
            break
        print("Storage amount must be a positive number. Please try again")

    s_amount1 = float(storage_amount1)
    s_cost1 = s_amount1 * s_price1
    return f'Total storage cost for one month is: ${s_cost1}'
        
def premises_storage_cost1(p_amount):
    """
    Non-interactive counterpart of premises_storage_cost().

    Takes the monthly storage amount in GB and returns the on-premises storage
    cost in dollars at $0.0427 per GB.
    """
    rate_per_gb = 0.0427
    return p_amount * rate_per_gb

def hybrid_cost():
    """
    Prompt for a dataset size and cloud fraction and report the hybrid cost.

    The user enters the total storage amount (GB) and the fraction of it that
    is stored in the cloud; the remainder is stored on-premises. The total is
    the cloud storage cost, plus the on-premises storage cost, plus a query
    cost charged on the cloud-resident portion.

    Based on our paper, the query cost is $3.36431 per month per 375.387 GBs
    dataset for 30 users, which is about $0.00896 per month per GB.

    Both prompts are repeated, with an error message, until the amount is a
    positive, finite number and the fraction lies strictly between 0 and 1.

    Returns:
        str: A message of the form
        'Total storage cost for one month is: $<cost>'.
    """
    while True:
        storage_amount2 = input("Enter the total storage amount in one month(in GB): ")
        storage_frac2 = input("Enter the storage fraction that is stored on cloud (i.e: 0.3): ")
        # `and` short-circuits, so float() is only reached for strings that
        # check_number accepted. The chained comparisons also reject "nan" and
        # an infinite amount, and any failure now reaches the error message.
        if (
            check_number(storage_amount2)
            and check_number(storage_frac2)
            and 0 < float(storage_amount2) < float("inf")
            and 0 < float(storage_frac2) < 1
        ):
            break
        print("Storage amount and fraction must be a positive number. Fraction must be less than 1. Please try again")

    s_amount2 = float(storage_amount2)
    s_frac2 = float(storage_frac2)

    cf_amount2 = s_amount2 * s_frac2          # GB held in the cloud
    query_cost2 = 0.00896 * cf_amount2        # query charge on the cloud portion
    pf_amount2 = s_amount2 * (1 - s_frac2)    # GB held on-premises

    h_cost2 = cloud_storage_cost1(cf_amount2) + premises_storage_cost1(pf_amount2) + query_cost2
    return f'Total storage cost for one month is: ${h_cost2}'
    
def check_file_size(file_path):
    """
    Print the size of every file in a directory and return the total in GB.

    Only regular files directly inside the directory are measured; entries
    that are not files (for example subdirectories) are skipped, because
    their reported size is not part of the dataset.

    Args:
        file_path: Path of the directory to inspect, with or without a
            trailing path separator.

    Returns:
        float: Combined size of the files in gigabytes (bytes / 1e9).

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
    """
    import os

    total_size = 0
    for file in os.listdir(file_path):
        # os.path.join inserts the separator when needed, so the directory
        # does not have to end with one.
        full_path = os.path.join(file_path, file)
        if not os.path.isfile(full_path):
            continue
        file_size = os.path.getsize(full_path)
        total_size += file_size
        print(f"File name: {file}")
        print(f"Size: {file_size} bytes")
        print()
    print("Total dataset size (in GBs): ")
    return total_size / 1000000000
    
    
def check_number(x):
    """
    Report whether a value can be converted to a float.

    Args:
        x: The value to test, typically a string typed by the user.

    Returns:
        bool: True when float(x) succeeds, False otherwise. Note that float()
        also accepts the strings "nan" and "inf", so those return True.
    """
    try:
        float(x)
    except (TypeError, ValueError):
        # ValueError: a string that is not numeric text.
        # TypeError: a type float() cannot convert at all, such as None or a list.
        return False
    return True
    
    

# In [10]:
def hybrid_cost1(h_amount,frac):
    """
    Return the monthly cost of a hybrid (cloud + on-premises) database system.

    h_amount is the total storage amount and frac is the fraction of it kept
    in the cloud; the rest is kept on-premises. The result is the cloud
    storage cost, plus the on-premises storage cost, plus the query cost on
    the cloud portion.

    Based on our paper, the query cost is $3.36431 per month per 375.387 GBs
    dataset for 30 users, which is about $0.00896 per month per GB.
    """
    query_rate_per_gb = 0.00896

    cloud_gb = h_amount * frac
    onprem_gb = h_amount * (1 - frac)
    query_fee = query_rate_per_gb * cloud_gb

    storage_fee = cloud_storage_cost1(cloud_gb) + premises_storage_cost1(onprem_gb)
    return storage_fee + query_fee
    
def hybrid_cost_list(h_amount1, frac_list=None):
    """
    Return the hybrid-system cost for each cloud fraction in a list.

    Args:
        h_amount1: Storage amount of the dataset.
        frac_list: Cloud-storage fractions to evaluate; for each one the
            remaining portion is on-premises storage. Omitting it (or passing
            None) is treated as an empty list.

    Returns:
        numpy.ndarray: One cost per entry of frac_list, in the same order, as
        computed by hybrid_cost1.
    """
    import numpy as np

    # None replaces the former mutable `[]` default; the result for an omitted
    # argument is still an empty array.
    fractions = np.array([] if frac_list is None else frac_list)
    return np.array([hybrid_cost1(h_amount1, frac) for frac in fractions])
 

# In [13]:
def hybrid_premise_graph(storage_amount, frac_list1=None):
    """
    Show a grouped bar chart comparing hybrid and on-premises costs.

    For every cloud fraction a red bar shows the hybrid-system cost and a blue
    bar next to it shows the cost of keeping the whole dataset on-premises
    (which is the same for every fraction).

    Args:
        storage_amount: Storage amount in GB.
        frac_list1: Cloud-storage fractions to plot. Omitting it (or passing
            None) is treated as an empty list and produces an empty chart.

    Returns:
        None. The figure is displayed with plt.show().
    """
    import matplotlib.pyplot as plt
    import numpy as np

    # None replaces the former mutable `[]` default.
    if frac_list1 is None:
        frac_list1 = []

    h = hybrid_cost_list(storage_amount, frac_list1)
    # The on-premises cost does not depend on the fraction, so compute it once.
    p = [premises_storage_cost1(storage_amount)] * len(h)

    barWidth = 0.25
    r1 = np.arange(len(h))   # x positions of the hybrid bars
    r2 = r1 + barWidth       # on-premises bars sit directly to the right
    p1 = plt.bar(r1, h, color='red', width=barWidth, edgecolor='white', label='hybrid cost')
    p2 = plt.bar(r2, p, color='blue', width=barWidth, edgecolor='white', label='on-premises cost')

    plt.title('Hybrid vs On-premises Cost')
    plt.xlabel('Cloud fraction')
    plt.xticks(r2, frac_list1)
    plt.ylabel('Cost (in dollars)')

    # With no fractions there are no bars, so p1[0] / p2[0] would raise
    # IndexError; skip the legend in that case.
    if len(h) > 0:
        plt.legend((p1[0], p2[0]), ("Hybrid", "On-premises"))
    plt.show()
