In [None]:
# Directory prefix for the result files; the cells below build file names by plain
# string concatenation (`path + file`, `path + 'Plots/...'`), so keep the trailing slash.
path = "results/"

# Colors

In [None]:
# Two six-colour palettes, indexed as colors[choice][line_index] by regular_plot:
# row 0 is used when choice == 0, row 1 when choice == 1.
colors = [['#7f2704', '#a50f15', '#de2d26', '#fb6a4a', '#ff005d', '#fcbba1'], 
          ['#000000', '#464b52', '#3d6085', '#3f7eb5', '#3ca5e8', '#79e8ff']]

# Functions

### Functions for preprocessing & plotting

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
import math
import os

# Make sure the output directory for the figures exists. `exist_ok=True` makes
# re-running this cell a no-op, missing parent directories are created as well,
# and genuine failures (e.g. no write permission) are raised here instead of
# surfacing later as a confusing error from `savefig`.
os.makedirs(os.path.join(path, 'Plots'), exist_ok=True)

In [None]:
def _frontier_indices(points):
    """Return the row indices of `points` that are not dominated by another row.

    Row ``j`` dominates row ``i`` when it is at least as large in both of the
    first two columns and strictly larger in at least one. Exact duplicates do
    not eliminate each other: only the first occurrence is kept.
    """
    kept = []
    total = len(points)
    for i in range(total):
        t_i, a_i = points[i][0], points[i][1]
        dominated = False
        for j in range(total):
            if i == j:
                continue
            t_j, a_j = points[j][0], points[j][1]
            # `j < i` breaks ties between identical points so that exactly one
            # copy survives instead of both being dropped.
            if t_i <= t_j and a_i <= a_j and (t_i < t_j or a_i < a_j or j < i):
                dominated = True
                break
        if not dominated:
            kept.append(i)
    return kept


def preprocess(path):
    """Load a result file and extract everything the plotting helpers need.

    The file is read with ``np.genfromtxt`` using whitespace as delimiter.
    Column 0 is treated as the x value and column 1 as the y value of each
    measured point (the figures label them transactional and analytical
    throughput respectively).

    The last ``6 * 6`` rows are interpreted as a grid in which row
    ``i * 6 + j`` holds the point for T-client level ``i`` and A-client level
    ``j``. Any rows before that grid only take part in the frontier
    computation (presumably the "real mix" workload points -- confirm against
    the benchmark driver).

    Parameters
    ----------
    path : str
        Path of the result file.

    Returns
    -------
    tt_v, ta_v : ndarray, shape (6, 6)
        x and y values grouped by T-client level (one row per fixed-T line).
    tt_h, ta_h : ndarray, shape (6, 6)
        The same values grouped by A-client level (one row per fixed-A line).
    district_tc : int
        Number of T-client levels (6).
    frontier : ndarray, shape (2, k)
        x (row 0) and y (row 1) of the non-dominated points, in file order.
    idx : ndarray
        Indices that sort ``frontier`` by its x values.

    Raises
    ------
    ValueError
        If the file does not contain at least 36 rows with two columns.
    """
    num_ratios = 6
    district_tc = num_ratios  # number of different T clients
    district_ac = num_ratios  # number of different A clients
    grid_rows = district_tc * district_ac

    throughputs = np.genfromtxt(path, delimiter=None)
    if throughputs.ndim != 2 or throughputs.shape[1] < 2 or len(throughputs) < grid_rows:
        # Without this check a short file would produce a negative offset and
        # silently read the grid from the wrong rows.
        raise ValueError(
            "%s: expected at least %d rows with 2 columns, got an array of shape %s"
            % (path, grid_rows, throughputs.shape)
        )

    # Frontier: every point that no other point beats on both axes.
    frontier_idx = np.array(_frontier_indices(throughputs), dtype=int)
    frontier = throughputs[frontier_idx, :2].T.copy()
    idx = np.argsort(frontier[0])

    # The grid occupies the tail of the file; anything before `offset` is extra.
    offset = len(throughputs) - grid_rows
    grid = throughputs[offset:, :2].reshape(district_tc, district_ac, 2)

    # Vertical lines - fixed T: one row per T-client level.
    tt_v = grid[:, :, 0].copy()
    ta_v = grid[:, :, 1].copy()

    # Horizontal lines - fixed A: the same grid viewed per A-client level.
    tt_h = tt_v.T.copy()
    ta_h = ta_v.T.copy()

    return tt_v, ta_v, tt_h, ta_h, district_tc, frontier, idx

In [None]:
def regular_plot(x, y, n, label, clients, choice, plt):
    """Draw the first `n` rows of `x`/`y` as separate lines on `plt`.

    Row ``i`` is coloured with ``colors[choice][i]`` and labelled with
    ``clients[choice][i]``. Even rows use circle markers and a thicker stroke,
    odd rows use square markers and a thinner one. `label` is accepted but not
    used. `plt` may be anything exposing ``plot`` and ``grid`` (the callers in
    this notebook pass an Axes).

    Returns two three-element lists, `tc` and `ac`, holding the x and y values
    of the grid cells [2][4], [3][3] and [4][2].
    """
    for row in range(n):
        marker_fmt, stroke = ('o-', 6.0) if row % 2 == 0 else ('s-', 4.0)
        plt.plot(
            list(x[row]), list(y[row]), marker_fmt,
            c=colors[choice][row], label=clients[choice][row],
            markerfacecolor='none', linewidth=stroke, markersize=10,
        )

    plt.grid(color='#CCCCCC', linestyle='--', linewidth=0.5)

    # Cells on the anti-diagonal that the frontier panel highlights later on.
    highlighted = ((2, 4), (3, 3), (4, 2))
    tc = [x[r][k] for r, k in highlighted]
    ac = [y[r][k] for r, k in highlighted]
    return tc, ac

In [None]:
def double_regular_plot(x_v, y_v, x_h, y_h, n, label, choice, plt):
    """Overlay the fixed-T and the fixed-A line families on one plot.

    Parameters
    ----------
    x_v, y_v : 2-D sequences
        One row per fixed-T line (x and y values).
    x_h, y_h : 2-D sequences
        One row per fixed-A line (x and y values).
    n : int
        Number of rows to draw from each family.
    label, choice :
        Accepted for signature compatibility; not used.
    plt :
        Object exposing ``plot`` and ``grid`` (pyplot or an Axes).

    Only the first line of each family carries a legend label, so the legend
    shows a single entry per family. Returns None.
    """
    families = (
        (x_v, y_v, colors[0][3], 'Fixed-T lines'),
        (x_h, y_h, colors[1][4], 'Fixed-A lines'),
    )
    for xs, ys, colour, legend_label in families:
        # Iterate over the caller-supplied `n`; the loop bound must not depend
        # on a module-level variable that may not exist.
        for i in range(n):
            legend = {'label': legend_label} if i == 0 else {}
            plt.plot(list(xs[i]), list(ys[i]), 'o--', c=colour,
                     markerfacecolor='none', linewidth=3, markersize=10, **legend)

    plt.grid(color='#CCCCCC', linestyle='--', linewidth=0.6)

    return

In [None]:
def frontier_plot(x, idx, label, plt, tc, ac, vx1, vy1, vx2, vy2, vx3, vy3, t1, t2, t3, type):
    """Draw the frontier curve, shade the area below it and optionally annotate three points.

    `x` holds the frontier (row 0: x values, row 1: y values) and `idx` the
    order in which to draw it. `label` is accepted but not used. When
    ``type == 1`` the points ``(tc[k], ac[k])`` for k = 0..2 are drawn as black
    dots and annotated with `t1`..`t3`; the text of point k is placed at the
    point shifted by ``(vxk, vyk)``. Returns None.
    """
    curve_x, curve_y = x[0][idx], x[1][idx]
    plt.plot(curve_x, curve_y, '-', c='green', linewidth=6.0, label='Frontier', zorder=2)
    plt.fill_between(curve_x, curve_y, color='grey', hatch='//', alpha=0.08, label='AUC')

    plt.grid(color='#CCCCCC', linestyle='--', linewidth=0.8, zorder=1)

    if type != 1:
        return

    plt.scatter(tc, ac, marker="o", s=100, color='#000000', zorder=3)
    # headwidth=0 turns the arrow into a plain connector line.
    connector = {"facecolor": "black", "width": 0.5, "headwidth": 0, "shrink": 0.1}
    callouts = ((t1, vx1, vy1), (t2, vx2, vy2), (t3, vx3, vy3))
    for k, (text, shift_x, shift_y) in enumerate(callouts):
        px, py = tc[k], ac[k]
        plt.annotate(text, (px, py), xytext=(px + shift_x, py + shift_y),
                     color='#000000', size=30, weight='bold', arrowprops=connector)

    return

In [None]:
def area_between_curves(frontier, idx, label):
    """Plot the frontier against the proportional line and shade the area between them.

    The proportional line runs from ``(0, maxAT)`` to ``(maxTT, 0)``. It is
    sampled at as many evenly spaced x positions as the frontier has points, so
    the returned array has the same length as ``frontier[1][idx]``.

    Parameters
    ----------
    frontier : ndarray, shape (2, k)
        Frontier x values (row 0) and y values (row 1).
    idx : ndarray
        Order in which the frontier points are drawn.
    label : str
        Figure title.

    Returns
    -------
    ndarray
        y values of the proportional line at the sampled x positions.

    Raises
    ------
    ValueError
        If the largest frontier x value is 0, which leaves the slope undefined.
    """
    front_x = frontier[0][idx]
    front_y = frontier[1][idx]
    max_tt = maxTT(frontier, idx)
    max_at = maxAT(frontier, idx)
    if max_tt == 0:
        raise ValueError("frontier has a maximum x value of 0; the proportional line is undefined")

    # Everything in this function draws through pyplot, so the helper is given
    # pyplot as its drawing target too.
    proportional_line(frontier, idx, plt)

    # linspace yields exactly len(front_x) samples ending at max_tt; an integer
    # range with a floored step can produce a different count or a zero step.
    x = np.linspace(0, max_tt, num=len(front_x))
    y = -(max_at / max_tt) * x + max_at

    plt.plot(front_x, front_y, '-', c='green', linewidth=3.0, label='Frontier')
    # Walk along the frontier and back along the line to close the polygon.
    plt.fill_between(np.append(front_x, x[::-1]), np.append(front_y, y[::-1]),
                     color='grey', hatch='//', alpha=0.08, label='Area between curves')
    plt.xlabel('Transactional Throughput (tps)', fontsize=14)
    plt.ylabel('Analytical Throughtput (qps)', fontsize=14)
    plt.rc('grid', linestyle="--", color='#CCCCCC')
    plt.title(label, fontsize=16)
    plt.legend(loc="center right")
    plt.rcParams.update({'font.size': 10})
    plt.grid()
    plt.show()
    return y

In [None]:
 #       ===============   =============
 #       Location String   Location Code
 #       ===============   =============
 #       'best'            0
 #       'upper right'     1
 #       'upper left'      2
 #       'lower left'      3
 #       'lower right'     4
 #       'right'           5
 #       'center left'     6
 #       'center right'    7
 #       'lower center'    8
 #       'upper center'    9
 #       'center'          10
 #       ===============   =============

### Functions for metrics

In [None]:
# Area Under the Curve (AUC), computed by sklearn with the trapezoidal rule.
def auc_fun(x, i):
    """Return the area under the curve given by row 0 (x) and row 1 (y) of `x`, selected by `i`."""
    xs, ys = x[0][i], x[1][i]
    return metrics.auc(xs, ys)

In [None]:
def slope(x, i):
    """Return the largest selected value of row 1 divided by the largest selected value of row 0."""
    peak_y = x[1][i].max()
    peak_x = x[0][i].max()
    return peak_y / peak_x

In [None]:
def maxTT(x, i):
    """Return the largest value of row 0 of `x` among the entries selected by `i`."""
    return x[0][i].max()

In [None]:
def maxAT(x, i):
    """Return the largest value of row 1 of `x` among the entries selected by `i`."""
    return x[1][i].max()

In [None]:
def proportional_line(frontier, idx, plt):
    """Draw the dashed straight line from (0, maxAT) to (maxTT, 0) on `plt`."""
    x_ends = [0, maxTT(frontier, idx)]
    y_ends = [maxAT(frontier, idx), 0]
    plt.plot(x_ends, y_ends, '--', c='#084594', linewidth=3, label='Proportional line', zorder=1)

In [None]:
def bounding_box(frontier, idx, plt):
    """Draw the top and right edges of the box spanned by (0, 0) and (maxTT, maxAT) on `plt`.

    Only the top edge carries a legend label, so the legend shows one entry.
    """
    peak_t = maxTT(frontier, idx)
    peak_a = maxAT(frontier, idx)
    edge_style = {'c': '#cb181d', 'linewidth': 3}

    # Top edge: horizontal at the largest y value.
    plt.plot([0, peak_t], [peak_a, peak_a], '--', **edge_style, label='Bounding box')
    # Right edge: vertical at the largest x value.
    plt.plot([peak_t, peak_t], [0, peak_a], '--', **edge_style)

In [None]:
def abc_fun(frontier, idx, y):
    """Return the area between the frontier and `y`, normalised by the triangle under the proportional line.

    `y` must have one value per selected frontier point. The area of
    ``|frontier_y - y|`` over the frontier x values is divided by
    ``0.5 * maxTT * maxAT``.
    """
    gap = abs(frontier[1][idx] - y)
    area = metrics.auc(frontier[0][idx], gap)
    triangle = 0.5 * maxTT(frontier, idx) * maxAT(frontier, idx)
    return area / triangle

# Throughput Frontier 

In [None]:
# Legend labels, indexed as clients[choice][line_index] by regular_plot:
# row 0 labels the lines drawn with choice == 0 (TC levels),
# row 1 the lines drawn with choice == 1 (AC levels).
clients = [
['$TC=0$', '$TC=0.1*τ_{max}$', '$TC=0.2*τ_{max}$', '$TC=0.5*τ_{max}$', '$TC=0.8*τ_{max}$', '$TC=τ_{max}$'], 
['$AC=0$', '$AC=0.1*α_{max}$', '$AC=0.2*α_{max}$', '$AC=0.5*α_{max}$', '$AC=0.8*α_{max}$', '$AC=α_{max}$']
]

# Font sizes shared by the multi-panel figures below.
yaxes_size =30  # y tick labels (draw_frontier_group)
xaxes_size =30  # x tick labels (draw_frontier_group)
c = 30  # NOTE(review): not referenced in the visible cells -- confirm it is still needed
label_size = 38  # shared axis labels
legend_size = 28  # legend entries
sub_label_size=20  # right-hand row labels; panel titles use sub_label_size+10

In [None]:
def draw_frontier_group(file, ax0, ax1, ax2, ylabel):
    """Fill one row of three panels for the result file `file`.

    `ax0` receives the fixed-T lines, `ax1` the fixed-A lines and `ax2` the
    frontier together with the proportional line and the bounding box.
    `ylabel` is written on the right-hand side of `ax2`.

    The axis limits, tick positions and tick labels are read from the
    module-level variables `max_t_qps`, `max_a_qps`, `x`, `xticks`, `y`,
    `yticks` and `y2ticks`, which must be set before calling this function.
    """
    print("Drawing froniter from file:", file)
    tt_v, ta_v, tt_h, ta_h, district_tc, frontier, idx = preprocess(path+file)

    # All three panels share limits and tick positions; only the tick label
    # lists differ (`y2ticks` on the left-most panel, `yticks` on the others).
    for panel, y_labels in ((ax0, y2ticks), (ax1, yticks), (ax2, yticks)):
        panel.axis([0, max_t_qps, 0, max_a_qps])
        panel.set_xticks(x, minor=False)
        panel.set_xticklabels(xticks, rotation=0, fontsize=xaxes_size)
        panel.set_yticks(y, minor=False)
        panel.set_yticklabels(y_labels, rotation=0, fontsize=yaxes_size)

    # Row caption on the right edge of the last panel.
    ax2.yaxis.set_label_position("right")
    ax2.set_ylabel(ylabel, fontsize=sub_label_size)

    # The fixed-T panel also yields the three points annotated on the frontier panel.
    tc, ac = regular_plot(tt_v, ta_v, 6, "", clients, 0, ax0)
    regular_plot(tt_h, ta_h, 6, "", clients, 1, ax1)

    proportional_line(frontier, idx, ax2)
    bounding_box(frontier, idx, ax2)
    frontier_plot(
        frontier, idx, "", ax2, tc, ac,
        500, 0,      # text offset of the first annotation
        500, 0,      # text offset of the second annotation
        -1000, 0,    # text offset of the third annotation
        r"$f_2:0s$", r"$f_5:0s$", r"$f_8:0s$",
        1,
    )


# 3. TiDB

In [None]:
cm = 1/2.54  # centimeters in inches

# One row of three panels: fixed-T lines, fixed-A lines, throughput frontier.
nplots_rows = 1
f_single, (ax7, ax8, ax9) \
    = plt.subplots(nplots_rows, 3, figsize=(60*cm,7.5*nplots_rows*cm))

plt.subplots_adjust(wspace=0.08, hspace=0.2)
plt.rcParams['figure.dpi'] = 200
plt.rcParams['savefig.dpi'] = 200
plt.rcParams['text.usetex'] = False

# Axis scale. These module-level names are read by draw_frontier_group, so they
# have to be set before it is called.
t_qps_step = 1000
max_t_qps = 5000
x = list(range(0, max_t_qps + t_qps_step, t_qps_step))
xticks = [str(int(xi/t_qps_step)) for xi in x]  # labels in units of t_qps_step
max_a_qps = 0.4
y = [0, 0.1, 0.2, 0.3, 0.4]
yticks = ['' for _ in y]  # blank labels for the panels that do not show a y scale
y2ticks = ['0', ] + ['{:.1f}'.format(yi)[1:] for yi in y[1:]]  # '0', '.1', '.2', ...

# ------ tidb_dist, 3 tikv (1 replica), 1 tiflash, SF100 ------
file = 'frontier-SF100.tidb_dist.1x.csv'
draw_frontier_group(file, ax7, ax8, ax9, 'TiDB-Dist-1x')

ax7.set_title('Fixed-T lines', fontsize=sub_label_size+10)
ax8.set_title('Fixed-A lines', fontsize=sub_label_size+10)
ax9.set_title('Throughput Frontier', fontsize=sub_label_size+10)
ax7.set_ylabel('Analytical Throughput (qps)', fontsize=label_size-9, labelpad=30)
ax8.set_xlabel('Transactional Throughput ($10^3$ tps)', fontsize=label_size, labelpad=30)
# The legends are anchored above the panels, outside the axes area.
ax7.legend(loc='center left', bbox_to_anchor=(-0.05, 1.66),ncol=2, fontsize=legend_size ) 
ax8.legend(loc='upper left', bbox_to_anchor=(0.5,2.16),ncol=2, fontsize=legend_size ) 

# Change the extension to .png for a raster version.
f_single.savefig(path+'Plots/tidb_1x.pdf', dpi=800, bbox_inches='tight')

In [None]:
# Five rows (one per system configuration) of three panels each.
nplots_rows = 5
f, [
    (ax7_pg, ax8_pg, ax9_pg),
    (ax7_non_bind, ax8_non_bind, ax9_non_bind),
    (ax7_bind, ax8_bind, ax9_bind),
    (ax7_tidb_dist_1x, ax8_tidb_dist_1x, ax9_tidb_dist_1x),
    (ax7_tidb_dist_2x, ax8_tidb_dist_2x, ax9_tidb_dist_2x),
] = plt.subplots(nplots_rows, 3, figsize=(60*cm,7.5*nplots_rows*cm))

plt.subplots_adjust(wspace=0.08, hspace=0.2)
plt.rcParams['figure.dpi'] = 200
plt.rcParams['savefig.dpi'] = 200
plt.rcParams['text.usetex'] = False

# ------ scale for the first four rows ------
# These module-level names are read by draw_frontier_group at call time.
t_qps_step = 1000
max_t_qps = 5000
x = list(range(0, max_t_qps + t_qps_step, t_qps_step))
xticks = [str(int(xi/1000)) for xi in x]
max_a_qps = 0.4001
a_qps_step = 0.1
# NOTE(review): the tick positions are shifted by +0.01 while the labels are
# rounded to one decimal -- confirm that the offset is intentional.
y = [0 + yi * a_qps_step + 0.01 for yi in range(int(max_a_qps / a_qps_step))]
yticks = ['' for _ in y]
y2ticks = ['0', ] + ['{:.1f}'.format(yi)[1:] for yi in y[1:]]

# ------ pg SF100 ------
file = "frontier-SF100.pg.csv"
draw_frontier_group(file, ax7_pg, ax8_pg, ax9_pg, 'PostgreSQL')
# ------ tidb SF100 ------
file = "frontier-SF100.tidb.csv"
draw_frontier_group(file, ax7_non_bind, ax8_non_bind, ax9_non_bind, 'TiDB')
# ------ tidb SF100 CPU bind ------
file = "frontier-SF100.tidb.cpu_bind.csv"
draw_frontier_group(file, ax7_bind, ax8_bind, ax9_bind, 'TiDB_bind')
# ------ tidb_dist, 3 tikv (1 replica), 1 tiflash, SF100 ------
file = 'frontier-SF100.tidb_dist.1x.csv'
draw_frontier_group(file, ax7_tidb_dist_1x, ax8_tidb_dist_1x, ax9_tidb_dist_1x, 'TiDB-Dist-1x')

# ------ wider scale for the last row ------
t_qps_step = 1000
max_t_qps = 8500
x = list(range(0, max_t_qps + t_qps_step, t_qps_step))
xticks = [str(int(xi/1000)) for xi in x]
max_a_qps = 0.7002
a_qps_step = 0.1
y = [0 + yi * a_qps_step + 0.01 for yi in range(int(max_a_qps / a_qps_step))]
yticks = ['' for _ in y]
y2ticks = ['0', ] + ['{:.1f}'.format(yi)[1:] for yi in y[1:]]
# ------ tidb_dist, 6 tikv, 2 tiflash, SF100 ------
file = 'frontier-SF100.tidb_dist.2x.csv'
draw_frontier_group(file, ax7_tidb_dist_2x, ax8_tidb_dist_2x, ax9_tidb_dist_2x, 'TiDB-Dist-2x')

# Column titles on the top row, one shared y label on the middle row and one
# shared x label under the bottom row.
ax7_pg.set_title('Fixed-T lines', fontsize=sub_label_size+10)
ax8_pg.set_title('Fixed-A lines', fontsize=sub_label_size+10)
ax9_pg.set_title('Throughput Frontier', fontsize=sub_label_size+10)
ax7_bind.set_ylabel('Analytical Throughput (qps)', fontsize=label_size-9, labelpad=30)
ax8_tidb_dist_2x.set_xlabel('Transactional Throughput ($10^3$ tps)', fontsize=label_size, labelpad=30)
ax7_pg.legend(loc='center left', bbox_to_anchor=(-0.05, 1.66),ncol=2, fontsize=legend_size ) 
ax8_pg.legend(loc='upper left', bbox_to_anchor=(0.5,2.26),ncol=2, fontsize=legend_size ) 

# Change the extension to .png for a raster version.
f.savefig(path+'Plots/tidb_scale.pdf', dpi=800, bbox_inches='tight')

# Across Systems 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams["figure.figsize"] = [7.5, 3.5]
#plt.rcParams["figure.autolayout"] = True
plt.rcParams['figure.dpi'] = 200
plt.rcParams['savefig.dpi'] = 200
plt.rcParams['text.usetex'] = True

# Axis scale shared by all systems in this figure.
t_qps_step = 1000
max_t_qps = 12000
x = list(range(0, max_t_qps + t_qps_step, t_qps_step))
xticks = [str(int(xi/t_qps_step)) for xi in x]
a_qps_step = 0.1
max_a_qps = 0.7001
# NOTE(review): the tick positions are shifted by +0.01 while the labels are
# rounded to one decimal -- confirm that the offset is intentional.
y = [0 + yi * a_qps_step + 0.01 for yi in range(int(max_a_qps / a_qps_step))]
yticks = ['' for _ in y]
y2ticks = ['{:.1f}'.format(yi) for yi in y]

plt.axis([0,max_t_qps, 0,max_a_qps])

def draw_group(file, color):
    """Mark the f_5 point of one system on the current figure and return its frontier.

    Reads `file` through preprocess, draws the point at grid cell [3][3] in
    `color`, annotates it, and returns ``(frontier, idx)`` so the caller can
    plot the frontier curve itself.
    """
    tt_v, ta_v, _tt_h, _ta_h, _district_tc, frontier, idx = preprocess(path+file)
    # Grid cell [3][3] is the middle one of the three points regular_plot
    # reports. Reading it directly avoids drawing lines on an axes object that
    # belongs to a figure created in another cell.
    tc_mid, ac_mid = tt_v[3][3], ta_v[3][3]
    plt.scatter(tc_mid, ac_mid, marker="o", s=50, color=color,zorder=3)
    arrow_properties = dict(facecolor="black", width=0.5,headwidth=0, shrink=0.1)
    plt.annotate(r'$f_5:0s$', (tc_mid, ac_mid), xytext =(tc_mid+400, ac_mid+a_qps_step/2), color='#000000', size=12, weight='bold', arrowprops=arrow_properties)
    return frontier, idx

# Colour candidates: 'slateblue'/'green'/'cyan'/'darkorange'/'firebrick'
# The single-node TiDB runs ('frontier-SF100.tidb.csv',
# 'frontier-SF100.tidb.cpu_bind.csv') can be added with the same two-line pattern.
# ------ PostgreSQL ------
file = 'frontier-SF100.pg.csv'
frontier0, idx0 = draw_group(file, 'cyan')
plt.plot(frontier0[0][idx0], frontier0[1][idx0], '-', linewidth=3.0, label='PostgreSQL', color='cyan')
# ------ tidb_dist, 3 tikv, 1 tiflash, SF100 ------
file = 'frontier-SF100.tidb_dist.1x.csv'
frontier5, idx5 = draw_group(file, 'green')
plt.plot(frontier5[0][idx5], frontier5[1][idx5], '-', linewidth=3.0, label='TiDB-Dist-1x', color='green')
# ------ tidb_dist, 6 tikv, 2 tiflash, SF100 ------
file = 'frontier-SF100.tidb_dist.2x.csv'
frontier6, idx6 = draw_group(file, 'firebrick')
plt.plot(frontier6[0][idx6], frontier6[1][idx6], '-', linewidth=3.0, label='TiDB-Dist-2x', color='firebrick')

plt.xticks(x, xticks, fontsize=12)
plt.yticks(y,y2ticks, fontsize=12)

plt.xlabel('Transactional Throughput ($10^3$ tps)', fontsize=13)
plt.ylabel('Analytical Throughput (qps)', fontsize=13)
plt.rc('grid', linestyle="--", color='#CCCCCC')
plt.legend(loc="upper right", fontsize=12) # bbox_to_anchor=(1.05, 1.2), ncol=5)
plt.grid()
# os.path.join gives the same file whether or not `path` ends with a slash.
plt.savefig(os.path.join(path, 'Plots', 'across.png'), dpi=400, bbox_inches='tight')
plt.show()