In [1]:
import numpy
from pt.plotter import *
from bqplot import LogScale

In [2]:
from IPython.display import HTML as DHTML

# function to create those toggle buttons, adapted from something from stackoverflow
def add_toggle_button(desc, *inputs):
    """Return an HTML/JavaScript snippet with a button that toggles input cells.

    The generated script collects every ``div.input`` element on the page and
    hides or shows the ones whose positions are listed in ``inputs``. It relies
    on jQuery (``$``) being available. The toggle is also run once when the
    document is ready, so the listed cells start out hidden.

    Parameters:
        desc: text inserted into the button label.
        *inputs: positions of the input cells to toggle. The first one is also
            used as a suffix that keeps the generated JavaScript names unique,
            so at least one value is required (IndexError otherwise).

    Returns:
        The snippet as a string.
    """
    cell_list = ', '.join(map(str, inputs))
    suffix = str(inputs[0])

    template = '''
        <script>
        var others%n = [%x];
        var code_shown%n = true; 
        function code_toggle%n () {
            var selector = "div.input";
            var inputs = $(selector).toArray();
            if (code_shown%n) {
                for (var i in others%n) {
                    var x = others%n[i];
                    $(inputs[x]).hide();
                }
            }
            else {
                for (var i in others%n) {
                    var x = others%n[i];
                    $(inputs[x]).show();
                }
            }

            code_shown%n = !code_shown%n;
        } 
        $( document ).ready(code_toggle%n);
        </script>
        <form action="javascript:code_toggle%n()">
        <input type="submit" value="Toggle on/off the display of the %d code.">
        </form>'''

    # The description is substituted first, so any placeholder text inside
    # `desc` is itself expanded by the later substitutions.
    html = template
    for placeholder, value in (('%d', desc), ('%n', suffix), ('%x', cell_list)):
        html = html.replace(placeholder, value)
    return html

# Show the toggle button for the "document setup" code; input positions 0-3 are the ones it hides/shows.
DHTML(add_toggle_button('document setup', 0, 1, 2, 3))

In [3]:
%%html
<style>
/* prevent truncation of the slider labels */
.widget-label, .widget-button, .jupyter-button, .widget-readout {
    width: unset !important;
    min-width: fit-content !important;
}
/* bolder text for labels that are direct children of a .widget-vbox */
.widget-vbox > .widget-label {
    font-weight: bolder;
}
</style>

In [4]:
# Candidate values for each tunable model parameter, keyed by the keyword
# argument name used by the plotted functions. The *_time values end up as
# base_time/txn_time in calc_latency (presumably seconds, since request rates
# are converted from per-minute to per-second there - TODO confirm); the
# cpu_utilization_* values are percentages (they are divided by 100 in
# calc_cpu_overhead).
default_options = {
    'pre_resp_time': [0, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 1e-1],
    'pre_resp_txn_time': [0, 1e-4, 2e-4, 5e-4, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2],
    'post_resp_time': [0, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2, 1e-1],
    'post_resp_txn_time': [0, 1e-4, 2e-4, 5e-4, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2],
    'cpu_utilization_base': [60.0, 62.5, 65.0, 67.5, 70.0, 72.5, 75.0, 77.5, 80.0, 82.5, 85.0, 87.5, 90.0, 92.5, 95.0, 97.5],
    'cpu_utilization_txn': [60.0, 62.5, 65.0, 67.5, 70.0, 72.5, 75.0, 77.5, 80.0, 82.5, 85.0, 87.5, 90.0, 92.5, 95.0, 97.5]
}

# Register the option lists with the plotting helper (imported from pt.plotter).
set_default_plotter_options(default_options)

# Starting value for each parameter; these are passed as `defaults=` to the
# make_quick_plot calls below and each one appears in the matching list above.
default_pre_resp_time = 5e-3
default_pre_resp_txn_time = 1e-3
default_post_resp_time = 5e-3
default_post_resp_txn_time = 1e-3
default_cpu_utilization_base = 75.0
default_cpu_utilization_txn = 87.5

# Constant arrays with the same length as xr. minimum_rate is only referenced
# by the commented-out M/M/1 variant in calc_latency; maximum_rate is the cap
# calc_latency applies to the queueing delay.
minimum_rate = numpy.zeros(10000) + 1e-6
maximum_rate = numpy.zeros(10000) + 10 ** 4
# Request rates used as the x axis of the rate plots: 10000 log-spaced points from 1e1 to 1e4.
xr = numpy.logspace(1, 4, 10000)

# First (smallest) cpu_utilization_base option, i.e. 60.0.
min_cpu_utilization = default_options['cpu_utilization_base'][0]
# min_cpu_utilization = 1 / (1 - default_cpu_utilization_base/100.0)

# Async Request/Response model


```
   users'             
  responses               |----------|            |----------|            |----------|
<------------------------ |response 1| <--------- |response 2| <--------- |response 3|
                          |----------|            |----------|            |----------|

                          ^                       ^                       ^
                          |                       |                       |
                          |                       |                       |
                          |                       |                       |
   users'
  requests |----| |---------------| |----| |---------------| |----| |---------------|
---------> |wait| |<- request 1 ->| |wait| |<- request 2 ->| |wait| |<- request 3 ->|
           |----| |---------------| |----| |---------------| |----| |---------------|

                  |               |
                  |               |
                 /                 \
                /                   \
               /                     \
              /                       \
             /                         \
            /                           \
           /                             \
          |                              |
          |                              |
          v                              v

          |------------------------------|   pre-resp = "execution time" before response is emitted        
          |              |               |   post-resp = remaining "execution time" after response is emitted
          |<- pre-resp ->|<- post-resp ->|
          |              |               |
          |------------------------------|

                         |
                         |
                         |
                         v

                     ----------
                     |response|
                     ----------


"execution time" = customer logic processing time + new relic processing time + "waiting time"
"waiting time" = total time spent idle waiting for execution
                 NOTE: because of how the ioloop works (i.e. it interleaves), this will actually
                 be "distributed" into the pre-resp period of the current request and post-resp
                 period of the prior request(s). at lower input throughputs this "waiting time"
                 will functionally disappear if the cpu utilization is low enough.
```

# Average Waiting Time (Latency) for M/D/1 queue

M/D/1 means "markovian generator feeding into 1 deterministic consumer"

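i.e. the M means that we're assuming requests arrive according to a Poisson process (so the time between requests is exponentially distributed), the D means that we're assuming the service time of every request is deterministic (constant), and the 1 means that there is a single consumer (server).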

In [5]:
def waiting_time(x):
    """Return the waiting-time factor ``1 / (1 - x)`` for utilization ``x``.

    ``x`` may be a scalar or a numpy array; it is the utilization factor
    (arrival rate / service rate) plotted on the x axis below. The factor
    grows without bound as ``x`` approaches 1, and a plain Python ``x == 1``
    raises ZeroDivisionError.

    NOTE(review): ``1 / (1 - x)`` is the M/M/1 time-in-system factor, whereas
    calc_latency uses the M/D/1 mean wait ``x / (2 * (1 - x))`` - confirm
    which curve this illustration is meant to show.
    """
    idle_fraction = 1 - x
    return 1 / idle_fraction

# 100 evenly spaced utilization values in [0, 0.99]; stopping short of 1 keeps
# waiting_time away from its division by zero.
x = numpy.linspace(0, 0.99, 100)
# Plot waiting_time over those values; ysc_min/ysc_max presumably bound the
# y scale to 0-10 (make_quick_plot comes from pt.plotter - verify there).
fig = make_quick_plot(
        x, waiting_time, 
        ax='Utilization Factor (arrival rate / service_rate)',
        ay='Waiting Time Factor',
        ysc_min=0,
        ysc_max=10)

VBox()

Figure(axes=[Axis(label='Utilization Factor (arrival rate / service_rate)', scale=LinearScale(max=0.9899999999…

In [6]:
def calc_latency(reqs_per_min, base_time, txn_time):
    """Mean latency of a request served by an M/D/1 queue.

    Parameters:
        reqs_per_min: offered request rate per minute (scalar or numpy array);
            it is converted to a per-second arrival rate.
        base_time: service time of the request itself, in the time unit that
            matches the per-second arrival rate.
        txn_time: extra service time added per request (0 for "no agent").

    Returns:
        ``service_time + waiting_time`` where ``service_time`` is
        ``base_time + txn_time`` and ``waiting_time`` is the M/D/1 mean
        queueing delay, capped element-wise by the module-level
        ``maximum_rate`` array. Because of that cap the result is broadcast
        to the shape of ``maximum_rate``.

    A service time of zero (both sliders at 0, or the no-agent case with
    ``base_time == 0``) yields a latency of zero instead of raising
    ZeroDivisionError.
    """
    arrival_rate = reqs_per_min / 60.0
    service_time = base_time + txn_time

    # M/M/1 alternative, kept for reference (service_rate = 1 / service_time):
    # diff = numpy.maximum(minimum_rate, service_rate - arrival_rate)
    # waiting_time = arrival_rate / (service_rate * diff)

    # for M/D/1 Queue
    # Utilization is arrival_rate / service_rate; writing it as a product with
    # service_time avoids dividing by a zero service time. It is clamped at 1
    # because the queue cannot be more than fully busy.
    queue_utilization = numpy.minimum(1, arrival_rate * service_time)
    # Mean wait rho / (2 * mu * (1 - rho)) with 1 / mu == service_time; the
    # 1e-6 keeps the denominator non-zero once the queue is saturated.
    waiting_time = queue_utilization * service_time / (2 * (1 - queue_utilization + 1e-6))

    # Cap the (otherwise huge) saturated waiting time.
    waiting_time = numpy.minimum(maximum_rate, waiting_time)
    return service_time + waiting_time

def calc_cpu_overhead(total_time, cpu_utilization_factor):
    """Scale ``total_time`` by a CPU-utilization slowdown factor.

    ``cpu_utilization_factor`` is a percentage; negative values are treated
    as 0. The slowdown is ``1 / (1 - utilization / 100)`` and it is applied
    squared, so 0% leaves ``total_time`` unchanged and 50% multiplies it by 4.
    A utilization of exactly 100 raises ZeroDivisionError.
    """
    effective_utilization = cpu_utilization_factor if cpu_utilization_factor > 0 else 0
    slowdown = 1 / (1 - effective_utilization / 100.0)
    return total_time * (slowdown ** 2)

def calc_latency_with_overhead(reqs_per_min, base_time, txn_time, cpu_utilization_factor):
    """Queueing latency from calc_latency, inflated by calc_cpu_overhead.

    The first three arguments are forwarded to calc_latency; the result and
    ``cpu_utilization_factor`` are then passed to calc_cpu_overhead.
    """
    queue_latency = calc_latency(reqs_per_min, base_time, txn_time)
    inflated_latency = calc_cpu_overhead(queue_latency, cpu_utilization_factor)
    return inflated_latency

def calc_throughput(reqs_per_min, pre_time, pre_txn_time, post_time, post_txn_time, cpu_utilization_factor):
    """Requests per minute actually completed at an offered rate.

    The time spent on one request is the pre-response latency from
    calc_latency plus the post-response times, inflated by calc_cpu_overhead.
    The achievable rate is 60 divided by that time, and the result is the
    smaller of the offered rate and the achievable rate.
    """
    pre_response_latency = calc_latency(reqs_per_min, pre_time, pre_txn_time)
    time_per_request = calc_cpu_overhead(
        pre_response_latency + post_time + post_txn_time,
        cpu_utilization_factor,
    )
    achievable_per_min = 60.0 / time_per_request
    return numpy.minimum(reqs_per_min, achievable_per_min)

# to make percentile plots monotonically increasing despite req_rate > service_rate
def set_percentile_ceiling(percentile, min_val, max_val):
    """Replace values that drop below their predecessor with ``max_val``.

    Scanning from the second element, the check is armed by the first value
    that exceeds ``min_val`` and stays armed. From then on, any element that
    is smaller than the (possibly already replaced) element before it is
    overwritten with ``max_val``.

    ``percentile`` is modified in place and also returned.
    """
    armed = False
    for idx in range(1, len(percentile)):
        armed = armed or percentile[idx] > min_val
        if not armed:
            continue
        if percentile[idx - 1] > percentile[idx]:
            percentile[idx] = max_val

    return percentile

# Latency w/ Agent vs Latency w/o Agent

In [7]:
def no_agent_latency(x, pre_resp_time=1, pre_resp_txn_time=1, post_resp_time=1, post_resp_txn_time=1, cpu_utilization_base=1, cpu_utilization_txn=1):
    """Average latency at request rate ``x`` when no agent is running.

    Only ``x`` and ``pre_resp_time`` are used: the per-transaction time and
    the CPU utilization factor are both fixed at 0. The remaining parameters
    are accepted but ignored, so this function has the same signature as
    with_agent_latency.
    """
    agent_txn_time = 0
    agent_cpu_utilization = 0
    return calc_latency_with_overhead(x, pre_resp_time, agent_txn_time, agent_cpu_utilization)

def with_agent_latency(x, pre_resp_time=1, pre_resp_txn_time=1, post_resp_time=1, post_resp_txn_time=1, cpu_utilization_base=1, cpu_utilization_txn=1):
    """Average latency at request rate ``x`` with the agent running.

    The agent adds ``pre_resp_txn_time`` to the pre-response service time and
    contributes ``cpu_utilization_txn - cpu_utilization_base`` as the CPU
    utilization factor. The post-response parameters are accepted but unused.
    """
    added_cpu_utilization = cpu_utilization_txn - cpu_utilization_base
    return calc_latency_with_overhead(
        x,
        pre_resp_time,
        pre_resp_txn_time,
        added_cpu_utilization,
    )

# Plot average latency with and without the agent over the request rates in xr,
# using log scales on both axes. `defaults` gives each model parameter its
# default_* starting value (make_quick_plot comes from pt.plotter).
fig = make_quick_plot(
        xr, with_agent_latency, no_agent_latency, 
        ax='Requests Per Minute',
        ay="Average Latency",
        ysc_min=0.001,
        ysc_max=1,
        xscale=LogScale,
        yscale=LogScale,
        defaults={
            'pre_resp_time': default_pre_resp_time,
            'pre_resp_txn_time': default_pre_resp_txn_time,
            'post_resp_time': default_post_resp_time,
            'post_resp_txn_time': default_post_resp_txn_time,
            'cpu_utilization_base': default_cpu_utilization_base,
            'cpu_utilization_txn': default_cpu_utilization_txn,
        })

VBox(children=(SelectionSlider(description='Pre_Resp_Time', index=3, options=(0, 0.001, 0.002, 0.005, 0.01, 0.…

Figure(axes=[Axis(label='Requests Per Minute', scale=LogScale(max=10000.0, min=10.0)), Axis(label='Average Lat…

# Percent Latency Increase

In [8]:
# Ceiling and arming threshold used when cleaning up the latency overhead curve.
lpo_ymax = 1000
lpo_ymin = 2e-4

def latency_percent_overhead(x, pre_resp_time=1, pre_resp_txn_time=1, post_resp_time=1, post_resp_txn_time=1, cpu_utilization_base=1, cpu_utilization_txn=1):
    """Percent increase in latency caused by the agent at request rate ``x``.

    Computes ``100 * |with - without| / without`` from with_agent_latency and
    no_agent_latency, then passes the curve through set_percentile_ceiling
    with ``lpo_ymin`` as the threshold and ``2 * lpo_ymax`` as the ceiling.
    """
    model_args = (
        pre_resp_time, pre_resp_txn_time,
        post_resp_time, post_resp_txn_time,
        cpu_utilization_base, cpu_utilization_txn,
    )
    baseline = no_agent_latency(x, *model_args)
    instrumented = with_agent_latency(x, *model_args)

    relative_change = numpy.abs((instrumented - baseline) / baseline)
    return set_percentile_ceiling(100 * relative_change, lpo_ymin, 2*lpo_ymax)
   
# Plot the percent latency increase over the request rates in xr on log-log
# axes. ysc_max is lpo_ymax - 1, which is below the 2 * lpo_ymax ceiling that
# latency_percent_overhead writes into the curve.
fig = make_quick_plot(
        xr, latency_percent_overhead,
        ax='Requests Per Minute',
        ay="Percent Latency Increase",
        ysc_min=1,
        xscale=LogScale,
        yscale=LogScale,
        ysc_max=lpo_ymax - 1,
        defaults={
            'pre_resp_time': default_pre_resp_time,
            'pre_resp_txn_time': default_pre_resp_txn_time,
            'post_resp_time': default_post_resp_time,
            'post_resp_txn_time': default_post_resp_txn_time,
            'cpu_utilization_base': default_cpu_utilization_base,
            'cpu_utilization_txn': default_cpu_utilization_txn,
        })

VBox(children=(SelectionSlider(description='Pre_Resp_Time', index=3, options=(0, 0.001, 0.002, 0.005, 0.01, 0.…

Figure(axes=[Axis(label='Requests Per Minute', scale=LogScale(max=10000.0, min=10.0)), Axis(label='Percent Lat…

# Throughput w/ Agent vs Throughput w/o Agent

In [9]:
def no_agent_throughput(x, pre_resp_time=1, pre_resp_txn_time=1, post_resp_time=1, post_resp_txn_time=1, cpu_utilization_base=1, cpu_utilization_txn=1):
    """Throughput at offered request rate ``x`` when no agent is running.

    Uses only ``pre_resp_time`` and ``post_resp_time``; both per-transaction
    times and the CPU utilization factor are fixed at 0. The other parameters
    are accepted but ignored.
    """
    no_txn_time = 0
    no_added_cpu = 0
    return calc_throughput(x, pre_resp_time, no_txn_time, post_resp_time, no_txn_time, no_added_cpu)
    
def with_agent_throughput(x, pre_resp_time=1, pre_resp_txn_time=1, post_resp_time=1, post_resp_txn_time=1, cpu_utilization_base=1, cpu_utilization_txn=1):
    """Throughput at offered request rate ``x`` with the agent running.

    All four time parameters are forwarded to calc_throughput, together with
    ``cpu_utilization_txn - cpu_utilization_base`` as the CPU utilization
    factor.
    """
    added_cpu_utilization = cpu_utilization_txn - cpu_utilization_base
    return calc_throughput(
        x,
        pre_resp_time,
        pre_resp_txn_time,
        post_resp_time,
        post_resp_txn_time,
        added_cpu_utilization,
    )

# Plot achieved throughput with and without the agent over the request rates in
# xr, using log scales on both axes. The y bounds (10 to 10000) match the range
# of xr.
fig = make_quick_plot(
        xr, with_agent_throughput, no_agent_throughput, 
        ax='Requests Per Minute',
        ay='Actual Throughput',
        xscale=LogScale,
        yscale=LogScale,
        ysc_min=10,
        ysc_max=10000,
        defaults={
            'pre_resp_time': default_pre_resp_time,
            'pre_resp_txn_time': default_pre_resp_txn_time,
            'post_resp_time': default_post_resp_time,
            'post_resp_txn_time': default_post_resp_txn_time,
            'cpu_utilization_base': default_cpu_utilization_base,
            'cpu_utilization_txn': default_cpu_utilization_txn,
        })

VBox(children=(SelectionSlider(description='Pre_Resp_Time', index=3, options=(0, 0.001, 0.002, 0.005, 0.01, 0.…

Figure(axes=[Axis(label='Requests Per Minute', scale=LogScale(max=10000.0, min=10.0)), Axis(label='Actual Thro…

# Percent Throughput Reduction

In [10]:
# Ceiling and arming threshold used when cleaning up the throughput degradation curve.
tpr_ymax = 100
tpr_ymin = 2e-4

def throughput_percent_degradation(x, pre_resp_time=1, pre_resp_txn_time=1, post_resp_time=1, post_resp_txn_time=1, cpu_utilization_base=1, cpu_utilization_txn=1):
    """Percent of the no-agent throughput that is lost when the agent runs.

    Parameters:
        x: offered request rates (requests per minute).
        remaining parameters: model settings forwarded unchanged to
            no_agent_throughput and with_agent_throughput.

    Returns:
        ``100 * |with - without| / without``, passed through
        set_percentile_ceiling with ``tpr_ymin`` as the threshold and
        ``2 * tpr_ymax`` as the ceiling.
    """
    n = no_agent_throughput(x, pre_resp_time, pre_resp_txn_time, post_resp_time, post_resp_txn_time, cpu_utilization_base, cpu_utilization_txn)
    w = with_agent_throughput(x, pre_resp_time, pre_resp_txn_time, post_resp_time, post_resp_txn_time, cpu_utilization_base, cpu_utilization_txn)

    # A percent degradation is measured against the no-agent baseline, the
    # same normalisation latency_percent_overhead uses. Dividing by (w + n)
    # understated the loss (a 50% drop was reported as 33.3%).
    percent_degradation = 100 * numpy.abs((w - n) / n)
    return set_percentile_ceiling(percent_degradation, tpr_ymin, 2*tpr_ymax)
    
# Plot the percent throughput degradation over the request rates in xr. Only
# the x axis is given a log scale here; no yscale is passed. ysc_max is
# tpr_ymax - 1, below the 2 * tpr_ymax ceiling that
# throughput_percent_degradation writes into the curve.
fig = make_quick_plot(
        xr, throughput_percent_degradation,
        ax='Requests Per Minute',
        ay="Percent Throughput Degradation",
        ysc_min=0,
        ysc_max=tpr_ymax - 1,
        xscale=LogScale,
        defaults={
            'pre_resp_time': default_pre_resp_time,
            'pre_resp_txn_time': default_pre_resp_txn_time,
            'post_resp_time': default_post_resp_time,
            'post_resp_txn_time': default_post_resp_txn_time,
            'cpu_utilization_base': default_cpu_utilization_base,
            'cpu_utilization_txn': default_cpu_utilization_txn,
        })

VBox(children=(SelectionSlider(description='Pre_Resp_Time', index=3, options=(0, 0.001, 0.002, 0.005, 0.01, 0.…

Figure(axes=[Axis(label='Requests Per Minute', scale=LogScale(max=10000.0, min=10.0)), Axis(label='Percent Thr…