In [1]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [3]:
import numpy
import scipy.stats as st
import math
from bqplot import LinearScale, OrdinalColorScale, ColorAxis, Axis, Lines, Figure

from ipywidgets import HBox, VBox, FloatSlider, FloatLogSlider, SelectionSlider

In [3]:
%%html
<style>
.MathJax {float:left !important;}
.js-plot-link-container { display:none !important; }
</style>

# Purpose

The purpose of this notebook is to determine the tradeoffs of compressing the encoding the Better CAT's transaction event priority field, `pr`.

## Advantages

The main advantage of compressed priority encoding would be a per-packet byte savings. Currently, we use 20 bytes to encode prioirty as a 16 decimal-digit value, e.g. "0.1242135235235235" - 2 for the quotation marks, 1 for the digit before the decimal, 1 for the decimal, and 16 after the decimal. If, for example, we were to change that to a 1-digit integer (i.e. a priority could only have a value between 0 to 9, so "0.1242135235235235" turns into just 1), we would only need 1 byte per packet.


## Disadvantages

I can think of three possible disadvantages here: version incompatibility, feature obstacles, and potential dropped trace spans that would not have been dropped had the priority value been encoded with more digits.


* __Version Incompatibility__.

  Encoding the `pr` field would cause problems if interfacing with a non-NR app that doesn't encode its priorities.

  If, for example, we were to encode priority as an integer between, say, 0 and 999, and we receive requests from a system whose priorities are encoded as 16 decimal-digit floats, then those packets would be completely buried.  



* __Feature Obstacles__.

  Reducing the total possible `pr` values gives less design space to implement future features.

  If we were, for example, to encode priority as a 1 digit integer, then we only have 10 possible values for the priority. In order to implement the `sampled=True` feature (where some samples are assigned an increased priority to ensure all spans of the trace are recorded), we would actually be forced to only encode priority as a digit from 0 to 4, as we would need to use the other half of the available values to give space to the super-sampled values. For example, a priority whose original value was 0.5242135235235235 with `sampled=False` would need be encoded to a single-digit integer as 2 so that there would be "room" to allow its `sampled=True` value to be 7.  


While the first two of these problems are more abstract in nature, we can calculate an answer for the third one as a probability that a span will be improperly dropped due to compressed encoding.

## Calculating the Probability of Dropping a Trace Due to Compressed Encoding

First, some definitions:
    
$$P_{DCE}(x) = Probability\:that\:a\:"$$

In [4]:
#digits = [1, 5, 9]

#raw_sizes = [2.5, 3, 3.5, 4]
#sizes = [int(10**x) for x in raw_sizes]

logstep_throughput = 0.25
log_throughputs = [2, 2.25, 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4]
throughputs = [math.floor(10**x) for x in log_throughputs]

jitters = [10.0, 45.0, 70.0, 140.0]
cluster_ratios = [1.0, 10.0, 100.0]

In [5]:
def EMG_cdf(x, mu, stddev, rate):
    u = rate*(x-mu)
    v = rate*stddev
    
    t1 = st.norm.cdf(u, 0, v)
    t2 = st.norm.cdf(u, v*v, v)
    t3 = t2*numpy.exp(-u + v*v/2)
    
    return t1 - t3

def prob_overlap(x, jitter, throughput, cluster_ratio):
    reqs_per_ms = throughput/60.0/1000.0
    mu = reqs_per_ms/cluster_ratio
    return EMG_cdf(x, mu, jitter, reqs_per_ms)

In [6]:
chosen_cro = cluster_ratios[2]
xdata = numpy.arange(-4.0, 4.0, 0.01)
ydata = prob_overlap(xdata, jitters[0], throughputs[0], chosen_cro)
  
x_sc = LinearScale(min=min(xdata), max=max(xdata))
y_sc = LinearScale(min=-0.1, max=1.1)
ax_x = Axis(label='x', scale=x_sc, grid_lines='solid')
ax_y = Axis(label='y', scale=y_sc, orientation='vertical', side='left', grid_lines='solid')
prob_line = Lines(x=xdata, y=ydata, colors=['Gray'], scales={'x': x_sc, 'y': y_sc}, visible=True)

throughputs_slider = FloatLogSlider(value=throughputs[0], min=log_throughputs[0], max=log_throughputs[-1], step=logstep_throughput, description="Throughput")
jitters_slider = FloatSlider(value=jitters[0], min=jitters[0], max=jitters[-1], step=jitters[1]-jitters[0], description="Network Jitter stddev (ms)")

def on_change(change):
    ydata = prob_overlap(xdata, jitters_slider.value, throughputs_slider.value, chosen_cro)
    ax_y.scale = y_sc
    prob_line.y = ydata
    
throughputs_slider.observe(on_change, names='value')
jitters_slider.observe(on_change, names='value')

fig = Figure(marks=[prob_line], axes=[ax_x, ax_y], title='CDF of Exponentially Modified Gaussian')
VBox([throughputs_slider, jitters_slider, fig])

VBox(children=(FloatLogSlider(value=100.0, description='Throughput', min=2.0, step=0.25), FloatSlider(value=10…

In [25]:
interval_steps = 100

def Ej(x, num_digits, jitter, data_rate, cluster_ratio):
    stddev = numpy.sqrt(data_rate)
    integral_delta_bounds = numpy.linspace(-3*stddev, 3*stddev, interval_steps)
    
    total = []
    for dri in integral_delta_bounds:
        dri_rate = data_rate + dri
        p = prob_overlap(0, jitter, dri_rate, cluster_ratio)
        px = 10**(-2*num_digits)
        poli = 1-st.binom.cdf(0, dri_rate, p*px)
        total.append(poli)
        
    # data_rate*10**(-2*num_digits)
    return total

Ej(1, 6, 150, 10000, 100)

  vals = special.bdtr(k, n, p)


[4.6910475504091664e-09,
 4.693949229306327e-09,
 4.696850908203487e-09,
 4.699752587100647e-09,
 4.702654265997808e-09,
 4.705555944894968e-09,
 4.708457623792128e-09,
 4.711359302689289e-09,
 4.714260981586449e-09,
 4.717162660483609e-09,
 4.721147917052804e-09,
 4.724050262083779e-09,
 4.726952607114754e-09,
 4.729854952145729e-09,
 4.732757297176704e-09,
 4.735659642207679e-09,
 4.738561987238654e-09,
 4.7419480564414584e-09,
 4.7448504014724335e-09,
 4.747752746503409e-09,
 4.750655091534384e-09,
 4.754648452731658e-09,
 4.757551463896448e-09,
 4.760454475061238e-09,
 4.7633574862260275e-09,
 4.766260497390817e-09,
 4.769163508555607e-09,
 4.772066519720397e-09,
 4.774969530885187e-09,
 4.777872542049977e-09,
 4.780775553214767e-09,
 4.7836785643795565e-09,
 4.787679919182608e-09,
 4.791067542697647e-09,
 4.7939712199962514e-09,
 4.796874897294856e-09,
 4.799778574593461e-09,
 4.802682251892065e-09,
 4.80558592919067e-09,
 4.808489606489275e-09,
 4.811393283787879e-09,
 4.81429696