# Chapter 6. Performance analysis by server elimination

This Notebook computes and plots the numerical results for Chapter 6 of the Ph.D. thesis. Sections 1 and 2 of this Notebook define functions for computing the results, while Sections 3 and 4 plot them. For each section, we add a pointer to the corresponding section or subsection in the manuscript.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Path prefix (with trailing slash) prepended to the name of every CSV file
# that this notebook writes or reads back.
folder = "data/"

## 1. Random customer assignment (Section 6.2)

### Homogeneous queue (Subsection 6.2.1)

$$
\psi = \prod_{s=r}^S \left( 1 - \rho_{|s} \right)
\quad \text{and} \quad
L = \sum_{s=r}^S \frac{ \rho_{|s} }{ 1 - \rho_{|s} },
$$
where
$$
\rho_{|s}
= \frac1{s \mu} \frac{ \binom{s}{r} }{ \binom{S}{r} } S \lambda
= \rho \frac{ \binom{s-1}{r-1} }{ \binom{S-1}{r-1} }
$$
is the load in the subsystem restricted to $s$ arbitrary servers.

We just need to compute
$$
    b_s = \binom{s-1}{r-1}
$$
for each $s = r,\ldots,S$.
We have $b_r = 1$ and, for each $s = r+1,\ldots,S$,
$$
    b_s
    = \frac{ (s-1)! }{ (r-1)! (s-r)! }
    = \frac{s-1}{s-r} \frac{ ((s-1)-1)! }{ (r-1)! ((s-1)-r)! }
    = \left( 1 + \frac{r-1}{s-r} \right) b_{s-1}.
$$
We use this last equality to compute recursively $b_s$ for each $s = r+1,\ldots,S$.

In [None]:
def global_homogeneous(S=10, r=3, ρ=.5):
    """Return the mean number of jobs L in the homogeneous queue.

    Evaluates L = sum_{s=r}^{S} ρ_{|s} / (1 - ρ_{|s}) with
    ρ_{|s} = ρ * binom(s-1, r-1) / binom(S-1, r-1).

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree, i.e. the lower bound of the summation index s.
    ρ : float
        Load of the complete system.
    """
    n_levels = S - r + 1
    # weights[i] holds b_{r+i} = binom(r+i-1, r-1); b_r = 1 comes from np.ones.
    weights = np.ones(n_levels, dtype=float)
    # Successive ratios b_s / b_{s-1} = 1 + (r-1)/(s-r) for s = r+1, ..., S.
    growth = 1 + (r - 1) / np.arange(1, n_levels)
    weights[1:] = np.cumprod(growth)
    # Dividing by b_S turns the binomials into the ratios b_s / b_S.
    loads = ρ * (weights / weights[-1])
    return np.sum(loads / (1 - loads))

### Heterogeneous degrees (Subsection 6.2.2)

$$
\psi = \prod_{s=r}^S \left( 1 - \rho_{|s} \right)
\quad \text{and} \quad
L = \sum_{s=r}^S \frac{ \rho_{|s} }{ 1 - \rho_{|s} },
$$
where
$$
\rho_{|s} = \frac1{ s \mu }
\sum_{k=1}^K \frac{ \binom{s}{r_k} }{ \binom{S}{r_k} } S \lambda p_k
= \rho \sum_{k=1}^K \frac{ \binom{s-1}{r_k-1} }{ \binom{S-1}{r_k-1} } p_k
$$
is the load in the queue restricted to $s$ arbitrary servers.

Implementation: We let
$$
b_{k,s} = p_k \frac{ \binom{s-1}{r_k-1} }{ \binom{S-1}{r_k-1} },
$$
with the convention that $b_{k,s} = 0$ if $s < r_k$, and we compute it by induction on $s$ for each $k$, using the formula:
$$
\binom{s-1}{r-1} = \left( 1 + \frac{r-1}{s-r} \right) \times \binom{s-2}{r-1}.
$$
Then we let $w_s = b_{1,s} + \ldots + b_{K,s}$ for each $s = 1,\ldots,S$.

We can express all the metrics as functions of these quantities:
\begin{align*}
\rho_{|s} &= \rho \times w_s,
&\qquad \psi &= \prod_{s=1}^S (1 - \rho_{|s}),
&\qquad L &= \sum_{s=1}^S \frac{ \rho_{|s} }{ 1 - \rho_{|s} }, \\
\rho_{k|s} &= \frac{ \rho \times b_{k,s} }{ 1 - \rho (w_s - b_{k,s}) },
&&
&\qquad L_k &= \sum_{s=r_k}^S \frac{ \rho_{k|s} }{ 1 - \rho_{k|s} }
\end{align*}

In [None]:
def global_heterogeneous(S, r, p, ρ):
    """Compute the metrics of the queue with heterogeneous degrees.

    Parameters
    ----------
    S : int
        Number of servers.
    r : sequence of int
        Degree r_k of each class k.
    p : sequence of float
        Weight p_k of each class k (same length as ``r``).
    ρ : float
        Load of the complete system.

    Returns
    -------
    tuple
        ``(ψ, L / S / ρ, L_k / S / ρ / p_k)`` where ψ is the product of the
        ``1 - ρ_{|s}``, L is the sum of the ``ρ_{|s} / (1 - ρ_{|s})`` and the
        last entry is an array with one value per class.
    """
    n_classes = len(r)

    # weights[k, s-1] = b_{k,s}; the entries with s < r_k stay at zero.
    weights = np.zeros((n_classes, S), dtype=float)
    for k, degree in enumerate(r):
        row = weights[k]  # view: the updates below write into `weights`
        row[degree - 1] = 1.
        row[degree:] = np.cumprod(1 + (degree - 1) / np.arange(1, S - degree + 1))
        # Normalise by the value at s = S and apply the class weight p_k.
        row *= p[k] / row[-1]
    total = np.sum(weights, axis=0)  # w_s: sum over all classes k = 1,...,K

    # Global metrics.
    loads = ρ * total
    empty_prob = np.prod(1. - loads)
    mean_jobs = np.sum(loads / (1. - loads))

    # Per-class metrics: ρ_{k|s} = ρ b_{k,s} / (1 - ρ (w_s - b_{k,s})).
    per_class = []
    for row in weights:
        class_loads = ρ * row / (1. - ρ * (total - row))
        per_class.append(np.sum(class_loads / (1. - class_loads)))
    class_jobs = np.array(per_class, dtype=float)

    return empty_prob, mean_jobs / S / ρ, class_jobs / S / ρ / p

## 2. Local assignment (Section 6.3)

### Random customer assignment (Subsection 6.3.3)

In a homogeneous, local, randomized load balancing system of parameters $ S $, $ r $ and $ \rho =\frac{\lambda}{\mu} $, the stability condition is $ \rho < 1 $. Defining $ \rho_{|s} = \frac{1-\frac{r-1}{s}}{1-\frac{r-1}{S}}\rho$, the probability that the system is empty is $\psi = \psi_{|1..S}$ , with
	$$
	\psi_{|1..s} = \left\{
	\begin{array}{l}
	1 \text{ if } s < r\text{,}\\
	(1-\rho_{|s})\frac{s}{\sum_{t=1}^{s}\frac{1}{\psi_{|1..t-1}\psi_{|1..s-t}}}
	\text{ otherwise.}
	\end{array}
	 \right.
	$$

In [None]:
def line_ψ(S=10, r=3, ρ=.5):
    """Return the array of ψ_{|1..s} for s = 0, ..., S.

    Entry ``s`` of the result is ψ_{|1..s}; entries with ``s < r`` are 1.
    The other entries follow the recursion
    ψ_{|1..s} = (1 - ρ_{|s}) * s / sum_{t=1}^{s} 1 / (ψ_{|1..t-1} ψ_{|1..s-t}).

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree; the recursion starts at ``s = r``.
    ρ : float
        Load of the complete system.
    """
    ψ = np.ones(S + 1)
    for s in range(r, S + 1):
        ρs = ρ * (1 - (r - 1) / s) / (1 - (r - 1) / S)
        # head[t-1] is ψ_{|1..t-1} and head[::-1][t-1] is ψ_{|1..s-t}.
        head = ψ[:s]
        split_terms = 1 / head / head[::-1]
        ψ[s] = (1 - ρs) * s / np.sum(split_terms)
    return ψ

For each $t \in 1..S-r+1$, the mean number of class-$t$ jobs in the system is $L_{t} = L_{t|1..S}$, with
\begin{equation}
L_{t|1..s} =
\frac{\frac{\rho_{|S}}{1-\frac{r-1}{S}}
+\psi_{|1..s}\left(
\sum\limits_{u=1}^{t-1}\frac{L_{t-u|1..s-u}}{\psi_{|1..u-1}\psi_{|1..s-u}}
	+
\sum\limits_{u=t+r}^{s}\frac{L_{t|1..u-1}}{\psi_{|1..u-1}\psi_{|1..s-u}}
\right)
}{s(1-\rho_{|s})}\text{.}
\end{equation}

In [None]:
def line_Li(S=10, r=3, ρ=.5):
    """Return the mean number of jobs of each class on the line.

    Fills a table ``Lt[t, s]`` holding L_{t|1..s} with the recursion given in
    the text, and returns its last column for t = 1, ..., S - r + 1.

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree.
    ρ : float
        Load of the complete system.
    """
    ψ = line_ψ(S, r, ρ)
    n_classes = S - r + 1
    Lt = np.zeros([n_classes + 1, S + 1])
    for t in range(1, n_classes + 1):
        for s in range(t + r - 1, S + 1):
            ρs = ρ * (1 - (r - 1) / s) / (1 - (r - 1) / S)
            # Index sets of the two sums: u = 1..t-1 and u = t+r..s.
            before = np.arange(1, t)
            after = np.arange(t + r, s + 1)
            # First sum: terms L_{t-u|1..s-u} / (ψ_{|1..u-1} ψ_{|1..s-u}).
            shifted = np.sum(Lt[t - before, s - before] / ψ[before - 1] / ψ[s - before])
            # Second sum: terms L_{t|1..u-1} / (ψ_{|1..u-1} ψ_{|1..s-u}).
            kept = np.sum(Lt[t, after - 1] / ψ[after - 1] / ψ[s - after])
            constant = ρ / (1 - (r - 1) / S)
            Lt[t, s] = (constant + ψ[s] * (shifted + kept)) / s / (1 - ρs)
    return Lt[1:, -1]

The total mean number of jobs in the system is $L = L_{|1..S}$, with
\begin{equation}
L_{|1..s} = \frac{
		\rho_{|s}+\frac{\psi_{|1..s}}{s}\sum_{t=1}^{s}\frac{L_{|1..t-1}+L_{|1..s-t}}{\psi_{|1..t-1}\psi_{|1..s-t}}
	}{1-\rho_{|s}
}\text{.}
\end{equation}

We compute it with a dedicated function, which lets us check that the result is consistent with the per-class values $L_t$ computed above.

In [None]:
def line_L(S=10, r=3, ρ=.5):
    """Return the total mean number of jobs L = L_{|1..S} on the line.

    ``L[s]`` holds L_{|1..s}; it is 0 for ``s < r`` and follows the recursion
    given in the text otherwise.

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree; the recursion starts at ``s = r``.
    ρ : float
        Load of the complete system.
    """
    ψ = line_ψ(S, r, ρ)
    L = np.zeros(S + 1)
    for s in range(r, S + 1):
        ρs = ρ * (1 - (r - 1) / s) / (1 - (r - 1) / S)
        # Position t-1 of a slice is the "1..t-1" side of the split and the
        # same position of the reversed slice is the "1..s-t" side.
        jobs, empty = L[:s], ψ[:s]
        split_terms = (jobs + jobs[::-1]) / empty / empty[::-1]
        L[s] = (ρs + ψ[s] / s * np.sum(split_terms)) / (1 - ρs)
    return L[-1]

### Ring queue (Subsection 6.3.4)

In [None]:
def ring(S=10, r=3, ρ=.5):
    """Return the mean number of jobs in the ring queue.

    The result is ρ / (1 - ρ) plus the value of ``line_L`` for a line of
    ``S - 1`` servers with load ``(1 - (r - 1) / (S - 1)) * ρ``.

    Parameters
    ----------
    S : int
        Number of servers of the ring.
    r : int
        Degree.
    ρ : float
        Load of the ring.
    """
    first_term = ρ / (1 - ρ)
    line_size = S - 1
    line_load = (1 - (r - 1) / line_size) * ρ
    return first_term + line_L(line_size, r, line_load)

## 3. Gain of differentiation (Subsection 6.4.1)

### Impact of load

We first consider three scenarios to study random customer assignment:
- homogeneous degree 6 for all customers;
- homogeneous degree 12 for all customers;
- degree 6 for half of the customers, 12 for the other half.

The rate is computed as a function of $\rho$. The results are stored in a csv file for re-use in other programs.

In [None]:
S = 100
step = .001

# Scenarios to explore: a name (used in the CSV column names), the list of
# degrees and the matching list of class weights.
scenarios = [
    {'name': 'r6', 'degree': [6], 'distrib': [1]},
    {'name': 'r12', 'degree': [12], 'distrib': [1]},
    {'name': 'r6_12', 'degree': [6, 12], 'distrib': [.5, .5]},
]

# Loads at which the metrics are evaluated.
ρρ = np.arange(step, 1, step)
I = len(ρρ)

for s in scenarios:
    s['mean_rate'] = np.zeros(I)
    s['detail_rate'] = np.zeros([len(s['degree']), I])
    for i, ρ in enumerate(ρρ):
        metrics = global_heterogeneous(S, s['degree'], s['distrib'], ρ)
        wait, detail_wait = metrics[1], metrics[2]
        # The rate is the inverse of the value returned by the function.
        s['mean_rate'][i] = 1 / wait
        s['detail_rate'][:, i] = 1 / detail_wait

# One column for the load, then the mean rate of every scenario, then the
# per-degree rates of every scenario.
results = {'rho': ρρ}
for s in scenarios:
    results["mean_%s" % s['name']] = s['mean_rate']
for s in scenarios:
    for i, r in enumerate(s['degree']):
        results["detail_%s_r%s" % (s['name'], r)] = s['detail_rate'][i]

csv = pd.DataFrame(results)
fn = folder + "hetero_1_" + str(S) + ".csv"
csv.to_csv(fn, index=False)

Display of the results.

In [None]:
# Curves to draw, as (key in `results`, legend label) pairs.
# NOTE(review): in a Jupyter session this name presumably shadows the
# `display` helper made available by IPython - confirm it is not needed later.
display = [
    ('mean_r6', 'Degree 6, homogeneous'),
    ('mean_r12', 'Degree 12, homogeneous'),
    ('detail_r6_12_r12', 'Degree 12, 6-12 mix'),
    ('detail_r6_12_r6', 'Degree 6, 6-12 mix'),
]

for column, legend_label in display:
    plt.plot(results['rho'], results[column], label=legend_label)

plt.xlim([0, 1])
plt.ylim(ymin=0)
plt.xlabel('ρ')
plt.ylabel('Service rate')
plt.legend(loc='best')
plt.show()

### Impact of population distribution

Here we focus on a mixed degree 6 / degree 12 population, but now the load is fixed ($0.9$ or $0.99$) and the 6-12 proportion is the parameter. Results are stored in a csv for further use.

In [None]:
S = 100
step = .001
# Proportions of low-degree customers at which the metrics are evaluated.
low_degree_proportion = np.arange(step, 1, step)
degrees = [6, 12]

# Parameters to explore: one fixed load per scenario.
scenarios = [
    {'name': 'Moderately saturated charge', 'rho': .9},
    {'name': 'Heavily saturated charge', 'rho': .99},
]

I = len(low_degree_proportion)
for s in scenarios:
    # One array of rates per degree, stored under the degree itself as key.
    s.update({r: np.zeros(I) for r in degrees})
    for i, p in enumerate(low_degree_proportion):
        wait = global_heterogeneous(S, degrees, [p, 1-p], s['rho'])[2]
        for r, class_wait in zip(degrees, wait):
            s[r][i] = 1 / class_wait

# One column for the proportion, then one per (load, degree) pair.
results = {'ld_proportion': low_degree_proportion}
for s in scenarios:
    for r in degrees:
        results["rho_%s_r_%s" % (s['rho'], r)] = s[r]

csv = pd.DataFrame(results)
fn = folder + "hetero_2_" + str(S) + ".csv"
csv.to_csv(fn, index=False)

In [None]:
# One curve per (scenario, degree) pair, highest degree first.
for s in scenarios:
    for r in reversed(degrees):
        column = "rho_%s_r_%s" % (s['rho'], r)
        legend_label = "%s, degree %s" % (s['name'], r)
        plt.plot(results['ld_proportion'], results[column], label=legend_label)

plt.xlim([0, 1])
plt.ylim(ymin=0)
plt.xlabel('Low degree proportion')
plt.ylabel('Service rate')
plt.legend(loc='best')
plt.show()

## 4. Impact of locality (Subsection 6.4.2)

### Cost of heterogeneity and locality

Here, for a given size $S$, degree $r$, and load $\rho$, we compare the service rate of random global assignment, random contiguous assignment on a line, and random contiguous assignment on a ring. As the line classes are not equivalent, the rate of each class is displayed. Results are stored in csv files.

In [None]:
def load_study(S=100, r=10, ρ = .9):
    """Compute the line, ring and global metrics and store them in a CSV file.

    The file has one row per line class c = 1, ..., S - r + 1 and the columns
    ``c``, ``Li`` (per-class line value), ``L`` (line), ``R`` (ring) and ``G``
    (global). ``L``, ``R`` and ``G`` are the results of ``line_L``, ``ring``
    and ``global_homogeneous`` divided by ``S * ρ`` and repeated on every row;
    ``Li`` is the result of ``line_Li`` times ``(S - r + 1) / (S * ρ)``.

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree.
    ρ : float
        Load.
    """
    n_classes = S - r + 1
    # Used to broadcast the scalar metrics to one value per class.
    flat = np.ones(n_classes)
    table = pd.DataFrame({
        'c': np.arange(1, n_classes + 1),
        'Li': line_Li(S, r, ρ) * n_classes / S / ρ,
        'L': line_L(S, r, ρ) / S / ρ * flat,
        'R': ring(S, r, ρ) / S / ρ * flat,
        'G': global_homogeneous(S, r, ρ) / S / ρ * flat,
    })
    path = folder + "load_study_S_" + str(S) + "_r_" + str(r) + "_rho_" + str(ρ) + ".csv"
    table.to_csv(path, index=False)

In [None]:
def plot_local_study(S=100, r=10, ρ=.9):
    """Plot the inverse of the metrics stored by ``load_study``.

    The CSV file written by ``load_study(S, r, ρ)`` is used as a cache: it is
    read if it exists and computed first otherwise. The curves show the
    inverse of the columns ``G``, ``R``, ``L`` and ``Li`` against the line
    class ``c``.

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree.
    ρ : float
        Load.
    """
    # Must match the file name built by load_study.
    path = folder + "load_study_S_" + str(S) + "_r_" + str(r) + "_rho_" + str(ρ) + ".csv"
    try:
        csv = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing file triggers the computation; any other error
        # (interruption, unreadable file, ...) is reported to the caller
        # instead of being silently swallowed.
        load_study(S, r, ρ)
        csv = pd.read_csv(path)

    curves = [('G', "Global"), ('R', "Ring"),
              ('L', "Line (Mean)"), ('Li', "Line (Classes)")]
    for column, legend_label in curves:
        plt.plot(csv['c'].values, 1. / csv[column].values, label=legend_label)
    plt.xlabel('Line class')
    plt.ylabel('Service rate')
    plt.xlim([1, S - r + 1])
    plt.legend(loc='best')
    plt.show()

In [None]:
# Draw the comparison for 100 servers, degree 8 and load 0.9.
plot_local_study(S=100, r=8, ρ=.9)

### Impact of the parameters

In the next three computations, we fix two parameters amongst $S$, $r$, and $\rho$, and we study the impact of the third parameter.

#### Load $\rho$

In [None]:
def load_study_rho(S=100, r=10, step=.001):
    """Compute the line, ring and global metrics as functions of the load.

    For every load ρ in ``np.arange(step, 1, step)``, the results of
    ``line_L``, ``ring`` and ``global_homogeneous`` are divided by ``S * ρ``
    and stored in the columns ``L``, ``R`` and ``G`` of a CSV file, next to
    the column ``rho``.

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree.
    step : float
        Spacing between two consecutive loads.
    """
    loads = np.arange(step, 1, step)
    columns = {name: np.zeros(len(loads)) for name in ('L', 'R', 'G')}

    for i, ρ in enumerate(loads):
        norm = S * ρ
        columns['L'][i] = line_L(S, r, ρ) / norm
        columns['R'][i] = ring(S, r, ρ) / norm
        columns['G'][i] = global_homogeneous(S, r, ρ) / norm

    table = pd.DataFrame({'rho': loads, **columns})
    path = folder + "load_study_rho_S_" + str(S) + "_r_" + str(r) + ".csv"
    table.to_csv(path, index=False)

In [None]:
def plot_local_study_rho(S=100, r=10, step=.001):
    """Plot the inverse of the metrics stored by ``load_study_rho``.

    The CSV file written by ``load_study_rho(S, r, step)`` is used as a
    cache: it is read if it exists and computed first otherwise. The curves
    show the inverse of the columns ``G``, ``R`` and ``L`` against ``rho``.

    Parameters
    ----------
    S : int
        Number of servers.
    r : int
        Degree.
    step : float
        Spacing between two consecutive loads; only used when the file has
        to be computed.
    """
    # Must match the file name built by load_study_rho.
    # NOTE(review): the name does not depend on `step`, so an existing file
    # is reused even if it was computed with another step.
    path = folder + "load_study_rho_S_" + str(S) + "_r_" + str(r) + ".csv"
    try:
        csv = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing file triggers the computation; any other error
        # is reported to the caller instead of being silently swallowed.
        load_study_rho(S, r, step)
        csv = pd.read_csv(path)

    for column, legend_label in [('G', "Global"), ('R', "Ring"), ('L', "Line")]:
        plt.plot(csv['rho'].values, 1. / csv[column].values, label=legend_label)

    plt.xlim([0,1]); plt.ylim(0, 10)
    plt.legend(loc='best')
    plt.show()

In [None]:
# Draw the impact of the load for 100 servers and degree 10.
plot_local_study_rho(100, 10, step=.001)

#### Parallelism degree $r$

In [None]:
def load_study_r(S=100, ρ=.9):
    """Compute the line, ring and global metrics as functions of the degree.

    For every degree r = 1, ..., S, the results of ``line_L``, ``ring`` and
    ``global_homogeneous`` are divided by ``S * ρ`` and stored in the columns
    ``L``, ``R`` and ``G`` of a CSV file, next to the column ``r``.

    Parameters
    ----------
    S : int
        Number of servers.
    ρ : float
        Load.
    """
    degrees = np.arange(1, S + 1)
    columns = {name: np.zeros(len(degrees)) for name in ('L', 'R', 'G')}
    norm = S * ρ  # the same for every degree

    for i, r in enumerate(degrees):
        columns['L'][i] = line_L(S, r, ρ) / norm
        columns['R'][i] = ring(S, r, ρ) / norm
        columns['G'][i] = global_homogeneous(S, r, ρ) / norm

    table = pd.DataFrame({'r': degrees, **columns})
    path = folder + "load_study_r_S_" + str(S) + "_rho_" + str(ρ) + ".csv"
    table.to_csv(path, index=False)

In [None]:
def plot_local_study_r(S=100, ρ=.9):
    """Plot the inverse of the metrics stored by ``load_study_r``.

    The CSV file written by ``load_study_r(S, ρ)`` is used as a cache: it is
    read if it exists and computed first otherwise. The curves show the
    inverse of the columns ``G``, ``R`` and ``L`` against ``r``.

    Parameters
    ----------
    S : int
        Number of servers.
    ρ : float
        Load.
    """
    # Must match the file name built by load_study_r.
    path = folder + "load_study_r_S_" + str(S) + "_rho_" + str(ρ) + ".csv"
    try:
        csv = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing file triggers the computation; any other error
        # is reported to the caller instead of being silently swallowed.
        load_study_r(S, ρ)
        csv = pd.read_csv(path)

    for column, legend_label in [('G', "Global"), ('R', "Ring"), ('L', "Line")]:
        plt.plot(csv['r'].values, 1. / csv[column].values, label=legend_label)

    plt.xlim([1,S]); plt.ylim(0, 10)
    plt.legend(loc='best')
    plt.show()

In [None]:
# Draw the impact of the degree for 100 servers and load 0.9.
plot_local_study_r(100, .9)

#### Number $S$ of servers

In [None]:
def load_study_S(Smax=100, r=10, ρ=.9):
    """Compute the line, ring and global metrics as functions of the size.

    For every number of servers S = r + 1, ..., Smax, the results of
    ``line_L``, ``ring`` and ``global_homogeneous`` are divided by ``S * ρ``
    and stored in the columns ``L``, ``R`` and ``G`` of a CSV file, next to
    the column ``S``.

    Parameters
    ----------
    Smax : int
        Largest number of servers considered.
    r : int
        Degree.
    ρ : float
        Load.
    """
    sizes = np.arange(r + 1, Smax + 1)
    columns = {name: np.zeros(len(sizes)) for name in ('L', 'R', 'G')}

    for i, S in enumerate(sizes):
        norm = S * ρ
        columns['L'][i] = line_L(S, r, ρ) / norm
        columns['R'][i] = ring(S, r, ρ) / norm
        columns['G'][i] = global_homogeneous(S, r, ρ) / norm

    table = pd.DataFrame({'S': sizes, **columns})
    path = (folder + "load_study_S_Smax_" + str(Smax)
            + "_r_" + str(r) + "_rho_" + str(ρ) + ".csv")
    table.to_csv(path, index=False)

In [None]:
def plot_local_study_S(Smax=100, r=10, ρ=.9):
    """Plot the inverse of the metrics stored by ``load_study_S``.

    The CSV file written by ``load_study_S(Smax, r, ρ)`` is used as a cache:
    it is read if it exists and computed first otherwise. The curves show
    the inverse of the columns ``G``, ``R`` and ``L`` against ``S``.

    Parameters
    ----------
    Smax : int
        Largest number of servers considered.
    r : int
        Degree.
    ρ : float
        Load.
    """
    # Must match the file name built by load_study_S.
    path = (folder + "load_study_S_Smax_" + str(Smax)
            + "_r_" + str(r) + "_rho_" + str(ρ) + ".csv")
    try:
        csv = pd.read_csv(path)
    except FileNotFoundError:
        # Only a missing file triggers the computation; any other error
        # is reported to the caller instead of being silently swallowed.
        load_study_S(Smax, r, ρ)
        csv = pd.read_csv(path)

    for column, legend_label in [('G', "Global"), ('R', "Ring"), ('L', "Line")]:
        plt.plot(csv['S'].values, 1 / csv[column].values, label=legend_label)

    plt.xlim([r + 1, Smax]); plt.ylim(ymin=0)
    plt.legend(loc='best')
    plt.show()

In [None]:
# Draw the impact of the number of servers, up to 300, with the default
# degree and load of plot_local_study_S.
plot_local_study_S(300)