# Experiment 4: Impact of Datacenter Scheduler

> What happens when the scheduling policy changes?
Single topology, single workload, multiple scheduling policies.
Will a change to my datacenter increase risk? Is it worth the risk?

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import radice

# Directory that receives every figure saved by this notebook.
FIGURE_PATH = 'figures/04-scheduler'

# Apply the project-wide plotting setup (presumably styling; see `radice.set`),
# then make sure the output directory exists before any cell saves into it.
radice.set()
os.makedirs(FIGURE_PATH, exist_ok=True)

## Traditional Allocation Policies

The experimental results are obtained by running the following command:
```bash
bin/radice run -r 4096 -p 80 portfolios/scheduler.yml -P portfolio=scheduler
```

In [None]:
# Load only the columns used below from the `scheduler` portfolio results.
# `read_dictionary` and `buffer_size` are not pandas options; pandas forwards
# them to the parquet engine (assumed to be pyarrow, which then reads `id`
# dictionary-encoded -- TODO confirm the engine in use).
risk = pd.read_parquet(
    'data/risk/portfolio=scheduler',
    columns=['scheduler', 'seed', 'timestamp', 'id', 'cost'],
    read_dictionary=['id'],
    buffer_size=4096,
)
risk

In [None]:
# Monthly risk keyed by scheduler only; the aggregation itself is defined by
# `radice.compute_monthly_risk` (presumably it sums cost per month -- verify there).
scheduler_keys = ['scheduler']
risk_pm = radice.compute_monthly_risk(risk, keys=scheduler_keys)
risk_pm

In [None]:
# Box plot of the monthly risk for every scheduling policy in the portfolio.

# Scheduler key (value of the `scheduler` column) -> label shown on the y-axis.
# A single mapping keeps the plotting order and the tick labels from drifting
# apart, which two parallel lists would allow silently.
scheduler_labels = {
    'default': "Baseline",
    'mem': "Free Memory (Max)",
    'core-mem': "Free Memory per pCPU (Max)",
    'provisioned-cores': "Free vCPUs (Max)",
    'active-servers': "VM Count (Min)",
    'mem-inv': "Free Memory (Min)",
    # Presumably the inverse of 'core-mem', so it uses the same "pCPU" wording.
    'core-mem-inv': "Free Memory per pCPU (Min)",
    'provisioned-cores-inv': "Free vCPUs (Min)",
    'active-servers-inv': "VM Count (Max)",
    'combo': "Combo",
    'random': "Random",
}
order = list(scheduler_labels)
ylabels = list(scheduler_labels.values())

fig, ax = plt.subplots(figsize=radice.figsize(2.3))

# NOTE(review): only two colours are supplied for eleven boxes and no `hue` is
# set; seaborn is expected to cycle the colours (newer releases may warn) --
# confirm against the installed seaborn version.
sns.boxplot(
    data=risk_pm,
    x="cost",
    y="scheduler",
    ax=ax,
    order=order,
    showfliers=False,
    showmeans=True,
    palette=['C0', 'C1'],
)

ax.set_ylabel("Scheduler")
ax.set_xlabel("Risk per Month (€)")

# Left-aligned tick labels start at the tick position, so they are pushed
# outwards with a large pad to keep them from running into the plot area.
ax.set_yticklabels(ylabels, ha='left')
ax.yaxis.set_tick_params(pad=120)

ax.set_xlim(left=0, right=12800)
# Thousands separators, no decimals (e.g. 12,800).
ax.xaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))

fig.savefig(os.path.join(FIGURE_PATH, 'scheduler-influence.pdf'))

In [None]:
# The first two characters of `id` serve as the group key; the per-group
# figure further down filters on the resulting values 'cu', 'co' and 'so'.
# (A bare `group` expression in the middle of a cell is never displayed, so
# only the final frame is shown here.)
group = risk['id'].str.slice(stop=2)

# Monthly risk broken down per scheduler and per group.
risk_pm_group = radice.compute_monthly_risk(risk, keys=['scheduler', group])
risk_pm_group

In [None]:
# Three side-by-side box plots of monthly cost, one per group, sharing the
# scheduler axis.
fig, ax = plt.subplots(1, 3, figsize=radice.figsize(2.5), sharey=True)

# (scheduler key, short tick label) pairs, listed top to bottom.
scheduler_ticks = [
    ('default', "Baseline"),
    ('mem', "T1"),
    ('core-mem', "T2"),
    ('provisioned-cores', "T3"),
    ('active-servers', "T4"),
    ('mem-inv', "T1-R"),
    ('core-mem-inv', "T2-R"),
    ('provisioned-cores-inv', "T3-R"),
    ('active-servers-inv', "T4-R"),
    ('combo', "Combo"),
    ('random', "Random"),
]
order = [key for key, _ in scheduler_ticks]
ylabels = [label for _, label in scheduler_ticks]

# (value of the `id` group column, x-axis caption, upper x-limit or None to
# leave the limits on autoscale)
panels = [
    ('cu', "Customer", None),
    ('co', "Company", 6000),
    ('so', "Society", 3000),
]
colors = sns.color_palette()

# Draw every box plot before touching any labels: the panels share their
# y-axis, so the tick labels are applied only once all plots exist.
for idx, (group_id, _, _) in enumerate(panels):
    sns.boxplot(
        data=risk_pm_group[risk_pm_group['id'] == group_id],
        x='cost',
        y='scheduler',
        ax=ax[idx],
        order=order,
        showfliers=False,
        showmeans=True,
        color=colors[idx],
    )

for idx, (_, caption, x_max) in enumerate(panels):
    # Only the left-most panel carries the y-axis title.
    ax[idx].set_ylabel("Scheduler" if idx == 0 else "")
    ax[idx].set_xlabel(caption)
    ax[idx].set_yticklabels(ylabels)
    if x_max is not None:
        ax[idx].set_xlim(left=0, right=x_max)

fig.supxlabel('Incurred Monthly Cost (€)', fontweight='bold')

fig.align_ylabels(ax)
fig.savefig(os.path.join(FIGURE_PATH, 'scheduler-groups.pdf'))

In [None]:
# Spread of the mean monthly risk across schedulers
# (largest per-scheduler mean minus the smallest).
mean_cost = risk_pm.groupby(['scheduler'])['cost'].mean()
mean_cost.max() - mean_cost.min()

In [None]:
# Spread of the first quartile (25th percentile) of monthly risk across schedulers.
q1_cost = risk_pm.groupby(['scheduler'])['cost'].quantile(0.25)
q1_cost.max() - q1_cost.min()

In [None]:
# Spread of the third quartile (75th percentile) of monthly risk across schedulers.
q3_cost = risk_pm.groupby(['scheduler'])['cost'].quantile(0.75)
q3_cost.max() - q3_cost.min()

In [None]:
# Rank the schedulers by their 75th-percentile monthly risk
# (pandas ranks ascending by default, so rank 1 is the lowest value).
q3_by_scheduler = risk_pm.groupby(['scheduler'])['cost'].quantile(0.75)
q3_by_scheduler.rank()
