In [1]:
import sys
sys.path.append("../") # go to parent dir
# imports
import simpy
from loguru import logger
import time
from cluster_simulator.utils import convex_hull
from cluster_simulator.cluster import Cluster, Tier, EphemeralTier, bandwidth_share_model, compute_share_model, get_tier, convert_size
from cluster_simulator.phase import DelayPhase, ComputePhase, IOPhase
from cluster_simulator.application import Application
from cluster_simulator.analytics import display_run
from cluster_simulator.ephemeral_placement import ClusterBlackBox


Could not import matplotlib.pyplot, therefore ``cma.plot()`` etc. is not available



Setting the Cluster

In [2]:
# Simulation environment and the store that collects run data
env = simpy.Environment()
data = simpy.Store(env)


def _flat_bandwidth(read_rate, write_rate):
    """Return a bandwidth table whose 'seq' and 'rand' rates are equal."""
    return {'read':  {'seq': read_rate, 'rand': read_rate},
            'write': {'seq': write_rate, 'rand': write_rate}}


# Tier performance tables (read rate, write rate)
nvram_bandwidth = _flat_bandwidth(800, 400)
ssd_bandwidth = _flat_bandwidth(200, 100)
hdd_bandwidth = _flat_bandwidth(80, 40)

# Persistent tiers
hdd_tier = Tier(env, 'HDD', bandwidth=hdd_bandwidth, capacity=1e12)
ssd_tier = Tier(env, 'SSD', bandwidth=ssd_bandwidth, capacity=200e9)
nvram_tier = Tier(env, 'NVRAM', bandwidth=nvram_bandwidth, capacity=10e9)

# Ephemeral tier "BB": backed by the HDD tier, using the NVRAM bandwidth table
bb = EphemeralTier(env, name="BB", persistent_tier=hdd_tier,
                   bandwidth=nvram_bandwidth, capacity=10e9)

# Cluster with 1 persistent tier (HDD) and 1 ephemeral tier (BB);
# ssd_tier and nvram_tier are created above but not attached here.
cluster = Cluster(env, tiers=[hdd_tier], ephemeral_tier=bb)

Behavior for no SBB app

In [3]:
# Drop the loguru handlers so the simulation does not flood the cell output
logger.remove()

# Simple app: read 1GB -> compute 10s -> write 5GB
compute, read, write = [0, 10], [1e9, 0], [0, 5e9]
# Both phases use placement 0 and neither uses the burst buffer
placement, use_bb = [0, 0], [False, False]

app1 = Application(env, compute=compute, read=read, write=write, data=data)
app1_run = app1.run(cluster, placement=placement, use_bb=use_bb)
env.process(app1_run)
env.run()
print(f"application duration = {app1.get_fitness()}")

# Plot what the run recorded in `data`
fig = display_run(data, cluster, width=800, height=900)
fig.show()


application duration = 147.5


In [4]:
# Fresh store for this run's data (the simulation environment is reused)
data = simpy.Store(env)
# Drop the loguru handlers so the simulation does not flood the cell output
logger.remove()

# Simple app: read 1GB -> compute 10s -> write 5GB
compute, read, write = [0, 10], [1e9, 0], [0, 5e9]
# Same placement as the no-BB run, but the second use_bb flag is enabled
placement, use_bb = [0, 0], [False, True]

app = Application(env, compute=compute, read=read, write=write, data=data)
app_run = app.run(cluster, placement=placement, use_bb=use_bb)
env.process(app_run)
env.run()
print(f"application duration = {app.get_fitness()}")

# Plot what the run recorded in `data`
fig = display_run(data, cluster, width=800, height=900)
fig.show()


application duration = 47.5


In [5]:
# Fresh store for this run's data (the simulation environment is reused)
data = simpy.Store(env)
# Drop the loguru handlers so the simulation does not flood the cell output
logger.remove()

# Simple app: read 1GB -> compute 10s -> write 5GB
compute, read, write = [0, 10], [1e9, 0], [0, 5e9]
# Same placement as the no-BB run, but the first use_bb flag is enabled
placement, use_bb = [0, 0], [True, False]

app = Application(env, compute=compute, read=read, write=write, data=data)
app_run = app.run(cluster, placement=placement, use_bb=use_bb)
env.process(app_run)
env.run()
print(f"application duration = {app.get_fitness()}")

# Plot what the run recorded in `data`
fig = display_run(data, cluster, width=800, height=900)
fig.show()

application duration = 136.25


Optimizing BB Size (flavor)

In [16]:
# `os` is imported in this cell because os.path.join/os.getcwd are used below
# and no earlier cell imports it; without it a fresh-kernel run fails with
# NameError.
import os

import numpy as np
import pandas as pd

# Pickled results table of the BB-size ("flavor") optimization, expected in
# the current working directory.
pathfile = os.path.join(os.getcwd(), "flavor_optim")
# NOTE(review): unpickling can execute arbitrary code; only load a
# "flavor_optim" file that comes from a trusted source.
df = pd.read_pickle(pathfile)
# Preview the first rows (one row per trial: parameters, fitness, BB size)
df.head()

Unnamed: 0,Param,App#1 tier place,App#1 use bb,App#2 tier place,App#2 use bb,App#3 tier place,App#3 use bb,Fitness,BB_size
0,"[0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, ...","[0.0, 1.0]","[True, True]","[0.0, 0.0, 1.0]","[False, True, False]","[0.0, 1.0]","[True, False]",293.809524,15000000000.0
1,"[1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[1.0, 0.0]","[False, False]","[1.0, 1.0, 0.0]","[True, False, True]","[1.0, 0.0]","[False, True]",206.624939,20000000000.0
2,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0]","[True, True]","[0.0, 1.0, 0.0]","[False, True, True]","[1.0, 0.0]","[True, True]",193.78326,32000000000.0
3,"[1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, ...","[1.0, 1.0]","[False, True]","[0.0, 1.0, 1.0]","[False, False, False]","[1.0, 1.0]","[False, False]",393.511905,5000000000.0
4,"[0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, ...","[0.0, 0.0]","[False, True]","[1.0, 1.0, 1.0]","[True, False, False]","[1.0, 1.0]","[False, False]",400.347985,8000000000.0


Plotting workflow duration vs. BB size: efficient frontier


In [22]:
import plotly.express as px
import plotly.graph_objects as go

# Plotly's qualitative palette; its first colour is used for the trial markers
colors = px.colors.qualitative.Plotly

# One marker per trial: BB size on x, fitness (workflow duration) on y,
# with the trial index as the marker text
fig = go.Figure()
fig.add_traces(
    go.Scatter(
        x=df['BB_size'],
        y=df['Fitness'],
        text=["trial_index=" + str(trial) for trial in df.index],
        mode='markers',
        line={'color': colors[0]},
    )
)
fig.update_xaxes(title_text='Size of BB in bytes (B = 1e9 = GB)')
fig.update_yaxes(title_text='Workflow duration in seconds')
fig.show()

Efficient frontier / Pareto frontier

In [23]:
# Pair every trial's BB size with its fitness as (x, y) points
points = list(zip(df['BB_size'].tolist(), df['Fitness'].tolist()))

# Hull points returned by convex_hull are drawn as an extra trace on the
# existing scatter figure (presumably the lower hull, i.e. the frontier --
# confirm against cluster_simulator.utils.convex_hull)
lower = convex_hull(points)
fig.add_traces(
    go.Scatter(
        x=np.array([vertex[0] for vertex in lower]),
        y=np.array([vertex[1] for vertex in lower]),
    )
)
fig.show()

Display individual points (placement recommendation)

In [None]:
# Index label of the trial to inspect; change it to look at another row
idx = 21
# Parameter vector stored for that trial (single .loc lookup by row and column)
df.loc[idx, "Param"]

array([1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 1., 1.])

In [None]:
# Parameter vector of trial 21 (solution found by bbopt), as a plain list
trial_params = df.loc[21]["Param"].tolist()

# Replay that placement on a fresh black box and show the resulting figure
cbb = ClusterBlackBox()
fig = cbb.display_placement(placement=trial_params)
fig.show()

Displaying result for placement parameter = [1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0]
    | app#1 : tier placement: [1.0, 0.0] | use_bb = [False, False]
    | app#2 : tier placement: [0.0, 1.0, 0.0] | use_bb = [True, False, True]
    | app#3 : tier placement: [1.0, 0.0] | use_bb = [False, True]
    | runtime = 206.62493924629848 |  BB_size = 20.0 GB


In [24]:
# Show the placements stored for trials 59 and 24 (solutions found by bbopt),
# each on a freshly created ClusterBlackBox
for trial_index in (59, 24):
    cbb = ClusterBlackBox()
    fig = cbb.display_placement(placement=df.loc[trial_index]["Param"].tolist())
    fig.show()


Displaying result for placement parameter = [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    | app#1 : tier placement: [1.0, 1.0] | use_bb = [False, False]
    | app#2 : tier placement: [1.0, 0.0, 1.0] | use_bb = [False, True, False]
    | app#3 : tier placement: [0.0, 1.0] | use_bb = [True, False]
    | runtime = 308.0952380952381 |  BB_size = 9.0 GB


Displaying result for placement parameter = [1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    | app#1 : tier placement: [1.0, 0.0] | use_bb = [False, False]
    | app#2 : tier placement: [1.0, 1.0, 0.0] | use_bb = [False, True, False]
    | app#3 : tier placement: [1.0, 0.0] | use_bb = [True, False]
    | runtime = 657.9853479853481 |  BB_size = 9.0 GB


Get the best placement


In [26]:
# Search for the best placement with a surrogate-model black-box optimizer.
# NOTE(review): this cell relies on `np`, `time` and `ClusterBlackBox` being
# imported by earlier cells.

# imports for surrogate models
from sklearn.gaussian_process import GaussianProcessRegressor
from bbo.optimizer import BBOptimizer
# (the `timeit` helper from bbo.optimizer is intentionally not imported)
from bbo.heuristics.surrogate_models.next_parameter_strategies import expected_improvement

# imports for genetic algorithms (not used by the surrogate-model run below)
from bbo.heuristics.genetic_algorithm.selections import tournament_pick
from bbo.heuristics.genetic_algorithm.crossover import double_point_crossover
from bbo.heuristics.genetic_algorithm.mutations import mutate_chromosome_to_neighbor
from loguru import logger

# Drop the loguru handlers so the optimizer run does not flood the cell output
logger.remove()
# Black box to optimize; it exposes the parameter space that is searched
cbb = ClusterBlackBox()
PARAMETER_SPACE = cbb.parameter_space
# Number of possible combinations: n_tiers ** sum(ios) + 2 ** sum(ios)
# (attributes of the black box); a fixed budget is used instead.
NBR_ITERATION = 50  # alternative: cbb.n_tiers ** sum(cbb.ios)

# Seed numpy's global RNG before building the optimizer
np.random.seed(5)
bbopt = BBOptimizer(black_box=cbb,
                    heuristic="surrogate_model",
                    max_iteration=NBR_ITERATION,
                    initial_sample_size=60,
                    parameter_space=PARAMETER_SPACE,
                    next_parameter_strategy=expected_improvement,
                    regression_model=GaussianProcessRegressor)
# Start timestamp; it is recorded here but not read again in this cell
start_time = time.time()
# Last expression of the cell: its return value is displayed as the cell output
bbopt.optimize()

Full BBO param array = [0. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 0. 1.]
    | app#1 : tier placement: [0. 1.] | use_bb = [True, True]
    | app#2 : tier placement: [0. 0. 1.] | use_bb = [False, True, False]
    | app#3 : tier placement: [0. 1.] | use_bb = [True, False]
    | runtime = 293.80952380952385 |  BB_size = 15.0 GB
Full BBO param array = [1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0.]
    | app#1 : tier placement: [1. 0.] | use_bb = [False, False]
    | app#2 : tier placement: [1. 1. 0.] | use_bb = [True, False, True]
    | app#3 : tier placement: [1. 0.] | use_bb = [False, True]
    | runtime = 206.62493924629848 |  BB_size = 20.0 GB
Full BBO param array = [0. 0. 0. 1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1.]
    | app#1 : tier placement: [0. 0.] | use_bb = [True, True]
    | app#2 : tier placement: [0. 1. 0.] | use_bb = [False, True, True]
    | app#3 : tier placement: [1. 0.] | use_bb = [True, True]
    | runtime = 193.7832604288915 |  BB_size = 32.0 GB
Full BBO param array = [1. 1. 0. 1. 1. 

array([1., 0., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1.])

In [27]:
# Best parameter vector reported by the optimizer, replayed on the black box
best_parameters = bbopt.best_parameters_in_grid
fig1 = cbb.display_placement(placement=best_parameters)
fig1.show()

Displaying result for placement parameter = [1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1.]
    | app#1 : tier placement: [1. 0.] | use_bb = [True, False]
    | app#2 : tier placement: [0. 1. 1.] | use_bb = [True, True, True]
    | app#3 : tier placement: [1. 1.] | use_bb = [True, True]
    | runtime = 173.01419858458695 |  BB_size = 30.0 GB
