In [1]:
import sys
sys.path.append("../") # go to parent dir
# imports
import simpy
from loguru import logger
import time
import numpy as np
import pandas as pd
from cluster_simulator.utils import convex_hull
from cluster_simulator.cluster import Cluster, Tier, EphemeralTier, bandwidth_share_model, compute_share_model, get_tier, convert_size
from cluster_simulator.phase import DelayPhase, ComputePhase, IOPhase
from cluster_simulator.application import Application
from cluster_simulator.analytics import display_run
from cluster_simulator.ephemeral_placement import ClusterBlackBox
from cluster_simulator.placement_optimizer import PlacementOptimizer


Setting up the cluster:
- with two persistent tiers (HDD and SSD)
- with an ephemeral tier (burst buffer, BB) backed by the HDD tier
 

In [5]:
# Simulation environment, plus the store that is handed to the applications
# and later to display_run().
env = simpy.Environment()
data = simpy.Store(env)

# Bandwidth tables, keyed by operation ('read'/'write') and then by access
# pattern ('seq'/'rand').
# NOTE(review): units are not stated in this notebook (presumably MB/s) -- confirm.
nvram_bandwidth = dict(read=dict(seq=800, rand=800),
                       write=dict(seq=400, rand=400))
ssd_bandwidth = dict(read=dict(seq=200, rand=200),
                     write=dict(seq=100, rand=100))
hdd_bandwidth = dict(read=dict(seq=80, rand=80),
                     write=dict(seq=40, rand=40))

# Persistent tiers. nvram_tier is instantiated here but is NOT part of the
# cluster defined below.
hdd_tier = Tier(env, 'HDD', bandwidth=hdd_bandwidth, capacity=1e12)
ssd_tier = Tier(env, 'SSD', bandwidth=ssd_bandwidth, capacity=200e9)
nvram_tier = Tier(env, 'NVRAM', bandwidth=nvram_bandwidth, capacity=10e9)

# Ephemeral tier (burst buffer): NVRAM-class bandwidth, attached to the HDD tier.
bb = EphemeralTier(env,
                   name="BB",
                   persistent_tier=hdd_tier,
                   bandwidth=nvram_bandwidth,
                   capacity=10e9)

# Cluster with two persistent tiers (HDD, SSD) and one ephemeral tier.
# Placement values used further down presumably index into `tiers` -- confirm.
cluster = Cluster(env, tiers=[hdd_tier, ssd_tier], ephemeral_tier=bb)

# Reference application: read 1GB -> compute 10s -> write 5GB.
# The three lists are parallel (one entry per step).
compute = [0, 10]
read = [1e9, 0]
write = [0, 5e9]
app1 = Application(env, compute=compute, read=read, write=write, data=data)

# Default placement: tier index 0 for both datasets, burst buffer disabled.
placement = [0, 0]
use_bb = [False, False]

In [4]:
# Fresh record store for this run; `env` and `cluster` come from the setup cell.
data = simpy.Store(env)
# Drop every loguru handler so the simulation runs silently.
logger.remove()

# Write-only application: no reads, 1.5e9 bytes written at the first step and
# 5e9 bytes at the second.
# NOTE(review): the `compute` entries look like time stamps of the I/O steps
# rather than durations -- confirm against Application.
compute = [5790, 5960]
read = [0, 0]
write = [1.5e9, 5e9]

# First dataset on tier index 0, second on tier index 1; burst buffer unused.
placement = [0, 1]
use_bb = [False, False]

app = Application(env, compute=compute, read=read, write=write, data=data)
run_process = app.run(cluster, placement=placement, use_bb=use_bb)
env.process(run_process)
env.run()

duration = app.get_fitness()
print(f"application duration = {duration}")

fig = display_run(data, cluster, width=800, height=900)
fig.show()

application duration = 332.5


Behavior of the app without SBB

In [None]:
# Vector passed to display_placement: the current `placement` followed by two
# extra entries, both 0 here.
# NOTE(review): the extra entries are presumably the per-dataset burst-buffer
# flags, and `placement` is whatever the last executed cell assigned -- confirm
# it matches the intended scenario for app1.
p1 = PlacementOptimizer(env, data, cluster, [app1])
fig = p1.display_placement([*placement, 0, 0])
fig.show()

Selective SBB per dataset

In [None]:
# Same view as the previous cell, but the last appended entry is 1.
# NOTE(review): presumably this enables the burst buffer for the second
# dataset only; `placement` is taken from the last executed cell -- confirm.
p1 = PlacementOptimizer(env, data, cluster, [app1])
fig = p1.display_placement([*placement, 0, 1])
fig.show()

What about prefetching? (buffering the datasets to be read)

In [None]:
# New record store for this run; loguru handlers are removed to keep it quiet.
data = simpy.Store(env)
logger.remove()

# Simple app: read 1GB -> compute 10s -> write 5GB
compute = [0, 10]
read = [1e9, 0]
write = [0, 5e9]

# Both datasets on tier index 0; the burst buffer is enabled for the first
# dataset only (the one that is read), i.e. the prefetch case.
placement = [0, 0]
use_bb = [True, False]

app = Application(env, compute=compute, read=read, write=write, data=data)
run_process = app.run(cluster, placement=placement, use_bb=use_bb)
env.process(run_process)
env.run()

duration = app.get_fitness()
print(f"application duration = {duration}")

fig = display_run(data, cluster, width=800, height=900)
fig.show()

Burst Buffer Saturation
 - we choose a BB size below 5 GB (the maximum size the application uses)
 

In [None]:
# Re-create the ephemeral tier with a capacity (4.5e9) smaller than the 5e9
# bytes the application writes, so that the burst buffer saturates.
bb = EphemeralTier(env,
                   name="BB",
                   persistent_tier=hdd_tier,
                   bandwidth=nvram_bandwidth,
                   capacity=4.5e9)

# Cluster with a single persistent tier (HDD) and the undersized ephemeral tier.
cluster = Cluster(env, tiers=[hdd_tier], ephemeral_tier=bb)

# New record store for this run; loguru handlers are removed to keep it quiet.
data = simpy.Store(env)
logger.remove()

# Simple app: read 1GB -> compute 10s -> write 5GB
compute = [0, 10]
read = [1e9, 0]
write = [0, 5e9]

# Both datasets on tier index 0; the ephemeral tier is enabled for the second
# dataset only (the 5GB write).
placement = [0, 0]
use_bb = [False, True]

app = Application(env, compute=compute, read=read, write=write, data=data)
run_process = app.run(cluster, placement=placement, use_bb=use_bb)
env.process(run_process)
env.run()

duration = app.get_fitness()
print(f"application duration = {duration}")

fig = display_run(data, cluster, width=800, height=900)
fig.show()

- eviction occurs
- buffer saturation should be avoided:
  - only data newly copied to the lower tier reduces the amount of dirty data
  - we can only evict clean data
  - in terms of simulation, saturation shortens the time intervals and the loop becomes extremely slow (~inf)
- the focus is not on studying what happens in saturation mode, but on sizing the BB appropriately

Optimizing BB Size (flavor)

In [None]:
import simpy
from loguru import logger
import time
import numpy as np
import pandas as pd
import time, os
from itertools import chain
from cluster_simulator.cluster import Cluster, Tier, EphemeralTier, bandwidth_share_model, compute_share_model, get_tier, convert_size
from cluster_simulator.phase import DelayPhase, ComputePhase, IOPhase
from cluster_simulator.application import Application
from cluster_simulator.analytics import display_run
# imports for surrogate models
from sklearn.gaussian_process import GaussianProcessRegressor
from bbo.optimizer import BBOptimizer
# from bbo.optimizer import timeit
from bbo.heuristics.surrogate_models.next_parameter_strategies import expected_improvement

# imports for genetic algorithms
from bbo.heuristics.genetic_algorithm.selections import tournament_pick
from bbo.heuristics.genetic_algorithm.crossover import double_point_crossover
from bbo.heuristics.genetic_algorithm.mutations import mutate_chromosome_to_neighbor
from loguru import logger
from cluster_simulator.ephemeral_placement import ClusterBlackBox
# Remove every loguru handler so the optimisation runs silently.
logger.remove()

# Black box exposing the cluster simulation to the optimizer, and its search space.
cbb = ClusterBlackBox()
PARAMETER_SPACE = cbb.parameter_space
# combinations are self.n_tiers ** sum(self.ios) + 2**sum(self.ios)
NBR_ITERATION = 1  # cbb.n_tiers ** sum(cbb.ios)

# Fixed seed so the sampling is repeatable from one run to the next.
np.random.seed(5)

# Surrogate-model search: Gaussian-process regressor, expected improvement
# as the strategy for picking the next parameters.
optimizer_options = dict(
    black_box=cbb,
    heuristic="surrogate_model",
    max_iteration=NBR_ITERATION,
    initial_sample_size=2,
    parameter_space=PARAMETER_SPACE,
    next_parameter_strategy=expected_improvement,
    regression_model=GaussianProcessRegressor,
)
bbopt = BBOptimizer(**optimizer_options)

start_time = time.time()
bbopt.optimize()

# Report: iteration count, wall-clock time, optimizer summary, fitness history.
separator = "-----------------"
print(separator)
print(f"Total number of iterations : {NBR_ITERATION}")
elapsed = time.time() - start_time
print(f"{elapsed} seconds spent for finding solution")
print(separator)
bbopt.summarize()
fitness_history = bbopt.history['fitness']
print(f"Fitness history : {fitness_history}")
# Saving is disabled; the next cell reads a file named "flavor_optim".
#cbb.save_experiment(filename = "flavor_optim", save=True)

In [None]:
import numpy as np
import pandas as pd
import os
!pip3 install pickle5
import pickle5 as pickle5

# Load the recorded trials from the "flavor_optim" pickle located in the
# current working directory (presumably written by cbb.save_experiment).
# NOTE(review): unpickling can execute arbitrary code -- only load a file that
# was produced by a trusted run of this notebook.
pickle_name = "flavor_optim"
pathfile = os.path.join(os.getcwd(), pickle_name)
df = pd.read_pickle(pathfile)

# Preview the first two trials (swap in list(df.columns) to list the columns).
df.head(2)

Plotting workflow duration vs. BB size: efficient frontier


In [None]:
import plotly.express as px
import plotly.graph_objects as go
# Scatter of every trial in `df`: BB size on x, fitness (workflow duration) on y.
colors = px.colors.qualitative.Plotly

# Hover text carries the trial index so a point can be looked up in `df`.
trial_labels = [f"trial_index={idx}" for idx in df.index]
trials_scatter = go.Scatter(
    x=df['BB_size'],
    y=df['Fitness'],
    text=trial_labels,
    mode='markers',
    line=dict(color=colors[0]),
)

fig = go.Figure()
fig.add_traces(trials_scatter)
fig.update_xaxes(title_text='Size of BB in bytes (B = 1e9 = GB)')
fig.update_yaxes(title_text='Workflow duration in seconds')
fig.show()

- each point is a placement configuration:
  - for each dataset:
    - the tier where it should be placed
    - the use or not of the BB to prefetch/buffer 

Show some samples

In [None]:
# Index label (in `df`) of the trial whose placement should be displayed.
trial_index = 29
cbb.display_placement(df.loc[trial_index, "Param"])

Efficient frontier / Pareto frontier

In [None]:
from cluster_simulator.utils import convex_hull
# One (BB size, fitness) point per trial.
points = list(zip(df['BB_size'].tolist(), df['Fitness'].tolist()))

# Hull vertices returned by the project helper.
# NOTE(review): the variable name suggests this is the lower hull -- confirm.
lower = convex_hull(points)
hull_x = np.array([vertex[0] for vertex in lower])
hull_y = np.array([vertex[1] for vertex in lower])

# Overlay the hull on the existing scatter figure. `fig` is mutated in place,
# so re-running this cell appends another hull trace.
fig.add_traces(go.Scatter(x=hull_x, y=hull_y))
fig.show()

Recommendations are elements of the efficient (Pareto) frontier

In [None]:
# Hand-picked trial indices (presumably read off the frontier plot -- they are
# specific to the loaded experiment file).
rec_trials = [29, 31, 4, 6, 129]
# Show those trials with every column except the first one.
df.loc[rec_trials, df.columns[1:]]

Display individual points (placement recommendation)

In [None]:
# Display the placement for the best parameters reported by the optimizer.
best_placement = bbopt.best_parameters_in_grid
fig1 = cbb.display_placement(placement=best_placement)
fig1.show()