In [1]:
from utils_demo import *
from loguru import logger
from unittest.mock import patch
from pprint import pprint
import warnings
warnings.filterwarnings('ignore')
from app_decomposer import DEFAULT_CONFIGURATION, API_DICT_TS, IOI_SAMPLING_PERIOD, PERF_MODEL_DATASET_NAME

### Running the app on any tier
- Job ID: 5168, run on the NFS storage tier
### Then running the AppDecomposer to get the phases and their features
|job_id|volume     |mode |IOpattern|IOsize            |nodes|ioi_bw            |
|------|-----------|-----|---------|------------------|-----|------------------|
|5168  |3020948878 |read |seq      |1047122.6613518198|1    |54926343.236363634|
|5168  |5242880997 |read |seq      |5221993.024900398 |1    |1048576199.4      |
|5168  |10485761994|read |seq      |5221993.024900398 |1    |1048576199.4      |
|5168  |10485760317|read |seq      |10443984.379482072|1    |2097152063.4      |
|5168  |20971520634|read |seq      |10443984.379482072|1    |2097152063.4      |
|5168  |3054501888 |write|seq      |1048576.0         |1    |610900377.6       |
|5168  |2296381440 |write|seq      |10485760.0        |1    |459276288.0       |
|5168  |7361003520 |write|seq      |10485760.0        |1    |736100352.0       |
|5168  |2401239040 |write|seq      |10485760.0        |1    |480247808.0       |
|5168  |7381975040 |write|seq      |10485760.0        |1    |738197504.0       |
|5168  |11481907200|write|seq      |10485760.0        |1    |328054491.4285714 |

### Running each phase on different tiers to get the total time and average bandwidth
|job_id|volume     |mode |IOpattern|IOsize            |nodes|ioi_bw            |nfs_bw            |lfs_bw            |sbb_bw            |
|------|-----------|-----|---------|------------------|-----|------------------|------------------|------------------|------------------|
|5168  |3020948878 |read |seq      |1047122.6613518198|1    |54926343.236363634|69763039.00422604 |1929086128.9910603|1351654978.9709172|
|5168  |5242880997 |read |seq      |5221993.024900398 |1    |1048576199.4      |111380034.77651256|2025842734.5440495|2201041560.4534006|
|5168  |10485761994|read |seq      |5221993.024900398 |1    |1048576199.4      |91902976.38830458 |1791519219.8872373|1337810920.387854 |
|5168  |10485760317|read |seq      |10443984.379482072|1    |2097152063.4      |38676144.67923442 |1680141053.837526 |2095475682.8537173|
|5168  |20971520634|read |seq      |10443984.379482072|1    |2097152063.4      |215951897.12908807|3759684588.3829327|4112062869.411765 |
|5168  |3054501888 |write|seq      |1048576.0         |1    |610900377.6       |91627726.42188625 |291404492.2724671 |294551773.1918997 |
|5168  |2296381440 |write|seq      |10485760.0        |1    |459276288.0       |87093011.71919444 |283959619.1418326 |320365714.28571427|
|5168  |7361003520 |write|seq      |10485760.0        |1    |736100352.0       |96188319.41667646 |307490017.12686414|330282385.247005  |
|5168  |2401239040 |write|seq      |10485760.0        |1    |480247808.0       |79366684.5149562  |238644309.28244883|304609798.30013955|
|5168  |7381975040 |write|seq      |10485760.0        |1    |738197504.0       |99861679.06712481 |310623818.22007155|332656258.8436754 |
|5168  |11481907200|write|seq      |10485760.0        |1    |328054491.4285714 |101964417.84258528|300675811.1399167 |337931753.8334756 |


### The simulator can predict the app execution on the two tiers


In [2]:
# Job analysed in this notebook; later cells read `jobid`.
jobid = 5168

# Build the detected-phases figure for that job.
# NOTE(review): plot_detected_phases presumably comes from the
# `utils_demo` star import — confirm.
fig = plot_detected_phases(
    jobid=jobid,
    merge=True,
    show_phases=True,
    width=1200,
    height=600,
)
# fig.show()  # rendering is currently disabled

In [3]:
#%%capture
# Drop the loguru handlers (presumably to keep library logging out of the
# cell output — confirm).
logger.remove()


def decompose_ioi_job(jobid):
    """Build a ComplexDecomposer for `jobid` with its remote getters patched.

    While the decomposer is constructed, three methods are replaced by mocks:
      * ComplexDecomposer.get_job_timeseries -> the time series returned by
        get_job_timeseries_from_file(job_id=jobid)
      * Configuration.get_kc_token           -> the string 'token'
      * ComplexDecomposer.get_job_node_count -> 1

    Args:
        jobid: identifier forwarded to get_job_timeseries_from_file.

    Returns:
        The ComplexDecomposer instance created with v0_threshold=1e-8.

    NOTE(review): the patches are undone as soon as this function returns,
    so any later call on the returned object that invokes one of the three
    getters again will reach the real implementation — confirm this is
    intended.
    """
    with patch.object(ComplexDecomposer, 'get_job_timeseries') as timeseries_stub, \
            patch.object(Configuration, 'get_kc_token') as token_stub, \
            patch.object(ComplexDecomposer, 'get_job_node_count') as node_count_stub:
        # Configure the stubs before the decomposer is instantiated.
        timeseries_stub.return_value = get_job_timeseries_from_file(job_id=jobid)
        token_stub.return_value = 'token'
        node_count_stub.return_value = 1
        # Instantiate while the stubs are active.
        decomposer = ComplexDecomposer(v0_threshold=1e-8)
        return decomposer
        
# Launch decomposition on the signal
cd = decompose_ioi_job(jobid=jobid)

# Job representation with clusters merged (merge_clusters=True).
representation = cd.get_job_representation(merge_clusters=True)
pprint(representation)

# This is the app encoding representation for Execution Simulator.
# Every field is separated by ", " so adjacent values do not run together.
# print() is used because pprint() on a str emits its quoted repr.
print(f"compute={representation['events']}, reads={representation['read_volumes']}, "
      f"read_bw={representation['read_bw']}, writes={representation['write_volumes']}, "
      f"write_bw={representation['write_bw']}, read_pattern={representation['read_pattern']}")

# Normalize signals to seconds and MB.
# NOTE(review): the divisor 5 is presumably the IOI sampling period —
# confirm, and consider using the imported IOI_SAMPLING_PERIOD instead.
timestamps = (cd.timestamps - cd.timestamps[0])/5
# Signals are divided by 1e6 (presumably bytes -> MB — confirm units).
original_read = cd.read_signal/1e6
original_write = cd.write_signal/1e6

{'events': [0, 2, 11, 13, 22, 24, 26, 39, 59, 76],
 'node_count': 1,
 'read_bw': [6105.0,
             241979466.46153846,
             5242880997.0,
             5242880997.0,
             10485760317.0,
             10485760317.0,
             0,
             0,
             0,
             0],
 'read_operations': [17, 3012, 1004, 2008, 1004, 2008, 0, 0, 0, 0],
 'read_pattern': ['Uncl',
                  'Seq',
                  'Seq',
                  'Seq',
                  'Seq',
                  'Seq',
                  'Uncl',
                  'Uncl',
                  'Uncl',
                  'Uncl'],
 'read_volumes': [6105,
                  3145733064,
                  5242880997,
                  10485761994,
                  10485760317,
                  20971520634,
                  0,
                  0,
                  0,
                  0],
 'write_bw': [0, 0, 0, 0, 0, 0, 307200000.0, 1572864000.0, 2097152000.0, 0],
 'write_operations': [0, 0, 0, 0, 0, 0,

In [4]:
# Extract the per-phase features from the job representation.
# NOTE(review): update_csv=False presumably leaves the dataset CSV
# untouched — confirm against get_phases_features.
phases_features = cd.get_phases_features(
    representation,
    job_id=jobid,
    update_csv=False,
)

In [14]:
# Apply the same bandwidths as measured by IOI (taken from the
# representation), each value divided by 1e6.
read_bw = [bandwidth / 1e6 for bandwidth in representation['read_bw']]
write_bw = [bandwidth / 1e6 for bandwidth in representation['write_bw']]

# Per-phase sum of the read and write bandwidths; zip() stops at the
# shorter of the two lists.
ioi_bw = [read + write for read, write in zip(read_bw, write_bw)]
print(ioi_bw)

[0.006105, 241.97946646153846, 5242.880997, 5242.880997, 10485.760317, 10485.760317, 307.2, 1572.864, 2097.152, 0.0]


In [24]:
# Look up the bandwidth predicted for each tier: no model is evaluated,
# the values are read straight from a tabular CSV.
# NOTE(review): this rebinds PERF_MODEL_DATASET_NAME, which the first cell
# imports from app_decomposer — confirm the override is intended.
PERF_MODEL_DATASET_NAME = "performance_model_dataset_completed.csv"

# The dataset is resolved two directory levels above the current working
# directory, under performance_model/dataset/.
current_dir = dirname(dirname(os.getcwd()))
csv_path = os.path.join(
    current_dir, "performance_model", "dataset", PERF_MODEL_DATASET_NAME
)
df = pd.read_csv(csv_path, index_col=False)

perf = {}
for tier_bw in ("ioi_bw", "nfs_bw", "lfs_bw", "sbb_bw"):
    # perf adjusted to IOI: each raw value is multiplied by 5 and divided
    # by 1e6 (the 5 is presumably the IOI sampling period — TODO confirm).
    perf[tier_bw] = [
        value * 5 / 1e6 for value in df[[tier_bw]].to_numpy().ravel()
    ]

print(perf["ioi_bw"])
print(perf["nfs_bw"])

[0.006105, 241.97946646153844, 5242.880997, 5242.880997, 10485.760317, 10485.760317, 307.2, 1572.864, 2097.152, 0.0]
[558.909009613235, 563.6978579481398, 538.618980932888, 565.0355972426262, 567.9236710586097, 542.0054200542006, 305.5674387337285, 390.74710847139727, 409.63460593150916, 17921.146953405016]


In [29]:
# Simulate the application from its representation (events, read volumes,
# write volumes) using the "lfs_bw" bandwidths from `perf`.
# The app name is derived from `jobid` so the label always matches the
# job being analysed.
sim_time, sim_read_bw, sim_write_bw = simulate_app(representation['events'],
                                                   representation['read_volumes'],
                                                   representation['write_volumes'],
                                                   perf["lfs_bw"], app_name=f"job#{jobid}")

In [30]:
# Group the simulated and the original (time, read, write) signals, then
# render both in a single figure.
simulated_signals = (sim_time, sim_read_bw, sim_write_bw)
original_signals = (timestamps, original_read, original_write)
fig = display_original_sim_signals(
    simulated_signals,
    original_signals,
    width=1000,
    height=900,
)
fig.show()