In [24]:
import sys
import os

# Make the parent of the current working directory importable (it is searched first).
parent_dir = os.path.dirname(os.getcwd())
# Drop any existing copy before inserting so that re-running this cell keeps
# sys.path free of duplicates while the directory still ends up at the front.
sys.path[:] = [entry for entry in sys.path if entry != parent_dir]
sys.path.insert(0, parent_dir)

In [None]:
# helper functions for running commands and parsing output
import subprocess
import numpy as np

from pytools.experiments.utils import find_matching_experiments, parse_sim_results, parse_vllm_results, calculate_errors, plot_results

def run_command(command):
    """Run an external command and return its standard output as text.

    Args:
        command: Sequence of program arguments handed to ``subprocess.run``
            (no shell is involved).

    Returns:
        The command's captured stdout, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status. The captured stdout/stderr are printed before the
            exception is re-raised.
    """
    try:
        result = subprocess.run(command, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # The captured streams are bytes; decode them (tolerating invalid bytes)
        # so the diagnostics are readable instead of a b'...' repr.
        stdout_text = (e.output or b"").decode('utf-8', errors='replace')
        stderr_text = (e.stderr or b"").decode('utf-8', errors='replace')
        # map(str, ...) keeps the message safe when arguments are path-like objects.
        print(f"Error executing command '{' '.join(map(str, command))}': {e}")
        print(f"Output: {stdout_text}")
        print(f"Error output: {stderr_text}")
        raise
    return result.stdout.decode('utf-8')
    
def parse_output(raw_results: str):
    """Extract the TTFT and E2E lists from raw simulator text output.

    The text is split on blank lines into chunks; every chunk except the last
    one is parsed. For each chunk the list following the ``TTFTs`` label and
    the list following the ``E2Es`` label are read up to the end of their
    line, with the last three characters before the newline discarded
    (presumably a trailing suffix in the simulator's output -- confirm
    against the actual format).

    Args:
        raw_results: Full text produced by the simulator.

    Returns:
        A list with one ``(ttfts, e2es)`` tuple of numpy arrays per parsed
        chunk. A chunk whose values cannot be converted to floats yields a
        pair of empty arrays and a printed error message.
    """
    ttft_label = "TTFTs             :"
    e2e_label = "E2Es             :"

    def extract_floats(chunk, label):
        # Slice from just after the label to three characters before the
        # next newline, then read the comma-separated numbers.
        begin = chunk.find(label) + len(label)
        stop = chunk.find("\n", begin)
        list_text = chunk[begin:stop - 3].strip()
        return [float(token) for token in list_text.strip("[]").split(", ")]

    sim_values = []
    # The final chunk after the last blank line is never parsed.
    for chunk in raw_results.split("\n\n")[:-1]:
        try:
            ttfts = extract_floats(chunk, ttft_label)
            e2es = extract_floats(chunk, e2e_label)
            pair = (np.array(ttfts), np.array(e2es))
        except ValueError as e:
            print(f"Error parsing output: {e}")
            pair = (np.array([]), np.array([]))
        sim_values.append(pair)

    return sim_values

def parse_outputs_from_file(sim_dir: str, vllm_dir: str, sweep_configs: dict):
    """Gather TTFT and E2E samples from all matching simulator / vLLM result files.

    Experiment pairs are located with ``find_matching_experiments``; each
    simulator file is read with ``parse_sim_results`` and each vLLM file with
    ``parse_vllm_results``. The ``'ttfts'`` and ``'e2es'`` entries of every
    pair are concatenated in the order the pairs are returned.

    Args:
        sim_dir: Directory with simulator results, e.g. "./results/sweep_params".
        vllm_dir: Directory with vLLM results, e.g. "./blackbox".
        sweep_configs: Sweep description used to match experiments, e.g.
            {'num_prompts': [400], 'request_rate': [32], 'temperature': [0.0],
             'max_num_batched_tokens': [256], 'long_prefill_token_threshold': [16],
             'datasets': [{'name': 'sharegpt',
                           'path': 'ShareGPT_V3_unfiltered_cleaned_split.json'}]}

    Returns:
        ``(sim_values, vllm_values)`` where each is a ``(ttfts, e2es)`` tuple
        of numpy arrays.
    """
    metric_keys = ('ttfts', 'e2es')
    sim_samples = {key: [] for key in metric_keys}
    vllm_samples = {key: [] for key in metric_keys}

    pairs = find_matching_experiments(sim_dir, vllm_dir, sweep_configs)
    for sim_file, vllm_file, _config in pairs:
        sim_parsed = parse_sim_results(sim_file)
        for key in metric_keys:
            sim_samples[key].extend(sim_parsed[key])

        vllm_parsed = parse_vllm_results(vllm_file)
        for key in metric_keys:
            vllm_samples[key].extend(vllm_parsed[key])

    sim_values = tuple(np.array(sim_samples[key]) for key in metric_keys)
    vllm_values = tuple(np.array(vllm_samples[key]) for key in metric_keys)
    return sim_values, vllm_values

def get_error(sim_values, vllm_values):
    """Mean squared error between simulated and measured latencies.

    For every sample the squared TTFT error and the squared E2E error are
    averaged, and the result is the mean of that quantity over all samples.

    Args:
        sim_values: ``(ttfts, e2es)`` pair of array-likes from the simulator.
        vllm_values: ``(ttfts, e2es)`` pair of array-likes from vLLM.

    Returns:
        The combined MSE as a numpy float.

    Raises:
        ValueError: If the four arrays do not all have the same shape, or if
            they contain no samples.
    """
    sim_ttfts, sim_e2es = (np.asarray(values, dtype=float) for values in sim_values)
    vllm_ttfts, vllm_e2es = (np.asarray(values, dtype=float) for values in vllm_values)

    # Compare the sample arrays themselves: both inputs are always 2-tuples,
    # so comparing the tuples' lengths could never detect a mismatch.
    if not (sim_ttfts.shape == sim_e2es.shape == vllm_ttfts.shape == vllm_e2es.shape):
        raise ValueError("Length of simulation values and vLLM values must match")
    if sim_ttfts.size == 0:
        # Fail clearly instead of dividing by zero.
        raise ValueError("Cannot compute error: no samples were provided")

    per_sample = ((sim_ttfts - vllm_ttfts) ** 2 + (sim_e2es - vllm_e2es) ** 2) / 2
    return np.mean(per_sample)


# main optimizer function

def black_box_function(
    sum_decode_tokens: float,
    sum_prefill_tokens: float,
    max_prefill_tokens: float,
    num_prefills: float,
    sum_decode_tokenss2: float,
    sum_decode_tokensmsumprefill_tokens: float,
    sum_decode_tokensmmaxprefill_tokens: float,
    sum_decode_tokensmnumprefills: float,
    sum_prefill_tokenss2: float,
    sum_prefill_tokensmmaxprefill_tokens: float,
    sum_prefill_tokensmnumprefills: float,
    max_prefill_tokenss2: float,
    max_prefill_tokensmnumprefills: float,
    num_prefillss2: float,
    intercept: float,
    schedule_time: float,
    update_time: float,
    queue_overhead: float,
    vllm_overhead: float,
):
    """Objective for the optimizer: negative error of one simulator run.

    Runs ``request_rate_sweep.py`` as a subprocess with the given parameters,
    then loads the simulator and vLLM results for the fixed sweep
    configuration and scores them with ``get_error``.

    Args:
        sum_decode_tokens ... intercept: The fifteen regression coefficients,
            passed to the script as one comma-separated ``--regression_coeffs``
            value in exactly this order.
        schedule_time, update_time, queue_overhead, vllm_overhead: Timing
            parameters; each is truncated to an integer before being passed
            on the command line.

    Returns:
        The negated error, so that maximising this function minimises the error.
    """
    sim_dir = "./results/sweep_params"
    vllm_dir = "./blackbox"
    # NOTE(review): presumably the sweep script writes its results under
    # sim_dir for this configuration -- confirm.
    sweep_configs = {
        'num_prompts': [400],
        'request_rate': [32],
        'temperature': [0.0],
        'max_num_batched_tokens': [256],
        'long_prefill_token_threshold': [16],
        'datasets': [{'name': 'sharegpt', 'path': 'ShareGPT_V3_unfiltered_cleaned_split.json'}]
    }

    coefficients_str = ','.join(
        str(coefficient)
        for coefficient in (
            sum_decode_tokens,
            sum_prefill_tokens,
            max_prefill_tokens,
            num_prefills,
            sum_decode_tokenss2,
            sum_decode_tokensmsumprefill_tokens,
            sum_decode_tokensmmaxprefill_tokens,
            sum_decode_tokensmnumprefills,
            sum_prefill_tokenss2,
            sum_prefill_tokensmmaxprefill_tokens,
            sum_prefill_tokensmnumprefills,
            max_prefill_tokenss2,
            max_prefill_tokensmnumprefills,
            num_prefillss2,
            intercept,
        )
    )

    def sweep_args(key):
        # One command-line token per configured value.
        return [str(value) for value in sweep_configs[key]]

    command = [
        "python", "request_rate_sweep.py",
        "--rates", *sweep_args("request_rate"),
        "--long_prefill_token_thresholds", *sweep_args("long_prefill_token_threshold"),
        "--max_num_batched_tokens", *sweep_args("max_num_batched_tokens"),
        # The list is rendered as a single space-separated token, e.g. [400] -> "400".
        "--num_requests", str(sweep_configs['num_prompts']).strip('[]').replace(',', ''),
        "--input_filename", "data/output_tokens_2025-07-07_tokenized.json",
        "--regression_coeffs", coefficients_str,
        "--schedule_time", str(int(schedule_time)),
        "--update_time", str(int(update_time)),
        "--queue_overhead_time", str(int(queue_overhead)),
        "--vllm_overhead_time", str(int(vllm_overhead)),
    ]
    # Only the run itself matters here; the script's stdout is not used.
    run_command(command)

    sim_values, vllm_values = parse_outputs_from_file(sim_dir, vllm_dir, sweep_configs)
    error = get_error(sim_values, vllm_values)

    # The optimizer maximises, so hand back the negated error.
    return -1 * error


In [None]:
# Baseline parameter set; the keys match the keyword parameters of
# black_box_function.
baseline_values = dict(
    # Regression coefficients (presumably the "s2" suffix marks squared terms
    # and an "m" infix marks products of two features -- confirm).
    sum_decode_tokens=3.38283913e-05,
    sum_prefill_tokens=9.82346868e-06,
    max_prefill_tokens=-3.11237143e-06,
    num_prefills=1.50291993e-03,
    sum_decode_tokenss2=4.24173346e-08,
    sum_decode_tokensmsumprefill_tokens=-1.06897441e-07,
    sum_decode_tokensmmaxprefill_tokens=1.92844617e-07,
    sum_decode_tokensmnumprefills=2.60430816e-05,
    sum_prefill_tokenss2=-7.72212201e-09,
    sum_prefill_tokensmmaxprefill_tokens=2.67059068e-08,
    sum_prefill_tokensmnumprefills=7.20303280e-06,
    max_prefill_tokenss2=-1.06904337e-08,
    max_prefill_tokensmnumprefills=-1.05254706e-05,
    num_prefillss2=-9.19828725e-04,
    intercept=0.005708624032334771,
    # Timing / overhead parameters.
    schedule_time=544,
    update_time=80,
    queue_overhead=1000,
    vllm_overhead=6000,
)

In [None]:

from bayes_opt import BayesianOptimization

# Bounded region of parameter space: one (lower, upper) pair per keyword
# parameter of black_box_function.
pbounds = {
    'sum_decode_tokens': (0, 0.0001),
    'sum_prefill_tokens': (0, 0.00001),
    'max_prefill_tokens': (-0.00001, 0),
    'num_prefills': (0, 0.01),
    'sum_decode_tokenss2': (0, 0.0000001),
    'sum_decode_tokensmsumprefill_tokens': (-0.000001, 0),
    'sum_decode_tokensmmaxprefill_tokens': (0, 0.000001),
    'sum_decode_tokensmnumprefills': (0, 0.0001),
    'sum_prefill_tokenss2': (-0.0000001, 0),
    'sum_prefill_tokensmmaxprefill_tokens': (0, 0.000001),
    'sum_prefill_tokensmnumprefills': (0, 0.0001),
    'max_prefill_tokenss2': (-0.000001, 0),
    'max_prefill_tokensmnumprefills': (-0.0001, 0),
    'num_prefillss2': (-0.001, 0),
    'intercept': (0, 0.01),
    'schedule_time': (300, 600),
    'update_time': (60, 100),
    'queue_overhead': (900, 1100),
    'vllm_overhead': (5000, 6000),
}

# Alternative bounds: search within +/-10% of the baseline values
# pbounds = {}
# for coeff in baseline_values:
#     lower_bound = baseline_values[coeff]*0.9
#     upper_bound = baseline_values[coeff]*1.1
#     pbounds[coeff] = (min(lower_bound, upper_bound), max(lower_bound, upper_bound))

# Build and run the optimizer exactly once. Every evaluation of
# black_box_function launches a simulator subprocess, so a run whose optimizer
# object is immediately replaced would only waste time.
optimizer = BayesianOptimization(
    f=black_box_function,
    pbounds=pbounds,
    random_state=1,  # fixed seed for the optimizer's own random choices
)

optimizer.maximize(
    init_points=20,
    n_iter=100,
)


|   iter    |  target   | sum_de... | sum_pr... | max_pr... | num_pr... | sum_de... | sum_de... | sum_de... | sum_de... | sum_pr... | sum_pr... | sum_pr... | max_pr... | max_pr... | num_pr... | intercept | schedu... | update... | queue_... | vllm_o... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m-946.7581[39m | [39m-.170e-05[39m | [39m-.203e-06[39m | [39m-9.99e-06[39m | [39m0.0030233[39m | [39m-.467e-08[39m | [39m-9.07e-07[39m | [39m-.862e-07[39m | [39m-.455e-05[39m | [39m-6.03e-08[39m | [39m-.388e-07[39m | [39m-.191e-05[39m | [39m-3.14e-07[39m | [39m-7.95e-05[39m | [39m-0.000121[39m | [39m0.0002738[39m | [39m501.14025[39m | [39m76.692192[39m | [39m1011.7379[39m | [39m5140.3869[39m |
| [39m2        [39m | [39

In [31]:
# Pull the best point seen by the optimizer and print it as a report.
best_params = optimizer.max['params']
best_value = optimizer.max['target']

# Assemble the report first, then emit it with a single print call.
summary_lines = ["Best parameters found:"]
summary_lines.extend(f"  {param} = {value}," for param, value in best_params.items())
summary_lines.append(f"\nBest objective value: {best_value}")
print("\n".join(summary_lines))


Best parameters found:
  sum_decode_tokens = 0.0,
  sum_prefill_tokens = 1e-05,
  max_prefill_tokens = -1e-05,
  num_prefills = 0.0,
  sum_decode_tokenss2 = 2.165940906966959e-08,
  sum_decode_tokensmsumprefill_tokens = -9.673344850119865e-07,
  sum_decode_tokensmmaxprefill_tokens = 2.701334846388116e-07,
  sum_decode_tokensmnumprefills = 6.143719244260631e-05,
  sum_prefill_tokenss2 = -8.647379316527295e-08,
  sum_prefill_tokensmmaxprefill_tokens = 1.2981093746391148e-07,
  sum_prefill_tokensmnumprefills = 0.0,
  max_prefill_tokenss2 = -2.0232696437639207e-07,
  max_prefill_tokensmnumprefills = -0.0001,
  num_prefillss2 = 0.0,
  intercept = 0.01,
  schedule_time = 572.0821739356276,
  update_time = 79.26114415257787,
  queue_overhead = 1006.182816397264,
  vllm_overhead = 5276.069109603579,

Best objective value: -0.0023636775712843174


In [46]:
# Test on baseline parameters.
# baseline_values holds exactly one entry per keyword parameter of
# black_box_function, so unpack it instead of repeating every number here
# (a hand-copied list can silently drift from the dictionary).
black_box_function(**baseline_values)


np.float64(-0.9661828370336194)

In [43]:
# Re-evaluate the objective at a recorded snapshot of the best parameters.
# The values are kept literal so this check does not depend on the optimizer
# still being in memory.
black_box_function(**{
    "sum_decode_tokens": 0.0,
    "sum_prefill_tokens": 1e-05,
    "max_prefill_tokens": -1e-05,
    "num_prefills": 0.0,
    "sum_decode_tokenss2": 2.165940906966959e-08,
    "sum_decode_tokensmsumprefill_tokens": -9.673344850119865e-07,
    "sum_decode_tokensmmaxprefill_tokens": 2.701334846388116e-07,
    "sum_decode_tokensmnumprefills": 6.143719244260631e-05,
    "sum_prefill_tokenss2": -8.647379316527295e-08,
    "sum_prefill_tokensmmaxprefill_tokens": 1.2981093746391148e-07,
    "sum_prefill_tokensmnumprefills": 0.0,
    "max_prefill_tokenss2": -2.0232696437639207e-07,
    "max_prefill_tokensmnumprefills": -0.0001,
    "num_prefillss2": 0.0,
    "intercept": 0.01,
    "schedule_time": 572.0821739356276,
    "update_time": 79.26114415257787,
    "queue_overhead": 1006.182816397264,
    "vllm_overhead": 5276.069109603579,
})

np.float64(-0.0023636775712843174)

In [44]:
import os

# Locations of the measured (vLLM) and simulated results to compare.
vllm_dir = "./blackbox"
sim_dir = "./results/sweep_params"

# Sweep configuration used to pair simulator runs with vLLM runs.
sweep_configs = dict(
    num_prompts=[400],
    request_rate=[32],
    temperature=[0.0],
    max_num_batched_tokens=[256],
    long_prefill_token_threshold=[16],
    datasets=[{'name': 'sharegpt', 'path': 'ShareGPT_V3_unfiltered_cleaned_split.json'}],
)

# graphing parameters (only consumed by the plot_results call that is
# currently commented out at the end of this cell)
x_axis = 'long_prefill_token_threshold'
y_axis = 'max_num_batched_tokens'
metrics = ['ttft_accuracy', 'tpot_accuracy', 'e2e_accuracy', 'duration_accuracy']
plotting_data = {metric: {} for metric in metrics}

# Find matching experiments
matching_pairs = find_matching_experiments(sim_dir, vllm_dir, sweep_configs)
print(f"Found {len(matching_pairs)} matching experiment pairs")

# For every pair: parse both result files and print each error metric.
for sim_file, vllm_file, config in matching_pairs:
    print(f"\nProcessing: {os.path.basename(sim_file)}")

    sim_results = parse_sim_results(sim_file)
    vllm_results = parse_vllm_results(vllm_file)

    errors = calculate_errors(sim_results, vllm_results)
    for metric, value in errors.items():
        print(f"{metric}: {value}")


# plot_results(plotting_data, x_axis, y_axis, sweep_configs)

Found 1 matching experiment pairs

Processing: exp_400p_32r_0.0t_256mbt_16lpt_sharegpt.txt
ttft_mse: 10.872083464916573
ttft_accuracy: -2934.5287108820853
tpot_mse: 0.0006870317951128006
tpot_accuracy: -56.40852090417212
e2e_mse: 38.955120388310064
e2e_accuracy: 0
duration_accuracy: 42.38788584740828


In [None]:
ttft_mse: 0.025188282731345088
ttft_accuracy: 48.2461751138668
tpot_mse: 3.695219311984646e-05
tpot_accuracy: 59.17752753509921
e2e_mse: 1.9071773913358943
e2e_accuracy: 54.04913470013282
duration_accuracy: 94.59522422830518