In [1]:
import sys
import os

# Add the parent directory to Python path
parent_dir = os.path.dirname(os.getcwd())
sys.path.insert(0, parent_dir)

In [None]:
# helper functions for running commands and parsing output
import subprocess
import numpy as np

from pytools.experiments.utils import find_matching_experiments, parse_sim_results, parse_vllm_results, calculate_errors, plot_results

def run_command(command):
    try:
        # print(f"Running command: {' '.join(command)}")
        result = subprocess.run(command, check=True, capture_output=True)
        # print(f"Command '{' '.join(command)}' executed successfully.")
        return result.stdout.decode('utf-8')
    
    
    except subprocess.CalledProcessError as e:
        print(f"Error executing command '{' '.join(command)}': {e}")
        print(f"Output: {e.output}")
        print(f"Error output: {e.stderr}")
        raise
    
def parse_output(raw_results: str):
    # split tests by newlines
    
    results = raw_results.split("\n\n")

    sim_values = []
    
    for i in range(len(results)-1):
        result = results[i]
    
        # find list after "TTFTs             :"
        try:
            start_index = result.find("TTFTs             :") + len("TTFTs             :")
            end_index = result.find("\n", start_index)
            ttfts_str = result[start_index:end_index-3].strip()
            ttfts = [float(x) for x in ttfts_str.strip("[]").split(", ")]
            # find the list after "E2Es             :"
            start_index = result.find("E2Es             :") + len("E2Es             :")
            end_index = result.find("\n", start_index)
            e2es_str = result[start_index:end_index-3].strip()
            e2es = [float(x) for x in e2es_str.strip("[]").split(", ")]
            sim_values.append((np.array(ttfts), np.array(e2es)))
        except ValueError as e:
            print(f"Error parsing output: {e}")
            sim_values.append((np.array([]), np.array([])))

    return sim_values

def parse_outputs_from_file(sim_dir: str, vllm_dir: str, sweep_configs: dict):
    # sim_dir = "./results/sweep_params"
    # vllm_dir = "./blackbox"

    # sweep_configs = {
    #     'num_prompts': [400],
    #     'request_rate': [32],
    #     'temperature': [0.0],
    #     'max_num_batched_tokens': [256],
    #     'long_prefill_token_threshold': [16],
    #     'datasets': [{'name': 'sharegpt', 'path': 'ShareGPT_V3_unfiltered_cleaned_split.json'}]
    # }
    # # Find matching experiments
    matching_pairs = find_matching_experiments(sim_dir, vllm_dir, sweep_configs)

    # print(f"Found {len(matching_pairs)} matching experiment pairs")
    vllm_ttfts = []
    vllm_e2es = []
    sim_ttfts = []
    sim_e2es = []
    # Process each pair
    for sim_file, vllm_file, config in matching_pairs:
        # print(f"\nProcessing: {os.path.basename(sim_file)}")
        
        # Parse results
        sim_results = parse_sim_results(sim_file)
        sim_ttfts.extend(sim_results['ttfts'])
        sim_e2es.extend(sim_results['e2es'])
        vllm_results = parse_vllm_results(vllm_file)
        vllm_ttfts.extend(vllm_results['ttfts'])
        vllm_e2es.extend(vllm_results['e2es'])

    vllm_values = (np.array(vllm_ttfts), np.array(vllm_e2es))
    sim_values = (np.array(sim_ttfts), np.array(sim_e2es))
    return sim_values, vllm_values

def get_error(sim_values, vllm_values):
    # calculate mse
    if len(sim_values) != len(vllm_values):
        raise ValueError("Length of simulation values and vLLM values must match")
    
    vllm_ttfts, vllm_e2es = vllm_values
    sim_ttfts, sim_e2es = sim_values
    mse = 0.0
    for i in range(len(sim_values)):
        mse_ttfts = np.mean((sim_ttfts[i] - vllm_ttfts[i]) ** 2)
        mse_e2es = np.mean((sim_e2es[i] - vllm_e2es[i]) ** 2)
        mse += (mse_ttfts + mse_e2es) / 2
        
    mse /= len(sim_values)
    return mse


# main optimizer function

def black_box_function(sum_decode_tokens: float, sum_prefill_tokens: float, max_prefill_tokens: float, num_prefills: float, sum_decode_tokenss2: float, sum_decode_tokensmsumprefill_tokens: float, sum_decode_tokensmmaxprefill_tokens: float, sum_decode_tokensmnumprefills: float, sum_prefill_tokenss2: float, sum_prefill_tokensmmaxprefill_tokens: float, sum_prefill_tokensmnumprefills: float, max_prefill_tokenss2: float, max_prefill_tokensmnumprefills: float, num_prefillss2: float, intercept: float, schedule_time: float, update_time: float, queue_overhead: float, vllm_overhead: float):
    
    # run a python subprocess to execute the vllm command
    sim_dir = "./results/sweep_params"
    vllm_dir = "./blackbox"

    sweep_configs = {
        'num_prompts': [400],
        'request_rate': [32],
        'temperature': [0.0],
        'max_num_batched_tokens': [256],
        'long_prefill_token_threshold': [16],
        'datasets': [{'name': 'sharegpt', 'path': 'ShareGPT_V3_unfiltered_cleaned_split.json'}]
    }

    coefficients = [sum_decode_tokens, sum_prefill_tokens, max_prefill_tokens, num_prefills, sum_decode_tokenss2, sum_decode_tokensmsumprefill_tokens, sum_decode_tokensmmaxprefill_tokens, sum_decode_tokensmnumprefills, sum_prefill_tokenss2, sum_prefill_tokensmmaxprefill_tokens, sum_prefill_tokensmnumprefills, max_prefill_tokenss2, max_prefill_tokensmnumprefills, num_prefillss2, intercept]
    coefficients_str = ','.join(map(str, coefficients))

    result = run_command(["python","request_rate_sweep.py", 
                          "--rates", f" {str(sweep_configs['request_rate']).strip('[]').replace(',', ' ')}",
                          "--long_prefill_token_thresholds", f" {str(sweep_configs['long_prefill_token_threshold']).strip('[]').replace(',', ' ')}",
            "--max_num_batched_tokens", f" {str(sweep_configs['max_num_batched_tokens']).strip('[]').replace(',', ' ')}",
            "--num_requests", f" {str(sweep_configs['num_prompts']).strip('[]').replace(',', ' ')}",
            "--input_filename", "data/output_tokens_2025-07-07_tokenized.json",
            "--regression_coeffs", f'{coefficients_str}',
            "--schedule_time", f"{str(int(schedule_time))}",
            "--update_time", f"{str(int(update_time))}",
            "--queue_overhead_time", f"{str(int(queue_overhead))}",
            "--vllm_overhead_time", f"{str(int(vllm_overhead))}",])
    
    
    # print(result)
    
    sim_values, vllm_values = parse_outputs_from_file(sim_dir, vllm_dir, sweep_configs)

    error = get_error(sim_values, vllm_values)
    
    return -1 * error  # We want to minimize the error, so we return negative error


In [14]:

from bayes_opt import BayesianOptimization

# Bounded region of parameter space
pbounds = {
    'sum_decode_tokens': (0, 0.0001),
    'sum_prefill_tokens': (0, 0.00001),
    'max_prefill_tokens': (-0.00001, 0),
    'num_prefills': (0, 0.01),
    'sum_decode_tokenss2': (0, 0.0000001),
    'sum_decode_tokensmsumprefill_tokens': (-0.000001, 0),
    'sum_decode_tokensmmaxprefill_tokens': (0, 0.000001),
    'sum_decode_tokensmnumprefills': (0, 0.0001),
    'sum_prefill_tokenss2': (-0.0000001, 0), 
    'sum_prefill_tokensmmaxprefill_tokens': (0, 0.000001),
    'sum_prefill_tokensmnumprefills': (0, 0.0001),
    'max_prefill_tokenss2': (-0.000001, 0),
    'max_prefill_tokensmnumprefills': (-0.0001, 0),
    'num_prefillss2': (-0.001, 0),
    'intercept': (0, 0.01),
    'schedule_time': (300, 600),
    'update_time': (60, 100),
    'queue_overhead': (900, 1100),
    'vllm_overhead': (5000, 6000),}

optimizer = BayesianOptimization(
    f=black_box_function,
    pbounds=pbounds,
    random_state=1,
)

optimizer.maximize(
    init_points=20,
    n_iter=100,
)


|   iter    |  target   | sum_de... | sum_pr... | max_pr... | num_pr... | sum_de... | sum_de... | sum_de... | sum_de... | sum_pr... | sum_pr... | sum_pr... | max_pr... | max_pr... | num_pr... | intercept | schedu... | update... | queue_... | vllm_o... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m-0.195811[39m | [39m-.170e-05[39m | [39m-.203e-06[39m | [39m-9.99e-06[39m | [39m0.0030233[39m | [39m-.467e-08[39m | [39m-9.07e-07[39m | [39m-.862e-07[39m | [39m-.455e-05[39m | [39m-6.03e-08[39m | [39m-.388e-07[39m | [39m-.191e-05[39m | [39m-3.14e-07[39m | [39m-7.95e-05[39m | [39m-0.000121[39m | [39m0.0002738[39m | [39m501.14025[39m | [39m76.692192[39m | [39m1011.7379[39m | [39m5140.3869[39m |
| [35m2        [39m | [35

In [17]:
# Get the best parameters
best_params = optimizer.max['params']
best_value = optimizer.max['target']

print("Best parameters found:")
for param, value in best_params.items():
    print(f"  {param} = {value},")


Best parameters found:
  sum_decode_tokens = 0.0001,
  sum_prefill_tokens = 1.7671902233442285e-06,
  max_prefill_tokens = -4.582863926798631e-09,
  num_prefills = 0.01,
  sum_decode_tokenss2 = 7.376065441210368e-08,
  sum_decode_tokensmsumprefill_tokens = -5.411726077554151e-07,
  sum_decode_tokensmmaxprefill_tokens = 5.566536022214733e-07,
  sum_decode_tokensmnumprefills = 0.0001,
  sum_prefill_tokenss2 = -8.271329134850922e-08,
  sum_prefill_tokensmmaxprefill_tokens = 8.158530128737798e-07,
  sum_prefill_tokensmnumprefills = 5.1538221504621096e-05,
  max_prefill_tokenss2 = -6.40230381522052e-07,
  max_prefill_tokensmnumprefills = -0.0001,
  num_prefillss2 = -0.001,
  intercept = 0.0062598062609983355,
  schedule_time = 600.0,
  update_time = 100.0,
  queue_overhead = 1024.6671162870252,
  vllm_overhead = 5398.199664878566,


In [21]:
# Test on baseline parameters
black_box_function(
    sum_decode_tokens= 3.38283913e-05,
    sum_prefill_tokens= 9.82346868e-06,
    max_prefill_tokens= -3.11237143e-06,
    num_prefills=1.50291993e-03,
    sum_decode_tokenss2=4.24173346e-08,
    sum_decode_tokensmsumprefill_tokens=-1.06897441e-07,
    sum_decode_tokensmmaxprefill_tokens=1.92844617e-07,
    sum_decode_tokensmnumprefills=2.60430816e-05,
    sum_prefill_tokenss2=-7.72212201e-09,
    sum_prefill_tokensmmaxprefill_tokens=2.67059068e-08,
    sum_prefill_tokensmnumprefills=7.20303280e-06,
    max_prefill_tokenss2=-1.06904337e-08,
    max_prefill_tokensmnumprefills=-1.05254706e-05,
    num_prefillss2=-9.19828725e-04,
    intercept=0.005708624032334771,
    schedule_time=544,
    update_time=80,
    queue_overhead=1000,
    vllm_overhead=6000
)


np.float64(-0.19329247886431383)

In [18]:
# Test on current best parameters
black_box_function(
    sum_decode_tokens = 0.0001,
    sum_prefill_tokens = 1.7671902233442285e-06,
    max_prefill_tokens = -4.582863926798631e-09,
    num_prefills = 0.01,
    sum_decode_tokenss2 = 7.376065441210368e-08,
    sum_decode_tokensmsumprefill_tokens = -5.411726077554151e-07,
    sum_decode_tokensmmaxprefill_tokens = 5.566536022214733e-07,
    sum_decode_tokensmnumprefills = 0.0001,
    sum_prefill_tokenss2 = -8.271329134850922e-08,
    sum_prefill_tokensmmaxprefill_tokens = 8.158530128737798e-07,
    sum_prefill_tokensmnumprefills = 5.1538221504621096e-05,
    max_prefill_tokenss2 = -6.40230381522052e-07,
    max_prefill_tokensmnumprefills = -0.0001,
    num_prefillss2 = -0.001,
    intercept = 0.0062598062609983355,
    schedule_time = 600.0,
    update_time = 100.0,
    queue_overhead = 1024.6671162870252,
    vllm_overhead = 5398.199664878566,
)

np.float64(-0.19083367768801712)

In [22]:
import os

vllm_dir = "./blackbox"
sim_dir = "./results/sweep_params"
sweep_configs = {
    'num_prompts': [400],
    'request_rate': [32],
    'temperature': [0.0],
    'max_num_batched_tokens': [256],
    'long_prefill_token_threshold': [16],
    'datasets': [{'name': 'sharegpt', 'path': 'ShareGPT_V3_unfiltered_cleaned_split.json'}]
}


# graphing parameters
x_axis = 'long_prefill_token_threshold'
y_axis = 'max_num_batched_tokens'
metrics = ['ttft_accuracy', 'tpot_accuracy', 'e2e_accuracy', 'duration_accuracy']
plotting_data = {}
for metric in metrics:
    plotting_data[metric] = {}

# Find matching experiments
matching_pairs = find_matching_experiments(sim_dir, vllm_dir, sweep_configs)

print(f"Found {len(matching_pairs)} matching experiment pairs")

# Process each pair
for sim_file, vllm_file, config in matching_pairs:
    print(f"\nProcessing: {os.path.basename(sim_file)}")
    
    # Parse results
    sim_results = parse_sim_results(sim_file)
    vllm_results = parse_vllm_results(vllm_file)
    
    print(f"Simulation results: {sim_results}")
    print(f"vLLM results: {vllm_results}")
    
    # Calculate errors
    errors = calculate_errors(sim_results, vllm_results)
    # Print results
    for metric, value in errors.items():
        print(f"{metric}: {value}")
        

# plot_results(plotting_data, x_axis, y_axis, sweep_configs)

Found 1 matching experiment pairs

Processing: exp_400p_32r_0.0t_256mbt_16lpt_sharegpt.txt
Simulation results: {'ttfts': [0.321262, 0.013161, 0.054838, 0.035236, 0.018522, 0.015225, 0.009926, 0.02314, 0.257809, 0.161596, 0.085921, 0.00854, 0.095208, 0.081748, 0.007947, 0.132033, 0.046587, -0.002435, 0.060552, 0.037202, 0.018954, 0.113175, -0.014189, 0.097684, 0.023572, 0.025117, 0.05388, 0.087228, 0.043237, 0.034136, 0.236576, 0.109175, 0.002985, 0.271902, 0.020893, 0.277789, 0.103632, 0.016037, 0.083214, 0.012657, 0.005729, 0.030852, 0.379665, 0.022027, 0.029003, 0.012863, 0.147879, 0.034749, 0.122158, 0.011842, 0.016572, 0.094467, 0.182415, 0.327781, 0.138671, 0.027778, 0.013955, 0.018263, 0.004143, 0.002798, 0.125809, 0.063634, 0.053742, 0.015651, 0.248811, 0.320028, 0.048227, 0.089059, 0.006563, 0.004142, 0.150716, 0.010344, 0.003421, 0.01065, 0.182252, 0.025313, 0.043268, 0.140175, 0.147561, 0.016412, 0.007891, 0.022446, 0.019399, 0.006843, 0.300191, 0.223544, 0.050551, 0.128837, 