In [1]:
import glob
import json

import pandas as pd

In [2]:
# Glob pattern selecting every per-run JSON result file of the experiment under analysis.
RUNS_GLOB = "/n/home10/jlakha/llm-routing/outputs/experiment-1797682/runs/*.json"

# glob.glob() yields matches in filesystem-dependent order; sorting makes the
# processing order (and therefore which run wins when two runs share the same
# key downstream) reproducible across machines and re-runs.
paths = sorted(glob.glob(RUNS_GLOB))

In [3]:
# {2500, 5000, 7500, 10000} x {0.25, 0.5, 0.75, 1.0}: /n/home10/jlakha/llm-routing/outputs/experiment-1791347/runs/*.json
# {1000, 2000, ..., 10000} x {0.0, 0.1, ..., 1.0}: /n/home10/jlakha/llm-routing/outputs/experiment-1797682/runs/*.json

In [4]:
# Nested summary of all runs: statistics[target_cost][predictor][target_load] -> record.
# If two run files share the same (target_cost, predictor, target_load) triple,
# the one processed later replaces the earlier one.
statistics = {}

for path in paths:
    with open(path, "r") as f:
        run = json.load(f)

    config = run['config']

    # Pull every field out of the run before touching `statistics`, so a run
    # with a missing key raises without leaving a partial entry behind.
    target_cost = config['target_cost']
    target_load = config['target_load']
    quality = run['total']['quality']['mean']
    latency = run['total']['latency']['mean']
    predictor = config['quality_predictor']
    share_70b = run['meta-llama/Llama-3.1-70B-Instruct']['num_tokens']['share']
    run_identifier = config['run_identifier']

    record = dict(
        latency=latency,
        mean_quality=quality,
        predictor=predictor,
        target_load=target_load,
        tokens_70_share=share_70b,
        run_identifier=run_identifier,
    )

    # setdefault creates the intermediate levels on first sight of a key.
    by_predictor = statistics.setdefault(target_cost, {})
    by_predictor.setdefault(predictor, {})[target_load] = record


statistics

{0.4: {'perfect': {1000: {'latency': 9.69833262205124,
    'mean_quality': 3.84,
    'predictor': 'perfect',
    'target_load': 1000,
    'tokens_70_share': 0.127,
    'run_identifier': '1797682-56'},
   8000: {'latency': 71.73910795688629,
    'mean_quality': 4.25,
    'predictor': 'perfect',
    'target_load': 8000,
    'tokens_70_share': 0.385,
    'run_identifier': '1797682-52'},
   9000: {'latency': 80.15930609464645,
    'mean_quality': 4.25,
    'predictor': 'perfect',
    'target_load': 9000,
    'tokens_70_share': 0.385,
    'run_identifier': '1797682-64'},
   5000: {'latency': 42.90731979370117,
    'mean_quality': 4.1,
    'predictor': 'perfect',
    'target_load': 5000,
    'tokens_70_share': 0.279,
    'run_identifier': '1797682-60'},
   10000: {'latency': 73.81916588068009,
    'mean_quality': 4.25,
    'predictor': 'perfect',
    'target_load': 10000,
    'tokens_70_share': 0.385,
    'run_identifier': '1797682-54'},
   3000: {'latency': 25.619080309867858,
    'mean_qua

In [5]:
# Widen pandas' text output so wide frames print on one line instead of wrapping.
pd.options.display.width = 1000

In [6]:
# Re-key the top level in ascending target_cost order.
statistics = dict(sorted(statistics.items()))

# Print one table per (target_cost, router) pair: columns are the target_load
# values in ascending order, rows are the fields recorded for each run.
for target_cost in statistics:
    v = statistics[target_cost]
    for router in sorted(v):
        per_load = v[router]
        data = {load: per_load[load] for load in sorted(per_load)}
        print(target_cost, router)
        print(pd.DataFrame(data))
        print('\n\n')

0.0 perfect
                     0           1000       2000        3000       4000        5000       6000        7000       8000        9000        10000
latency            1.77933     1.53526    2.36681    2.375087   1.797915      1.8164   1.718837    1.436166   1.313156    1.337462     1.61704
mean_quality           3.4        3.41       3.41        3.41       3.41        3.41       3.41        3.41       3.41        3.41        3.41
predictor          perfect     perfect    perfect     perfect    perfect     perfect    perfect     perfect    perfect     perfect     perfect
target_load              0        1000       2000        3000       4000        5000       6000        7000       8000        9000       10000
tokens_70_share      0.003       0.019      0.019       0.019      0.019       0.019      0.019       0.019      0.019       0.019       0.019
run_identifier   1797682-0  1797682-12  1797682-2  1797682-14  1797682-4  1797682-16  1797682-6  1797682-18  1797682-8  1797682-20