In [75]:
import os
import re
from collections import defaultdict
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [84]:
# Root of the benchmark results tree that get_result_files() walks; result
# files sit two directory levels below it (e.g. <model>/<batch>/result-C<N>.json).
result_dir = '/result_opt'
# Output path handed to df.to_csv() for the summary table.
csv_file = 'res.csv'

In [77]:
def get_json(fn):
    """Load and return the JSON document stored at path ``fn``.

    This is a best-effort loader: if the file cannot be opened or parsed,
    the error is printed and ``None`` is returned instead of raising, so
    callers must check for ``None`` before indexing the result.

    Args:
        fn: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or ``None`` if reading or parsing failed.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default text encoding when decoding the file.
        with open(fn, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        print(f"Opening file {fn} failed with exception {e}")
        return None

def get_result_files(directory):
    """Index the files under ``directory`` by their first two sub-directory levels.

    Every file located at least two directory levels below ``directory`` is
    recorded under ``result[level1][level2]``, where ``level1`` and ``level2``
    are the first two path components relative to ``directory``. Files that
    sit directly in ``directory`` or only one level below it have no second
    level to group by and are skipped.

    Args:
        directory: Root directory to walk. A trailing path separator is
            tolerated.

    Returns:
        A ``defaultdict`` of ``defaultdict(list)`` mapping
        ``level1 -> level2 -> [full file paths]``. Paths are built with
        ``os.path.join(root, file)`` and listed in sorted order.
    """
    all_files = defaultdict(lambda: defaultdict(list))
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order, so the
        # key order of the returned mapping does not depend on the filesystem.
        dirs.sort()
        # relpath is immune to a trailing separator on `directory` and to the
        # platform's path separator, unlike slicing by len(directory) + 1.
        parts = os.path.relpath(root, directory).split(os.sep)
        if len(parts) < 2:
            # Too shallow to have both grouping levels; keep walking deeper.
            continue
        for file in sorted(files):
            all_files[parts[0]][parts[1]].append(os.path.join(root, file))
    return all_files

def get_highest_concurrency_file(files):
    """Pick the file whose name carries the largest ``C<number>`` value.

    The number is read from the file's base name, which must end in
    ``C<digits>.<extension>`` (for example ``result-C272.json``). Files whose
    names do not follow that pattern are ignored rather than aborting the
    whole selection.

    Args:
        files: Iterable of file paths.

    Returns:
        A ``(number, path)`` tuple for the file with the largest number.
        Ties on the number are broken by comparing the paths.

    Raises:
        ValueError: If no file name matches the ``C<digits>.<extension>``
            pattern (including when ``files`` is empty).
    """
    candidates = []
    for f in files:
        # Match on the base name only, so a 'C' or '.' in a directory name
        # can never be mistaken for part of the concurrency marker.
        m = re.search(r'C(\d+)\.[^.]*$', os.path.basename(f))
        if m:
            candidates.append((int(m.group(1)), f))
    if not candidates:
        raise ValueError("no file named like '...C<number>.<ext>' among the given files")
    return max(candidates)

def extract_results_from_json(fn):
    """Summarise one benchmark result file as a flat, table-ready record.

    The file is loaded with ``get_json``. Per-prompt token counts are the
    totals divided by ``num_prompts`` and truncated to ``int``; 'Concurrency'
    is the file's ``request_rate`` converted to ``int``; every other numeric
    field is rounded to two decimals.

    Args:
        fn: Path of the JSON result file.

    Returns:
        A dict mapping column label to value, in display order.

    Raises:
        KeyError: If an expected field is missing from the file.
        TypeError: If ``get_json`` could not load the file and returned
            ``None``.
    """
    data = get_json(fn)

    def mean_per_prompt(total_key):
        # Average token count per prompt, truncated toward zero.
        return int(data[total_key] / data['num_prompts'])

    record = {
        'Model': data['model_id'],
        'Input Tokens': mean_per_prompt('total_input_tokens'),
        'Output Tokens': mean_per_prompt('total_output_tokens'),
        'Concurrency': int(data['request_rate']),
    }

    # (column label, JSON field) pairs, listed in output column order.
    rounded_columns = (
        ('Output tput (tokens/sec)', 'output_throughput'),
        ('Total tput (tokens/sec)', 'total_token_throughput'),
        ('TTFT avg (ms)', 'mean_ttft_ms'),
        ('TTFT P50 (ms)', 'p50_ttft_ms'),
        ('TTFT P90 (ms)', 'p90_ttft_ms'),
        ('TTFT P99 (ms)', 'p99_ttft_ms'),
        ('TPOT avg (ms)', 'mean_tpot_ms'),
        ('TPOT P50 (ms)', 'p50_tpot_ms'),
        ('TPOT P90 (ms)', 'p90_tpot_ms'),
        ('TPOT P99 (ms)', 'p99_tpot_ms'),
        ('Request Throughput', 'request_throughput'),
        ('Request Latency avg (ms)', 'mean_e2el_ms'),
        ('Request Latency P50 (ms)', 'p50_e2el_ms'),
        ('Request Latency P90 (ms)', 'p90_e2el_ms'),
        ('Request Latency P99 (ms)', 'p99_e2el_ms'),
    )
    for column, field in rounded_columns:
        record[column] = round(data[field], 2)
    return record

In [78]:
# Index every result file under result_dir as files[level1][level2] -> list of paths.
files = get_result_files(result_dir)

In [79]:
# Spot-check one model/batch pair: yields (concurrency, path) for the file
# with the largest C<N> value in its name.
f = get_highest_concurrency_file(files['Qwen--Qwen2.5-1.5B-Instruct']['I128-O128'])
f

(272, '/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I128-O128/result-C272.json')

In [80]:
# Spot-check the summary record extracted from that file (f[1] is its path).
e = extract_results_from_json(f[1])
e

{'Model': 'Qwen/Qwen2.5-1.5B-Instruct',
 'Input Tokens': 128,
 'Output Tokens': 128,
 'Concurrency': 272,
 'Output tput (tokens/sec)': 2372.81,
 'Total tput (tokens/sec)': 4745.63,
 'TTFT avg (ms)': 2111.18,
 'TTFT P50 (ms)': 1814.52,
 'TTFT P90 (ms)': 2372.38,
 'TTFT P99 (ms)': 12954.04,
 'TPOT avg (ms)': 93.39,
 'TPOT P50 (ms)': 93.83,
 'TPOT P90 (ms)': 104.92,
 'TPOT P99 (ms)': 107.62,
 'Request Throughput': 18.54,
 'Request Latency avg (ms)': 13972.29,
 'Request Latency P50 (ms)': 13559.06,
 'Request Latency P90 (ms)': 14143.67,
 'Request Latency P99 (ms)': 25998.57}

In [83]:
# Build one summary row per (model, batch) pair from its highest-concurrency run.
# The path is taken with [1] rather than unpacked into `_`, because rebinding
# `_` in a notebook stops IPython from using it for the last cell output.
values = [
    extract_results_from_json(get_highest_concurrency_file(fns)[1])
    for batches in files.values()
    for fns in batches.values()
]
df = pd.DataFrame(values)
df

Unnamed: 0,Model,Input Tokens,Output Tokens,Concurrency,Output tput (tokens/sec),Total tput (tokens/sec),TTFT avg (ms),TTFT P50 (ms),TTFT P90 (ms),TTFT P99 (ms),TPOT avg (ms),TPOT P50 (ms),TPOT P90 (ms),TPOT P99 (ms),Request Throughput,Request Latency avg (ms),Request Latency P50 (ms),Request Latency P90 (ms),Request Latency P99 (ms)
0,Qwen/Qwen2.5-1.5B-Instruct,1024,1024,64,1183.08,2366.15,2835.91,3278.66,4220.41,4311.33,51.29,50.92,52.82,53.91,1.16,55306.75,55384.25,55811.73,55826.14
1,Qwen/Qwen2.5-1.5B-Instruct,1024,128,64,806.41,7257.71,2866.02,3386.28,4229.77,4348.12,56.74,53.93,69.13,78.03,6.3,10071.99,10133.57,10281.26,10291.0
2,Qwen/Qwen2.5-1.5B-Instruct,128,128,272,2372.81,4745.63,2111.18,1814.52,2372.38,12954.04,93.39,93.83,104.92,107.62,18.54,13972.29,13559.06,14143.67,25998.57
3,Qwen/Qwen2.5-1.5B-Instruct,2048,128,32,452.96,7700.25,3290.32,3477.77,4509.74,4827.94,44.66,38.37,53.57,68.96,3.54,8961.97,8957.08,9378.14,9382.96
4,Qwen/Qwen2.5-14B-Instruct-1M,1024,1024,6,72.47,144.94,3420.5,4004.97,4081.74,4123.65,79.49,78.96,81.98,82.37,0.07,84738.37,84774.12,85173.6,85174.45
5,Qwen/Qwen2.5-14B-Instruct-1M,1024,128,6,56.39,507.51,3390.54,3945.73,4041.29,4053.96,80.22,76.09,100.58,101.36,0.44,13578.89,13644.81,13716.79,13717.75
6,Qwen/Qwen2.5-14B-Instruct-1M,128,128,38,323.51,647.01,3032.39,3230.01,3340.03,3344.32,93.8,92.11,100.38,115.97,2.53,14944.57,14987.55,15069.71,15190.27
7,Qwen/Qwen2.5-14B-Instruct-1M,2048,128,4,34.57,587.76,4124.96,4944.56,5228.78,5243.31,83.89,76.84,105.16,106.43,0.27,14779.18,14775.06,15093.96,15100.74
8,Qwen/Qwen2.5-7B-Instruct,1024,1024,16,287.94,575.87,3720.36,4089.8,4174.61,4176.88,51.9,51.59,51.83,55.41,0.28,56811.27,56874.81,57107.56,57110.09
9,Qwen/Qwen2.5-7B-Instruct,1024,128,16,196.73,1770.58,3728.36,4056.54,4209.57,4212.0,51.91,49.52,49.76,79.62,1.54,10320.36,10374.19,10524.44,10527.22


In [85]:
# Persist the summary table to csv_file; index=False omits the row-index column.
df.to_csv(csv_file, index=False)

In [46]:
# Inspect the nested mapping: first-level directory -> second-level directory -> file paths.
files

defaultdict(<function __main__.get_result_files.<locals>.<lambda>()>,
            {'Qwen--Qwen2.5-1.5B-Instruct': defaultdict(list,
                         {'I1024-O1024': ['/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O1024/result-C16.json',
                           '/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O1024/result-C32.json',
                           '/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O1024/result-C48.json',
                           '/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O1024/result-C64.json'],
                          'I1024-O128': ['/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O128/result-C16.json',
                           '/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O128/result-C32.json',
                           '/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O128/result-C48.json',
                           '/result_opt/Qwen--Qwen2.5-1.5B-Instruct/I1024-O128/result-C64.json'],
                          'I128-O128': ['/result_opt/Qwen