In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json

In [2]:
import matplotlib as mpl
mpl.use("pgf")
plt.rcParams.update({
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "font.serif": "Times New Roman"
})
plt.rcParams['text.usetex'] = True

In [3]:
input_df = pd.read_csv("../../../../data/downsampled_data_10000_rows_first_arrivals.csv")
input_df = input_df.loc[(input_df["PromptTokens"] + 7 ) // 8 + (input_df["DecodeTokens"] + 7 ) // 8 < 2000]

In [4]:
input_df

Unnamed: 0.1,Unnamed: 0,PromptTokens,DecodeTokens,UserID,TIMESTAMP,app,input_99_app,output_99_app,sys_99_app,llmcalls,...,input_avg_app,output_avg_app,sys_avg_app,input_min_app,output_min_app,sys_min_app,input_max_app,output_max_app,sys_max_app,ArrivalTime
0,0,1422,143,11213,2024-09-04 06:00:00,0,25779.0,523.0,0,1,...,4489.778352,212.825453,0,402.0,11.0,0,35233.0,1200.0,0,0
2,2,4252,62,327626,2024-09-04 06:00:00,17,24866.0,590.0,0,12,...,5770.743586,57.641191,0,289.0,0.0,0,116005.0,2049.0,0,0
3,3,5854,14,325465,2024-09-04 06:00:00,17,24866.0,590.0,0,4,...,5770.743586,57.641191,0,289.0,0.0,0,116005.0,2049.0,0,0
4,4,4828,6,326690,2024-09-04 06:00:00,17,24866.0,590.0,0,4,...,5770.743586,57.641191,0,289.0,0.0,0,116005.0,2049.0,0,0
5,5,1798,157,71447,2024-09-04 06:00:00,16,15001.0,287.0,0,1,...,2901.501569,153.228818,0,962.0,0.0,0,16921.0,687.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,4913,271,123966,2024-09-04 06:01:11,9,23936.0,635.0,0,2,...,6481.697241,66.951571,0,490.0,1.0,0,76359.0,1763.0,0,71
9996,9996,13217,3,327038,2024-09-04 06:01:11,17,24866.0,590.0,0,3,...,5770.743586,57.641191,0,289.0,0.0,0,116005.0,2049.0,0,71
9997,9997,13805,3,253538,2024-09-04 06:01:11,17,24866.0,590.0,0,3,...,5770.743586,57.641191,0,289.0,0.0,0,116005.0,2049.0,0,71
9998,9998,8717,87,9935,2024-09-04 06:01:11,17,24866.0,590.0,0,2,...,5770.743586,57.641191,0,289.0,0.0,0,116005.0,2049.0,0,71


In [5]:
fig = plt.figure(figsize=(6, 5))

df_here = input_df.groupby("ArrivalTime").agg({"PromptTokens": "sum", "DecodeTokens": "sum", "llmcalls": "count"}).reset_index()
arrival_cumsum = list(df_here["llmcalls"])
for i in range(1, len(arrival_cumsum)):
    arrival_cumsum[i] += arrival_cumsum[i - 1]
plt.plot(df_here["ArrivalTime"], df_here["llmcalls"], label="All users")

idx = 0
for user in input_df["UserID"].value_counts().index:
    df_here = input_df.loc[input_df["UserID"] == user].groupby(
        "ArrivalTime").agg({"PromptTokens": "sum", "DecodeTokens": "sum", "llmcalls": "count"}).reset_index()
    arrival_cumsum = list(df_here["llmcalls"])
    for i in range(1, len(arrival_cumsum)):
        arrival_cumsum[i] += arrival_cumsum[i - 1]
    plt.plot(df_here["ArrivalTime"], df_here["llmcalls"], label=f"User {idx + 1}")
    idx += 1
    if idx == 5:
        break
plt.xlabel("Time (s)", fontsize=20)
plt.ylabel("RPS", fontsize=20)
plt.legend(ncols=2, fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
fig.tight_layout()
plt.savefig("arrival_curve.pdf", dpi=300)
# plt.show()
plt.close(fig)

In [6]:
df = pd.read_csv("../downsampled_data_10000_rows_first_arrivals.csv___fs_fair_interaction_limit_expect___10___2000___avg.csv")
print(len(df))
df.head()

9162


Unnamed: 0.1,Unnamed: 0,adapter_dir,prompt_len,output_len,request_latency,first_token_latency,req_time,interaction_id,req_id,sys_len,app,input99app,sys99app,output99app,priorityfactor,app_limit,llmcalls,llmcalls_made
0,0,dummy-lora-7b-rank-8-0,178,18,2.769769,1.572559,0.0,0,0,0,0,4489,1,212,1,2000,1,1
1,1,dummy-lora-7b-rank-8-2,532,8,1.789385,1.571362,0.0,160977,2,0,17,5770,1,57,1,2000,12,1
2,2,dummy-lora-7b-rank-8-3,732,2,1.6241,1.569804,0.0,160976,3,0,17,5770,1,57,1,2000,4,1
3,3,dummy-lora-7b-rank-8-4,604,1,1.569156,1.56888,0.0,160975,4,0,17,5770,1,57,1,2000,4,1
4,4,dummy-lora-7b-rank-8-5,225,20,2.827541,1.568466,0.0,157161,5,0,16,2901,1,153,1,2000,1,1


In [7]:
df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})

In [8]:
alpha = 1
beta = 2
gamma = 1

In [9]:
df["weighted_service_eq_2"] = alpha * df["input_avg_app"] + gamma * df["output_avg_app"] + beta * df["sys_avg_app"]
df["weighted_service_eq_3"] = (alpha * df["prompt_len"] + gamma * df["output_len"]) / df["weighted_service_eq_2"]

In [10]:
print(df.columns)
def adjust_wsc(row, cutoff_time):
    if row["request_latency"] + row["req_time"] > cutoff_time:
        return 0
    return row["weighted_service_eq_3"]

Index(['Unnamed: 0', 'adapter_dir', 'prompt_len', 'output_len',
       'request_latency', 'first_token_latency', 'req_time', 'interaction_id',
       'req_id', 'sys_len', 'app', 'input_avg_app', 'sys_avg_app',
       'output_avg_app', 'priorityfactor', 'app_limit', 'llmcalls',
       'llmcalls_made', 'weighted_service_eq_2', 'weighted_service_eq_3'],
      dtype='object')


In [11]:
df_users_wsc = df.groupby("adapter_dir")["weighted_service_eq_3"].agg([("wsc", "sum")]).reset_index()

In [12]:
df_users_wsc["adapter_dir"] = df_users_wsc["adapter_dir"].apply(lambda x: int(x.split("-")[-1]))
df_users_wsc.head()

Unnamed: 0,adapter_dir,wsc
0,0,1.581779
1,1,0.11737
2,10,0.544331
3,1000,0.669262
4,1001,1.097183


In [13]:
df_users_wsc["wsc"].describe()

count    1365.000000
mean        0.729748
std         5.035233
min         0.006951
25%         0.143935
50%         0.362824
75%         0.754650
max       175.695945
Name: wsc, dtype: float64

In [14]:
def assign_wsc_bucket(x):
    if x == 0:
        return 0
    if x <= 1:
        return 1
    return 2
df_users_wsc["wsc_bins"] = df_users_wsc["wsc"].apply(assign_wsc_bucket)

In [15]:
df_users_wsc["wsc_bins"].value_counts()

wsc_bins
1    1148
2     217
Name: count, dtype: int64

In [16]:
fig = plt.figure(figsize=(6, 5))
# arr = sorted(list(df_users_wsc["wsc"]))
# plt.plot([_ for _ in range(len(arr))], arr)
arr = list(df_users_wsc["wsc_bins"].value_counts())
arr = [0, arr[0], arr[1]]
plt.bar([0, 1.5, 3], arr, label="FairServe")
plt.xticks(ticks=[0, 1.5, 3], labels=["$S_i = 0$", "$0 < S_i$\n$S_i \leq 1$", "$1<S_i$"], fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel("Weighted Service Counter ($S_i$)", fontsize=20)
plt.ylabel("Number of users", fontsize=20)
plt.legend(fontsize=16)
fig.tight_layout()
plt.savefig("tmp.pdf")
plt.close(fig)

  plt.xticks(ticks=[0, 1.5, 3], labels=["$S_i = 0$", "$0 < S_i$\n$S_i \leq 1$", "$1<S_i$"], fontsize=14)


In [17]:
traces_to_plot = {
                    "../downsampled_data_10000_rows_first_arrivals.csv___lshare_fair___10___2000___avg.csv": "RPM",
                    "../downsampled_data_10000_rows_first_arrivals.csv___vtc_fair___2000___avg.csv": "VTC",
                    "../downsampled_data_10000_rows_first_arrivals.csv___fs_fair_wsc_expect___10___2000___avg.csv": "FairServe WSC",
                    "../downsampled_data_10000_rows_first_arrivals.csv___fs_fair_interaction_limit_expect___10___2000___avg.csv": "FairServe",
}

In [18]:
cutoff_time = 100

In [19]:
fig = plt.figure(figsize=(6, 5))

itr = 0
offset = 1.5/(len(traces_to_plot) + 1)

hatch_pattern = ["/", "\\", "|", "-", "+", "x", "o", "O", ".", "*"]
colors = ["#90ee90", "#add8e6", "#ffb6c1", "#e6e6fa", "#d3d3d3", "#f08080", "#ffffe0", "#87cefa", "#d3d3d3"]

for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df["weighted_service_eq_2"] = alpha * df["input_avg_app"] + gamma * df["output_avg_app"] + beta * df["sys_avg_app"]
    df["weighted_service_eq_3"] = (alpha * df["prompt_len"] + gamma * df["output_len"]) / df["weighted_service_eq_2"]
    df["weighted_service_eq_3"] = df.apply(lambda x: adjust_wsc(x, cutoff_time), axis=1)
    df_users_wsc = df.groupby("adapter_dir")["weighted_service_eq_3"].agg([("wsc", "sum")]).reset_index()
    df_users_wsc["adapter_dir"] = df_users_wsc["adapter_dir"].apply(lambda x: int(x.split("-")[-1]))
    df_users_wsc["wsc_bins"] = df_users_wsc["wsc"].apply(assign_wsc_bucket)
    arr = []
    for i in range(3):
        try:
            arr.append(df_users_wsc["wsc_bins"].value_counts().loc[i])
        except:
            arr.append(0)
    print(arr)
    x = [i * 1.5 + itr * offset for i in range(3)]
    plt.bar(x, arr, label=trace_name, width=offset, hatch=hatch_pattern[itr], color=colors[itr], alpha=.99)
    itr += 1

t = [i * 1.5 + itr//2 * offset - offset/2 for i in range(3)]
plt.xticks(ticks=t, labels=["$S_i=0$", "$0<S_i\leq 1$", "$1<S_i$"], fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel(f"Weighted Service Counter ($S_i$)", fontsize=20)
plt.ylabel("Number of users", fontsize=20)
plt.legend(fontsize=16)
fig.tight_layout()
plt.savefig("wsc_distribution_buckets.pdf", dpi=300)
plt.close(fig)

  plt.xticks(ticks=t, labels=["$S_i=0$", "$0<S_i\leq 1$", "$1<S_i$"], fontsize=14)


[740, 576, 49]
[831, 501, 33]
[831, 501, 33]
[823, 509, 33]


In [20]:
del df
del df_users_wsc

In [21]:
# fig = plt.figure(figsize=(6, 5))

req_completed = []
interactions_completed = []
users_served = []
users_with_completed_interactions = []
interactions_interrupted = []
users_with_interrupted_interactions = []

for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    if cutoff_time < 71:
        df = df.loc[df["req_time"] <= cutoff_time]
    df_interactions_wasted = df.loc[((df["first_token_latency"] < 0) & (df["llmcalls_made"] > 1))]
    df = df.loc[df["req_time"] >= 0]
    df = df.loc[df["request_latency"] > 0]
    df = df.loc[df["first_token_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    req_completed.append(len(df))
    users_served.append(len(df["adapter_dir"].value_counts()))
    df = df.loc[df["llmcalls"] > 1]
    interactions_interrupted.append(max(0, len(df_interactions_wasted["interaction_id"].value_counts()) - 3))
    users_with_interrupted_interactions.append(max(0, len(df_interactions_wasted["adapter_dir"].value_counts()) - 1))
    interactions_completed.append(len(df))
    users_with_completed_interactions.append(len(df["adapter_dir"].value_counts()))
pd.DataFrame({"Strategy": traces_to_plot.values(), "Requests Serve": req_completed, "Users Served": users_served,
             "Interactions Completed": interactions_completed, "Users with completed interactions": users_with_completed_interactions})

Unnamed: 0,Strategy,Requests Serve,Users Served,Interactions Completed,Users with completed interactions
0,RPM,2004,605,1486,343
1,VTC,2262,534,1462,295
2,FairServe WSC,2270,534,1471,295
3,FairServe,2291,542,1481,296


In [22]:
# fig = plt.figure(figsize=(6, 5))

prompt_tokens = []
decode_tokens = []
tokens_wasted = []
req_throttled = []
users_throttled = []

itr = 0
offset = 1.5/(len(traces_to_plot) + 1)

ongoing_interactions = []
users_with_ongoing_interactions = []

for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    if cutoff_time < 71:
        df = df.loc[df["req_time"] <= cutoff_time]
    req_throttled.append(len(df.loc[df["request_latency"] < 0]))
    users_throttled.append(df.loc[df["request_latency"] < 0]["adapter_dir"].nunique())
    df = df.loc[df["req_time"] >= 0]
    df = df.loc[df["request_latency"] > 0]
    df = df.loc[df["first_token_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    ongoing_requests = df.loc[df["FinishTime"] > cutoff_time]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    prompt_tokens.append(df["prompt_len"].sum() / 100)
    decode_tokens.append(df["output_len"].sum() / 100)

    ongoing_interactions.append(ongoing_requests.loc[ongoing_requests["llmcalls_made"] > 1]["interaction_id"].nunique())
    users_with_ongoing_interactions.append(ongoing_requests.loc[ongoing_requests["llmcalls_made"] > 1]["adapter_dir"].nunique())

plt.bar([0.5, 2.5, 4.5, 6.5], prompt_tokens, label="Prompt tokens", width=0.5)
plt.bar([1, 3, 5, 7], decode_tokens, label="Decode tokens", width=0.5)

plt.xticks(ticks=[1, 3, 5, 7], labels=["FairServe\nWSC", "FairServe\nInteraction", "LShare Fair", "VTC"], fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel(f"Strategy (Execution time {cutoff_time})", fontsize=20)
plt.ylabel("Throughput (tokens/s, log scale)", fontsize=20)
plt.legend(fontsize=16)
fig.tight_layout()
plt.savefig("tmp.pdf", dpi=300)
plt.close(fig)

In [23]:
df_metrics = pd.DataFrame({"Strategy": traces_to_plot.values(), 
                           "Requests Served": req_completed, 
                           "Users Served": users_served, 
                           "Users Throttled": users_throttled,
                           "Requests Throttled": req_throttled, 
                           "Interactions Completed": interactions_completed, 
                           "Interactions Throttled": interactions_interrupted,
                           "Ongoing interactions": ongoing_interactions, 
                           "Users with interrupted interactions": users_with_interrupted_interactions,
                           "Users with ongoing interactions": users_with_ongoing_interactions,
                           "Users with completed interactions": users_with_completed_interactions,
                           "Prompt": prompt_tokens, 
                           "Decode": decode_tokens
                          })

In [24]:
df_metrics

Unnamed: 0,Strategy,Requests Served,Users Served,Users Throttled,Requests Throttled,Interactions Completed,Interactions Throttled,Ongoing interactions,Users with interrupted interactions,Users with ongoing interactions,Users with completed interactions,Prompt,Decode
0,RPM,2004,605,52,2843,1486,255,1033,47,699,343,11155.15,182.85
1,VTC,2262,534,0,0,1462,0,1283,0,743,295,12190.1,262.97
2,FairServe WSC,2270,534,0,0,1471,0,1282,0,742,295,12217.11,267.05
3,FairServe,2291,542,3,79,1481,0,1275,0,740,296,12247.98,267.99


In [25]:
print(100*2823/ (2823+2004))
print(100*255/ (255+1486))
print(100*52/ (52+600))
print(100*47/ (380))

print(100*79/ (79+2291))
print(100*0/ (0+1481))
print(100*3/ (3+540))
print(100*0/ (296))

58.48353014294593
14.646754738655945
7.975460122699387
12.368421052631579
3.3333333333333335
0.0
0.5524861878453039
0.0


In [26]:
print(df_metrics.to_latex(index=False, float_format="{%.2f}"))

\begin{tabular}{lrrrrrrrrrrrr}
\toprule
Strategy & Requests Served & Users Served & Users Throttled & Requests Throttled & Interactions Completed & Interactions Throttled & Ongoing interactions & Users with interrupted interactions & Users with ongoing interactions & Users with completed interactions & Prompt & Decode \\
\midrule
RPM & 2004 & 605 & 52 & 2843 & 1486 & 255 & 1033 & 47 & 699 & 343 & {11155.15} & {182.85} \\
VTC & 2262 & 534 & 0 & 0 & 1462 & 0 & 1283 & 0 & 743 & 295 & {12190.10} & {262.97} \\
FairServe WSC & 2270 & 534 & 0 & 0 & 1471 & 0 & 1282 & 0 & 742 & 295 & {12217.11} & {267.05} \\
FairServe & 2291 & 542 & 3 & 79 & 1481 & 0 & 1275 & 0 & 740 & 296 & {12247.98} & {267.99} \\
\bottomrule
\end{tabular}



In [27]:
# percentage of throttled interactions among those which got feedback
# percentage of users with throttled interactions among those which got feecback

# percentage of users with throttled requests among those which got feedback
# 

In [28]:
throttled_user_request_distribution = []
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    # get throttled requests
    df_throttled = df.loc[df["request_latency"] < 0]
    users_throttled = df_throttled["adapter_dir"].unique()
    print(f"Users throttled by {trace_name}: {len(users_throttled)}")
    num_reqs = []
    for user in users_throttled:
        df_throttled_user_request = df.loc[df["adapter_dir"] == user]
        # print(f"User throttled by {trace_name} had {len(df_throttled_user_request)} requests")
        num_reqs.append(len(df_throttled_user_request))
    print(num_reqs)
    throttled_user_request_distribution.append(num_reqs)

Users throttled by RPM: 52
[1695, 574, 30, 130, 21, 14, 21, 12, 29, 20, 65, 15, 81, 19, 17, 33, 33, 16, 23, 25, 21, 55, 12, 21, 15, 13, 11, 45, 13, 47, 28, 22, 11, 56, 29, 13, 20, 24, 20, 18, 27, 28, 20, 25, 31, 12, 20, 14, 16, 21, 11, 11]
Users throttled by VTC: 0
[]
Users throttled by FairServe WSC: 0
[]
Users throttled by FairServe: 3
[1695, 574, 130]


In [29]:
itr = 0
bars = []
errors = []
for trace_name in traces_to_plot.values():
    # print(throttled_user_request_distribution[itr])
    arr = np.array(throttled_user_request_distribution[itr])
    print(np.std(arr))
    if np.mean(arr):
        bars.append(np.mean(arr))
    else:
        bars.append(0)
    try:
        errors.append(list(np.percentile(arr, [25, 75])))
    except:
        errors.append([0, 0])
    itr += 1

240.58217343468363
nan
nan
658.5338935004703


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [95]:
fig = plt.figure(figsize=(6, 5))

hatch_pattern = ["/", "\\", "|", "-", "+", "x", "o", "O", ".", "*"]
colors = ["#90ee90", "#add8e6", "#ffb6c1", "#e6e6fa", "#d3d3d3", "#f08080", "#ffffe0", "#87cefa", "#d3d3d3"]

for i in range(4):
    plt.bar([i + 1], bars[i], width=1, color=colors[i])
    plt.errorbar([i + 1], bars[i], yerr=[[errors[i][0]], [errors[i][1]]], fmt="o", color='r')

plt.xticks(ticks=[1, 2, 3, 4], labels=["RPM", "VTC", "FS (W)", "FS (W+I)"], fontsize=18)
plt.yticks(fontsize=20)
# plt.xlabel(f"Strategy", fontsize=20)
plt.ylabel("Request Distribution\nof Throttled Users", fontsize=20)
fig.tight_layout()
plt.savefig("throttled_user_request_distribution.pdf")
plt.close(fig)

In [31]:
ttft_distributions = []
tbt_distriubution = []
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.loc[df["request_latency"] > 0]
    df = df.loc[df["req_time"] + df["request_latency"] <= cutoff_time]
    ttft_mean = df["first_token_latency"].mean()
    ttft_25 = df["first_token_latency"].quantile([.25], interpolation="nearest").tolist()[0]
    ttft_75 = df["first_token_latency"].quantile([.75], interpolation="nearest").tolist()[0]
    ttft_distributions.append([ttft_mean, [ttft_25, ttft_75]])
    df["tbt"] = (df["request_latency"] - df["first_token_latency"]) / df["output_len"]
    tbt_mean = df["tbt"].mean()
    tbt_25 = df["tbt"].quantile([.25], interpolation="nearest").tolist()[0]
    tbt_75 = df["tbt"].quantile([.75], interpolation="nearest").tolist()[0]
    tbt_distriubution.append([tbt_mean, [tbt_25, tbt_75]])

In [32]:
ttft_distributions

[[40.20715267191866, [20.619670391082764, 60.226648807525635]],
 [42.57646585722922, [22.52255868911743, 63.72289490699768]],
 [42.821788648991856, [22.390435695648193, 63.23098969459534]],
 [42.53337373206939, [21.446158409118652, 63.02920460700989]]]

In [33]:
tbt_distriubution

[[0.02428758764917673, [0.003636026382446289, 0.022564036505562917]],
 [0.0349928528018646, [0.0017390251159667969, 0.02578411783490862]],
 [0.03314603702999006, [0.0019452571868896484, 0.02570192813873291]],
 [0.03464669899307469, [0.01284948984781901, 0.02557012438774109]]]

In [34]:
fig = plt.figure(figsize=(6, 5))

hatch_pattern = ["/", "\\", "|", "-", "+", "x", "o", "O", ".", "*"]
colors = ["#90ee90", "#add8e6", "#ffb6c1", "#e6e6fa", "#d3d3d3", "#f08080", "#ffffe0", "#87cefa", "#d3d3d3"]

l = ["TTFT", "TBT"]

# for i in range(2):
plt.bar([i for i in range(1, 5)], [val[0] for val in ttft_distributions], label="TTFT", width=0.5)
# plt.bar([i for i in range(1, 5)], [val[0] for val in tbt_distriubution], label="TBT", width=0.5)
# plt.errorbar([i for i in range(1, 5)], [val[0] for val in ttft_distributions],
#               yerr=[[val[1][0] for val in ttft_distributions], [val[1][1] for val in ttft_distributions]])
    # plt.bar([i + 1], [ttft_distributions[i][0]], width=0.5, color=colors[0], label="TTFT")
    # plt.bar([i + 1.5], [tbt_distriubution[i][0]], width=0.5, color=colors[1], label="TBT")
    # plt.errorbar([i + 1], bars[i], yerr=[[errors[i][0]], [errors[i][1]]], fmt="o", color='r')

plt.xticks(ticks=[1, 2, 3, 4], labels=traces_to_plot.values(), fontsize=18)
plt.yticks(fontsize=20)
plt.xlabel(f"Strategy", fontsize=20)
plt.ylabel("Average TTFT", fontsize=16)
# plt.legend(fontsize=20)
fig.tight_layout()
plt.savefig("tmp.pdf")
plt.close(fig)

In [35]:
plt.close('all')

In [36]:
cutoff_time = 72

In [37]:
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    print(df["input99app"].unique())
    print(df["output99app"].unique())

[ 4489  5770  2901  8829  6370  7011 14854  6481  1172  4606  1979   999
  3758  3714  1735  8028 16334  2515  3296  1809  9224  2587   330]
[212  57 153  53 102 577  74  66   4  48 128  32 538  92 535 499 134  86
  87  77  43]
[ 4489  5770  2901  8829  6370  7011 14854  6481  1172  4606  1979   999
  3758  3714  1735  8028 16334  2515  3296  1809  9224  2587   330]
[212  57 153  53 102 577  74  66   4  48 128  32 538  92 535 499 134  86
  87  77  43]
[ 4489  5770  2901  8829  6370  7011 14854  6481  1172  4606  1979   999
  3758  3714  1735  8028 16334  2515  3296  1809  9224  2587   330]
[212  57 153  53 102 577  74  66   4  48 128  32 538  92 535 499 134  86
  87  77  43]
[ 4489  5770  2901  8829  6370  7011 14854  6481  1172  4606  1979   999
  3758  3714  1735  8028 16334  2515  3296  1809  9224  2587   330]
[212  57 153  53 102 577  74  66   4  48 128  32 538  92 535 499 134  86
  87  77  43]


In [121]:
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    # df = df.loc[df["request_latency"] > 0]
    # df["FinishTime"] = df["req_time"] + df["request_latency"]
    # df = df.loc[df["FinishTime"] <= cutoff_time]
    df_app_0 = df.loc[df["input_avg_app"] == 5770]
    df_app_1 = df.loc[df["input_avg_app"] == 8028]
    df_app_2 = df.loc[df["input_avg_app"] == 999]
    print(trace_name)
    print(len(df_app_0))
    print(len(df_app_1))
    print(len(df_app_2))

RPM
4403
26
448
VTC
4403
26
448
FairServe WSC
4403
26
448
FairServe
4403
26
448


In [82]:
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    df = df.loc[df["request_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    df_app_0 = df.loc[df["input_avg_app"] == 5770]
    df_app_1 = df.loc[df["input_avg_app"] == 14854]
    df_app_2 = df.loc[df["input_avg_app"] == 999]
    print(trace_name)
    print(len(df_app_0))
    print(len(df_app_1))

RPM
1788
5
VTC
1756
56
FairServe WSC
1758
56
FairServe
1766
56


In [90]:
cutoff_time = 76

In [134]:
for trace, trace_name in traces_to_plot.items():
# for trace, trace_name in dict({"../downsampled_data_10000_rows_first_arrivals.csv___fs_fair_interaction_limit_expect___10___2000___avg.csv": "FS"}).items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    print(len(df.loc[df["app"] == 14]), df.loc[df["app"] == 14]["output_avg_app"].max())
    df = df.loc[df["request_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    df_app_0 = df.loc[df["input_avg_app"] == 5770]
    df_app_1 = df.loc[df["app"] == 14]
    df_app_2 = df.loc[df["input_avg_app"] == 999]
    app_0_total_time = df_app_0["FinishTime"].max() - df_app_0["req_time"].min()
    app_1_total_time = df_app_1["FinishTime"].max() - df_app_1["req_time"].min()
    app_2_total_time = df_app_2["FinishTime"].max() - df_app_2["req_time"].min()
    print(f"{trace_name}")
    # if trace_name != "FairServe":
    #     continue
    # for app in df["app"].unique():
    #     df_here = df.loc[df["app"] == app]
    #     # print(len(df_here))
    #     app_total_time = df_here["FinishTime"].max() - df_here["req_time"].min()
    #     app_tp = df_here['prompt_len'].sum() / app_total_time
    #     app_in = df_here["input_avg_app"].max()
    #     if  app_in > 14854 and  app_tp > 359.02:
    #         print(app, app_in, app_tp)
    #     elif app_in < 14854 and app_tp < 359.02 and 999 < app_in and 127.58 < app_tp:
    #         print(app, app_in, app_tp)
    #     elif app_in < 999 and app_tp < 127.58:
    #         print(app, app_in, app_tp)
    #     # print(df_here["input_avg_app"].max(), df_here['prompt_len'].sum() / app_total_time)
    # break
    print(f"\tApplication 0 prompt throughput: {df_app_0['prompt_len'].sum() / app_0_total_time} with {len(df_app_0)} requests")
    print(f"\tApplication 0 decode throughput: {df_app_0['output_len'].sum() / app_0_total_time} with {len(df_app_0)} requests")
    print(f"\tApplication 1 prompt throughput: {df_app_1['prompt_len'].sum() / app_1_total_time} with {len(df_app_1)} requests")
    print(f"\tApplication 1 decode throughput: {df_app_1['output_len'].sum() / app_1_total_time} with {len(df_app_1)} requests")
    print(f"\tApplication 2 prompt throughput: {df_app_2['prompt_len'].sum() / app_2_total_time} with {len(df_app_2)} requests")
    print(f"\tApplication 2 decode throughput: {df_app_2['output_len'].sum() / app_2_total_time} with {len(df_app_2)} requests")

269 102
RPM
	Application 0 prompt throughput: 6924.092713027376 with 871 requests
	Application 0 decode throughput: 72.30318751809662 with 871 requests
	Application 1 prompt throughput: 360.9986931712718 with 56 requests
	Application 1 decode throughput: 9.02895068988027 with 56 requests
	Application 2 prompt throughput: 5.193416388824484 with 2 requests
	Application 2 decode throughput: 0.18116568798224944 with 2 requests
269 102
VTC
	Application 0 prompt throughput: 6783.737546016335 with 844 requests
	Application 0 decode throughput: 69.14552693828517 with 844 requests
	Application 1 prompt throughput: 256.1420480471294 with 37 requests
	Application 1 decode throughput: 6.517918402969468 with 37 requests
	Application 2 prompt throughput: 124.1900890888604 with 73 requests
	Application 2 decode throughput: 7.985706756128246 with 73 requests
269 102
FairServe WSC
	Application 0 prompt throughput: 6761.9098262348725 with 838 requests
	Application 0 decode throughput: 68.33060380990406 

In [104]:
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    df = df.loc[df["request_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    df_app_0 = df.loc[df["input_avg_app"] == 5770]
    df_app_1 = df.loc[df["input_avg_app"] == 14854]
    df_app_2 = df.loc[df["input_avg_app"] == 999]
    df_app_0_interactions = df_app_0.groupby("interaction_id").agg({"FinishTime": "max", "req_time": "min", "prompt_len": "sum", "output_len": "sum", "sys_avg_app": "count", "llmcalls": "max", "llmcalls_made": "max"})
    df_app_1_interactions = df_app_1.groupby("interaction_id").agg({"FinishTime": "max", "req_time": "min", "prompt_len": "sum", "output_len": "sum", "sys_avg_app": "count", "llmcalls": "max", "llmcalls_made": "max"})
    df_app_2_interactions = df_app_2.groupby("interaction_id").agg({"FinishTime": "max", "req_time": "min", "prompt_len": "sum", "output_len": "sum", "sys_avg_app": "count", "llmcalls": "max", "llmcalls_made": "max"})
    app_0_throughput = (df_app_0_interactions["FinishTime"] - df_app_0_interactions["req_time"]) / (df_app_0_interactions["output_len"] )
    app_1_throughput = (df_app_1_interactions["FinishTime"] - df_app_1_interactions["req_time"]) / (df_app_1_interactions["output_len"] )
    app_2_throughput = (df_app_2_interactions["FinishTime"] - df_app_2_interactions["req_time"]) / (df_app_2_interactions["output_len"] )
    print(f"{trace_name}")
    print(f"\tApplication 0 throughput: {app_0_throughput.sum()} with {len(df_app_0)} requests")
    print(f"\tApplication 1 throughput: {app_1_throughput.sum()} with {len(df_app_1)} requests")
    print(f"\tApplication 2 throughput: {app_2_throughput.sum()} with {len(df_app_2)} requests")


RPM
	Application 0 throughput: 1994.886178565727 with 871 requests
	Application 1 throughput: 1.4817104472054374 with 4 requests
	Application 2 throughput: 9.648518204689026 with 2 requests
VTC
	Application 0 throughput: 2350.44026795222 with 844 requests
	Application 1 throughput: 110.64310042892527 with 29 requests
	Application 2 throughput: 823.8643164126904 with 73 requests
FairServe WSC
	Application 0 throughput: 2347.8866523761617 with 838 requests
	Application 1 throughput: 109.99563677499306 with 29 requests
	Application 2 throughput: 842.780623238124 with 73 requests
FairServe
	Application 0 throughput: 2135.3414594308074 with 837 requests
	Application 1 throughput: 95.46785898525994 with 27 requests
	Application 2 throughput: 1055.220939726249 with 80 requests


In [71]:
cutoff_time = 150

In [81]:
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    df = df.loc[df["request_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    df_app_0 = df.loc[df["app"] == 20]
    df_app_1 = df.loc[df["app"] == 13]
    df_app_0_users = df_app_0.groupby("adapter_dir").agg({"req_time": "min", "FinishTime": "max", "prompt_len": "sum", "output_len": "sum"})
    df_app_1_users = df_app_1.groupby("adapter_dir").agg({"req_time": "min", "FinishTime": "max", "prompt_len": "sum", "output_len": "sum"})
    app_0_throughputs = (df_app_0_users["prompt_len"] + df_app_0_users["output_len"]) / (df_app_0_users["FinishTime"] - df_app_0_users["req_time"])
    app_1_throughputs = (df_app_1_users["prompt_len"] + df_app_1_users["output_len"]) / (df_app_1_users["FinishTime"] - df_app_1_users["req_time"])
    # print(app_0_throughputs, app_1_throughputs)
    app_0_vals = list(app_0_throughputs)
    app_1_vals = list(app_1_throughputs)
    cnt = 0
    for val0 in app_0_vals:
        for val1 in app_1_vals:
            if val1 / val0 <= 1.17 and val1 / val0 >= 0.97:
                cnt += 1
    print(f"{cnt} user pairs between apps 20 and 13 have correct proportion in strategy {trace_name}")

1 user pairs between apps 20 and 13 have correct proportion in strategy RPM
2 user pairs between apps 20 and 13 have correct proportion in strategy VTC
2 user pairs between apps 20 and 13 have correct proportion in strategy FairServe WSC
2 user pairs between apps 20 and 13 have correct proportion in strategy FairServe


In [62]:
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    df = df.loc[df["request_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    df_app_0 = df.loc[df["input_avg_app"] + df["output_avg_app"] == 8120]
    df_app_1 = df.loc[df["input_avg_app"] + df["output_avg_app"] == 7568]
    print(len(df_app_0), len(df_app_1))

3 0
2 0
2 0
2 0


In [295]:
fig = plt.figure(figsize=(6, 5))

itr = 0
offset = 1.5/(len(traces_to_plot) + 1)

hatch_pattern = ["/", "\\", "|", "-", "+", "x", "o", "O", ".", "*"]
colors = ["#90ee90", "#add8e6", "#ffb6c1", "#e6e6fa", "#d3d3d3", "#f08080", "#ffffe0", "#87cefa", "#d3d3d3"]

for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    df = df.loc[df["request_latency"] > 0]
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    df["weighted_service_eq_2"] = alpha * df["input_avg_app"] + gamma * df["output_avg_app"] + beta * df["sys_avg_app"]
    df["weighted_service_eq_3"] = (alpha * df["prompt_len"] + gamma * df["output_len"]) / df["weighted_service_eq_2"]
    df["weighted_service_eq_3"] = df.apply(lambda x: adjust_wsc(x, cutoff_time), axis=1)
    df_app_user_wsc = df.groupby(by=["app"])["weighted_service_eq_3"].agg([("wsc", "sum")]).reset_index()
    # df_app_wsc["adapter_dir"] = df_app_wsc["adapter_dir"].apply(lambda x: int(x.split("-")[-1]))
    # print(df_app_wsc["wsc"])
    df_app_user_wsc["wsc_bins"] = df_app_user_wsc["wsc"].apply(assign_wsc_bucket)
    if trace_name == "RPM" or trace_name == "FairServe":
        continue
    # if trace_name.find("FairServe") >= 0:
    #     df_app_user_wsc["wsc"] = df_app_user_wsc["wsc"].apply(lambda x: min(x, 2))
    plt.plot(df_app_user_wsc["wsc"], label=trace_name)
    # arr = []
    # for i in range(3):
    #     try:
    #         arr.append(df_users_wsc["wsc_bins"].value_counts().loc[i])
    #     except:
    #         arr.append(0)
    # print(arr)
    # x = [i * 1.5 + itr * offset for i in range(3)]
    # plt.bar(x, arr, label=trace_name, width=offset, hatch=hatch_pattern[itr], color=colors[itr], alpha=.99)
    # itr += 1

# t = [i * 1.5 + itr//2 * offset - offset/2 for i in range(3)]
# plt.xticks(ticks=t, labels=["$S_i=0$", "$0<S_i\leq 1$", "$1<S_i$"], fontsize=14)
# plt.yticks(fontsize=14)
# plt.xlabel(f"Weighted Service Counter ($S_i$)", fontsize=20)
# plt.ylabel("Number of users", fontsize=20)
# plt.ylim(0, 40)
plt.legend(fontsize=16)
# fig.tight_layout()
plt.savefig("tmp.pdf", dpi=300)
plt.close(fig)

In [59]:
fig = plt.figure(figsize=(6, 5))

itr = 0
offset = 1.5/(len(traces_to_plot) + 1)

hatch_pattern = ["/", "\\", "|", "-", "+", "x", "o", "O", ".", "*"]
colors = ["#90ee90", "#add8e6", "#ffb6c1", "#e6e6fa", "#d3d3d3", "#f08080", "#ffffe0", "#87cefa", "#d3d3d3"]

for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    df["FinishTime"] = df["req_time"] + df["request_latency"]
    df = df.loc[df["FinishTime"] <= cutoff_time]
    df = df.groupby(by="app").agg({"adapter_dir": "nunique"}).reset_index()
    if trace_name == "RPM":
        continue
    print(df)
    plt.plot(df["adapter_dir"], label=trace_name)
    # arr = []
    # for i in range(3):
    #     try:
    #         arr.append(df_users_wsc["wsc_bins"].value_counts().loc[i])
    #     except:
    #         arr.append(0)
    # print(arr)
    # x = [i * 1.5 + itr * offset for i in range(3)]
    # plt.bar(x, arr, label=trace_name, width=offset, hatch=hatch_pattern[itr], color=colors[itr], alpha=.99)
    # itr += 1

# t = [i * 1.5 + itr//2 * offset - offset/2 for i in range(3)]
# plt.xticks(ticks=t, labels=["$S_i=0$", "$0<S_i\leq 1$", "$1<S_i$"], fontsize=14)
# plt.yticks(fontsize=14)
# plt.xlabel(f"Weighted Service Counter ($S_i$)", fontsize=20)
# plt.ylabel("Number of users", fontsize=20)
# plt.ylim(0, 40)
plt.legend(fontsize=16)
# fig.tight_layout()
plt.savefig("tmp.pdf", dpi=300)
plt.close(fig)

    app  adapter_dir
0     0            9
1     1           10
2     2            1
3     3           52
4     5           19
5     6            2
6     7          160
7     8            1
8     9           15
9    10            1
10   11            1
11   13            6
12   14           21
13   15            3
14   16            8
15   17          203
16   18            6
17   19           22
18   20            2
19   21            1
20   22            5
21   23            5
    app  adapter_dir
0     0            9
1     1           10
2     2            1
3     3           48
4     5           19
5     6            2
6     7          159
7     8            1
8     9           15
9    10            1
10   11            1
11   13            6
12   14           20
13   15            3
14   16            8
15   17          201
16   18            6
17   19           20
18   20            2
19   21            1
20   22            5
21   23            5
    app  adapter_dir
0     0      

In [103]:
fig = plt.figure(figsize=(6, 5))
for trace, trace_name in traces_to_plot.items():
    with open(trace[:-3] + "jsonl") as f:
        ds = json.load(f)
        df = pd.DataFrame(ds["result"]["responses"])
    df = df.rename(columns={"input99app": "input_avg_app", "output99app": "output_avg_app", "sys99app": "sys_avg_app"})
    # df = df.loc[df["request_latency"] > 0]
    # df["FinishTime"] = df["req_time"] + df["request_latency"]
    # df = df.loc[df["FinishTime"] <= cutoff_time]
    df_app_0 = df.loc[df["input_avg_app"] == 5770]
    df_app_1 = df.loc[df["input_avg_app"] == 14854]
    df_app_2 = df.loc[df["input_avg_app"] == 999]
    print(df_app_0["prompt_len"].mean())
    print(df_app_1["prompt_len"].mean())
    print(df_app_2["prompt_len"].mean())
    # print(len(df_app_1))
    # print(len(df_app_2))
    plt.scatter(df_app_0["req_time"], [1 for i in range(len(df_app_0))], label="App 2", s=1)
    plt.scatter(df_app_1["req_time"], [2 for i in range(len(df_app_1))], label="App 7", s=1)
    plt.scatter(df_app_2["req_time"], [3 for i in range(len(df_app_2))], label="App 12", s=1)
    break
plt.savefig("./tmp.pdf")
plt.close(fig)

594.8464683170565
1100.2158273381294
133.67633928571428
