In [1]:
import os
# Every generated .tex file in this notebook is written to a 'table'
# folder inside the directory the kernel was started from.
base_dir = os.getcwd()
tables_dir = os.path.join(base_dir, 'table')
# exist_ok keeps this cell safe to re-run once the folder already exists.
os.makedirs(name=tables_dir, exist_ok=True)


In [2]:
from tabulate import tabulate

# Notation used by the latency model, as (LaTeX symbol, description) pairs.
table_headers = ["Notation", "Description"]
table_data = [
    ("$S$", "The length of prompt"),
    ("$G$", "The length of generated tokens"),
    ("$B$", "The number of batched requests"),
    ("$T$", "The number of tokens per image"),
    ("$L$", "The number of model layers"),
    ("$H$", "Input dimension of the hidden layer"),
    ("$M$", "The number of attention heads"),
    # NOTE(review): "hidden stage" looks like a typo (size? state?) - confirm
    # the intended wording before changing the emitted text.
    ("$D$", "The hidden stage per head"),
    # Currently disabled entries:
    # ("$n$", "number of images"),
    # ("$m$", "number of tokens"),
    # ("$s$", "number of kv cache tokens of each request"),
    # ("$h_v$, $h_l$", "hidden size of"),
    # ("$d_v$, $d_l$", "head dimension of attention layer of vision mode"),
    # ("$q_v$, $q_l$", "number of heads of attention layer of vision mode"),
    # ("$i_v$", "intermediate size of feed forward layer of vision model"),
    # ("$k$", "kernel size of the convolutional layer"),
    # ("$a$", "width and height of image"),
    # ("$c$", "number of channels of images"),
    # ("$L_v$", "number of layers of vision model"),
    # ("$h_l$", "hidden size of language model"),
    # ("$d_l$", "head dimension of language model"),
    # ("$q_l$", "number of query heads of attention layer of language model"),
    # ("$k_l$", "number of key heads or value heads of attention layer of language model"),
    # ("$i_l$", "intermediate size of feed forward layer of language model"),
    # ("$V$", "vocabulary size of language model"),
]

# Assemble a two-column tabular: a header row between \toprule and \midrule,
# then one "symbol & description \\" line per entry, closed by \bottomrule.
headers_str = " & ".join(table_headers)
body_rows = "".join(
    f"{symbol} & {description} \\\\\n" for symbol, description in table_data
)
latex_table = (
    "\\begin{tabular}{ll}\n\\toprule\n"
    + f"{headers_str} \\\\\n\\midrule\n"
    + body_rows
    + "\\bottomrule\n\\end{tabular}\n"
)

# Save the notation table under tables_dir.
notation_tex_path = os.path.join(tables_dir, "latency_model_notation.tex")
with open(notation_tex_path, mode="w", encoding="utf-8") as f:
    f.write(latex_table)

# Echo the entries to the cell output, with the symbol left-aligned in an
# 8-character column.
for symbol, description in table_data:
    print(f"{symbol:<8} {description}")

$S$      The length of prompt
$G$      The length of generated tokens
$B$      The number of batched requests
$T$      The number of tokens per image
$L$      The number of model layers
$H$      Input dimension of the hidden layer
$M$      The number of attention heads
$D$      The hidden stage per head


In [3]:
from tabulate import tabulate

# Per-operation cost expressions for the latency model.  Each entry names an
# operation and lists one row per stage (encode / prefill / decode) with its
# FLOPS and memory-access formula as LaTeX math.
data = [
    {
        "operation": "QKVO Projection",
        "rows": [
            {"E/P/D": "encode",  "FLOPS": r"$8BTH^2$", "Memory": r"$8BTH+4H^2$"},
            {"E/P/D": "prefill", "FLOPS": r"$8BSH^2$", "Memory": r"$8BSH+4H^2$"},
            # Decode is the prefill expression with a single token per request,
            # so the H^2 factor stays (was "$8BH$").
            {"E/P/D": "decode", "FLOPS": r"$8BH^2$", "Memory": r"$8BH+4H^2$"},
        ]
    },
    {
        "operation": "FFN",
        "rows": [
            {"E/P/D": "encode",  "FLOPS": r"$16BTH^2$", "Memory": r"$4BTH+8H^2$"},
            {"E/P/D": "prefill", "FLOPS": r"$16BSH^2$", "Memory": r"$4BSH+8H^2$"},
            {"E/P/D": "decode", "FLOPS": r"$16BH^2$", "Memory": r"$4BH+8H^2$"},
        ]
    },
    {
        "operation": "Attention",
        "rows": [
            {"E/P/D": "encode", "FLOPS": r"$4BT^2H$", "Memory": r"$4BTH+2BT^2M$"},
            {"E/P/D": "prefill", "FLOPS": r"$4BS^2H$", "Memory": r"$4BSH+2BS^2M$"},
            # NOTE(review): the score term here is 4BSM while encode/prefill use
            # a factor of 2 (2BS^2M) - confirm the coefficient is intended.
            {"E/P/D": "decode", "FLOPS": r"$4BSH$", "Memory": r"$4BSM+2BH(S+1)$"},
        ]
    },
]

def generate_latex_table(data):
    """Render grouped cost rows as the source of a LaTeX tabular.

    The table has four columns (operation, E/P/D stage, FLOPS, memory
    access).  Each operation name is emitted once in a multirow cell that
    spans all of its rows, and consecutive operations are separated by a
    horizontal line.

    Args:
        data: Sequence of dicts, each with an "operation" string and a
            "rows" list.  Every row is a dict with the keys "E/P/D",
            "FLOPS" and "Memory"; values are inserted verbatim.

    Returns:
        The table source as one newline-joined string without a trailing
        newline.
    """
    lines = [
        # One column spec per emitted column (the header and rows have four).
        r"\begin{tabular}{lccc}",
        r"\toprule",
        r"Operation & E/P/D & FLOPS & Memory Access \\",
        r"\midrule",
    ]

    last_index = len(data) - 1
    for i, item in enumerate(data):
        op_name = item["operation"]
        rows = item["rows"]
        for row_index, row in enumerate(rows):
            cells = "{} & {} & {}".format(row["E/P/D"], row["FLOPS"], row["Memory"])
            if row_index == 0:
                # Only the first row of a group carries the operation label.
                label = r"\multirow{{{}}}{{*}}{{{}}}".format(len(rows), op_name)
            else:
                label = ""
            lines.append(label + " & " + cells + r" \\")
        # Separator between groups, but not after the last one.
        if i != last_index:
            lines.append(r"\hline")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    return "\n".join(lines)

# Render the cost table and write it under tables_dir.
output_name = "latency_model.tex"
latex_code = generate_latex_table(data)
with open(os.path.join(tables_dir, output_name), "w", encoding="utf-8") as f:
    f.write(latex_code)

# Report the file that was actually written; the message previously named
# "operator_table.tex", which this cell never creates.
print(f"LaTeX table saved to {output_name}")

LaTeX table saved to operator_table.tex
