In [None]:
import matplotlib.pyplot as plt
from ipywidgets import interact, Checkbox, IntSlider
import pandas as pd

# Helper Functions

In [None]:
# Model-size labels, ordered from smallest to largest.
models = [
    "Small", "Medium", "Large", "XL",
    "2.7B", "6.7B", "13B", "175B",
]
Nmodels = len(models)

# Column data for the baseline table, one entry per model in `models`.
# Only "d_model" and "params_total [M]" carry numbers here; the remaining
# columns are None placeholders that get_df() overwrites for a concrete
# vocabulary size.
# NOTE(review): the numbers presumably come from the GPT-3 table referenced
# in the Overview section — confirm against that source.
d = dict()
d["Name"] = models
d["d_model"] = [768, 1024, 1536, 2048, 2560, 4096, 5140, 12288]
d["vocab_size"] = [None for _ in models]
d["params_embed [M]"] = [None for _ in models]
d["params_total [M]"] = [125, 350, 760, 1300, 2700, 6700, 13000, 175000]
d["params_embed/total"] = [None for _ in models]

# Template frame; it is copied (never edited in place) by get_df().
df_base = pd.DataFrame(data=d)

In [None]:
def get_df(_vocab_size, _base_vocab_size=50000):
    """Return a copy of ``df_base`` filled in for one vocabulary size.

    Parameters
    ----------
    _vocab_size : int
        Vocabulary size to evaluate; written to every row of ``"vocab_size"``.
    _base_vocab_size : int, optional
        Vocabulary size that the ``"params_total [M]"`` values stored in
        ``df_base`` are taken to correspond to. Defaults to 50000, the value
        that used to be hard-coded in this function.

    Returns
    -------
    pandas.DataFrame
        A new frame (``df_base`` is left untouched) in which

        * ``"vocab_size"`` is ``_vocab_size`` on every row,
        * ``"params_embed [M]"`` is ``vocab_size * d_model / 10**6``,
        * ``"params_total [M]"`` is the baseline total plus
          ``(vocab_size - _base_vocab_size) * d_model / 10**6``,
        * ``"params_embed/total"`` is the ratio of the two previous columns,
          formatted as a percentage string with one decimal (e.g. ``"25.0%"``).
    """
    df = df_base.copy()

    # A scalar is broadcast to every row, so the column length always matches
    # the frame and no module-level row count is needed.
    df["vocab_size"] = _vocab_size

    df["params_embed [M]"] = df["vocab_size"] * df["d_model"] / 10**6

    # Shift the baseline total by the embedding rows added (or removed)
    # relative to the reference vocabulary size.
    extra_embed = (df["vocab_size"] - _base_vocab_size) * df["d_model"] / 10**6
    df["params_total [M]"] = df["params_total [M]"] + extra_embed

    ratio = df["params_embed [M]"] / df["params_total [M]"]
    # Stored as display text, not as a number.
    df["params_embed/total"] = ratio.apply(lambda x: f"{100*x:.1f}%")
    return df

In [None]:
def plot_model_parameters(_dfs, _vocab_size):
    """Draw grouped bar charts of total vs. embedding parameters per model.

    Two panels are created side by side:

    * left ("absolute"): the ``"params_total [M]"`` and ``"params_embed [M]"``
      columns of every frame,
    * right ("relative"): the embedding share
      ``params_embed [M] / params_total [M]`` drawn on top of a bar of
      height 1.

    Each model gets one group of bars on the x axis, with one bar per
    vocabulary size inside the group. The bars belonging to ``_vocab_size``
    are highlighted in bright red.

    Parameters
    ----------
    _dfs : dict
        Maps a vocabulary size to a frame that has numeric
        ``"params_embed [M]"`` and ``"params_total [M]"`` columns. Bars are
        drawn in the dict's iteration order.
    _vocab_size : int
        Key of ``_dfs`` to highlight. If it is not a key, nothing is
        highlighted.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and the array of its two axes.

    Notes
    -----
    The number of x positions and the tick labels come from the module-level
    ``Nmodels`` and ``models``.
    """
    fig, ax = plt.subplots(1, 2, figsize=(16, 5))
    x = list(range(Nmodels))

    # Use the frames that were actually passed in rather than a notebook
    # global, so the function works for any dict handed to it.
    vocab_sizes = list(_dfs)

    # Every group spans 0.5 x-units centred on its tick; split that evenly
    # between the vocabulary sizes (five sizes -> bar width 0.1).
    group_width = 0.5
    bar_width = group_width / max(len(vocab_sizes), 1)

    for i, v in enumerate(vocab_sizes):
        left_edges = [pos - group_width / 2 + i * bar_width for pos in x]
        embed = _dfs[v]["params_embed [M]"]
        total = _dfs[v]["params_total [M]"]
        embed_share = embed / total
        bar_kw = dict(align="edge", width=bar_width)
        first = i == 0  # only the first bar set contributes legend entries

        ax[0].bar(left_edges, total, color="blue",
                  label="total" if first else None, **bar_kw)
        ax[0].bar(left_edges, embed, color=[0.5, 0, 0],
                  label="embedding" if first else None, **bar_kw)
        ax[1].bar(left_edges, 1, color="blue", **bar_kw)
        ax[1].bar(left_edges, embed_share, color=[0.5, 0, 0], **bar_kw)

        if v == _vocab_size:
            # Drawn on top of the dark-red bars (instead of replacing them)
            # so the generic "embedding" legend entry is kept even when the
            # first vocabulary size is the selected one.
            ax[0].bar(left_edges, embed, color="red",
                      label=f"embedding vocab_size = {_vocab_size}", **bar_kw)
            ax[1].bar(left_edges, embed_share, color="red", **bar_kw)

    ax[0].legend()
    # The plotted columns are the "[M]" ones; the right panel shows a ratio.
    ax[0].set_ylabel("# parameters [M]")
    ax[1].set_ylabel("fraction of total parameters")
    ax[0].set_title("absolute")
    ax[1].set_title("relative")
    for axis in ax:
        axis.set_xticks(x)
        axis.set_xticklabels(models)
    return fig, ax

# Overview

![gpt3](./gpt3_parameters.png)

In [None]:
# Vocabulary sizes to compare: five evenly spaced values, VOCAB_STEP apart.
VOCAB_STEP = 50000
vs = [VOCAB_STEP*i for i in range(1, 6)]
# One pre-computed table per vocabulary size, keyed by that size.
dfs = {v: get_df(v) for v in vs}

@interact
def show_model_parameters(vocab_size=IntSlider(min=vs[0], max=vs[-1], step=VOCAB_STEP, value=vs[0])):
    """Plot all vocabulary sizes, highlight ``vocab_size`` and show its table.

    The slider range and step are derived from ``vs`` so that every slider
    position is a key of ``dfs``; hard-coding them separately would make
    ``dfs[vocab_size]`` fail as soon as the two drift apart.

    ``display`` is not imported in this notebook; it is expected to be the
    function that the IPython/Jupyter session provides.
    """
    plot_model_parameters(dfs, vocab_size)
    display(dfs[vocab_size])