# Fit LLC vs Loss, power law vs exponential

* Use our "preferred model" (currently the 3-parameter offset power law).
* Rows: one per model.
* Columns: linear view, log view, and r vs. L* (parameter introspection) with text labels (why not).


In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from lsoc.powerlaw import fit, data, vis

In [2]:
# Master switch for writing figures to disk. Every plotting cell shows its
# figure regardless; files are only written when this is True.
SAVE = False

In [3]:
# Overview grid: one subplot per model showing the full loss-vs-LLC trace with
# the selected fitting window drawn on top of it.
#
# NOTE(review): the output recorded for this cell is a NotADirectoryError whose
# path contains ".../data.py/data/llc/..."; presumably the data loader resolves
# paths against its module file rather than its directory - confirm in
# lsoc.powerlaw.data.

# Fitting window per model: (model code, first step, last step).
# Be conservative initially.
_start = 2000  # we believe this should be in-common
m_codes = [
    ('14m', _start, 20000),
    ('31m', _start, 20000),
    ('70m', _start, 20000),
    ('160m', _start, 30000),
    ('410m-dense', _start, 80000),
    ('1b', _start, 30000),
]

function = fit.OffsetPowerLaw
colors = None  # per-task colours; filled in from the first model loaded below

# Grid shape is derived from the number of models so that adding or removing
# an entry in m_codes cannot run past the last subplot.
n_cols = 2
n_rows = (len(m_codes) + n_cols - 1) // n_cols  # ceiling division

# Kept as a plain loop on purpose: `msize` and `desc` stay module-level names,
# and a later cell reads `msize`.
titles = []
for m_code, start_step, end_step in m_codes:
    msize = m_code.split("-")[0]
    desc = f"Pythia-{msize} on [{start_step//1000}k, {end_step//1000}k]"
    titles.append(desc)

fig = make_subplots(
    rows=n_rows, cols=n_cols,
    subplot_titles=titles,
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
)
fig.update_layout(width=1000, height=400 * n_rows)

shift = None  # no parameter shift is applied to the data in this figure

for idx, (m_code, start_step, end_step) in enumerate(m_codes):
    df_llc, df_loss = data.load_dfs(m_code)
    tasks = df_llc.columns
    if colors is None:
        # Assume the first model has all tasks
        colors = vis.assign_cols(tasks)

    # Fill the grid row-major; plotly subplot coordinates are 1-based.
    row, col = divmod(idx, n_cols)
    row, col = row + 1, col + 1
    subplot = dict(row=row, col=col)

    # First pass: the whole trace (step range 0..1,000,000) in the secondary
    # colour, so the held-out data sits underneath the fitting window.
    for task in tasks:
        color2 = vis.add_color(colors[task])  # for heldout data
        full = data.trim_trace(df_llc, df_loss, task, 0, 1000000)
        vis.plot_data(fig, *full, color=color2, shift=shift,
                      mode='lines+markers', subplot=subplot)

    # Second pass: only the fitting window, in the task's own colour.
    # Legend entries come from the first subplot only, to avoid duplicates.
    for task in tasks:
        trace = data.trim_trace(df_llc, df_loss, task, start_step, end_step)
        vis.plot_data(fig, *trace, color=colors[task], showlegend=(idx == 0),
                      name=task, shift=shift, mode='lines+markers', subplot=subplot)

fname = "plots/select_intervals.pdf"
if SAVE:
    fig.write_image(fname)
    print(f"Done. See {fname}")
fig.show()

NotADirectoryError: [Errno 20] Not a directory: '/Users/Jesse/Projects/lsoc-psych/.venv/lib/python3.11/site-packages/lsoc/powerlaw/data.py/data/llc/pythia-test-losses-new-seed-1024.pkl'

# Input data plot for Pythia-410m, used in the introduction


In [None]:
# Single-panel figure: loss vs LLC for one model, with the offset power-law
# fit overlaid for every task.
m_code = "410m-dense"
# Derive the size label from this cell's own model code. Previously the output
# filename picked up whatever `msize` an earlier cell's loop had left behind,
# so the file was named after the wrong model.
msize = m_code.split("-")[0]

df_llc, df_loss = data.load_dfs(m_code)
start_step = 2000
end_step = 80000

tasks = df_llc.columns

fig = go.Figure()

shift = None  # plot unshifted values; pass result.params_dict to shift instead

# `colors` is the per-task colour mapping assigned by an earlier cell.
for task in tasks:
    color = colors[task]
    trace = data.trim_trace(df_llc, df_loss, task, start_step, end_step)
    result = fit.min_fit(trace.x, trace.y, fit.OffsetPowerLaw)
    # Data points carry no legend entry; the fitted curve provides it.
    vis.plot_data(fig, *trace, color=color, name=task, showlegend=False, shift=shift)
    vis.plot_result(fig, trace.x, result, color=color, name=task, showlegend=True, shift=shift)

fig.update_layout(width=800, height=600)
fig.update_xaxes(title_text=r"$\text{Estimated and transformed LLC }\,\frac{1}{100}\hat{\lambda}$")
fig.update_yaxes(title_text=r"$L$")

fname = f"plots/title_{msize}.pdf"
if SAVE:
    fig.write_image(fname)
    print(f"Done. See {fname}")
fig.show()

# What does the parameter distribution look like?

In [None]:
# Parameter scatter: for every model, fit each task over the model's window
# and plot the fitted exponent r against the fitted offset y* (labelled L*),
# one subplot per model.

# Grid shape follows the number of models instead of being hard-coded.
n_cols = 2
n_rows = (len(m_codes) + n_cols - 1) // n_cols  # ceiling division

titles = []
for m_code, start_step, end_step in m_codes:
    msize = m_code.split("-")[0]
    titles.append(f"Parameters for Pythia-{msize} on [{start_step}, {end_step}]")
fig = make_subplots(
    rows=n_rows, cols=n_cols,
    subplot_titles=titles,
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
)
fig.update_layout(width=1200, height=410 * n_rows)

for idx, (m_code, start_step, end_step) in enumerate(m_codes):
    df_llc, df_loss = data.load_dfs(m_code)
    tasks = df_llc.columns

    # Fill the grid row-major; plotly subplot coordinates are 1-based.
    row, col = divmod(idx, n_cols)
    row, col = row + 1, col + 1
    subplot = dict(row=row, col=col)

    for task in tasks:
        trace = data.trim_trace(df_llc, df_loss, task, start_step, end_step)
        result = fit.min_fit(trace.x, trace.y, fit.OffsetPowerLaw)
        pars = result.params_dict
        # One marker per task, labelled with the task name.
        # `colors` is the per-task colour mapping assigned by an earlier cell.
        fig.add_trace(go.Scatter(
            x=[pars["r"]],
            y=[pars["y*"]],
            name=task,
            marker=dict(
                color=colors[task],
                size=10,
            ),
            showlegend=(idx == 0),  # legend from the first subplot only
            mode='markers+text',
            text=task,
            textposition='top center',
            textfont=dict(size=5),
        ), **subplot)
    fig.update_xaxes(title_text=r"Exponent r", **subplot)
    fig.update_yaxes(title_text=r"Loss Offset L*", **subplot)


fname = "plots/parameters_grid.pdf"
if SAVE:
    fig.write_image(fname)
    print(f"Done. See {fname}")
fig.show()

## Are there any trends over model size?

In [None]:
# Is there any pattern over model size?
# For each task, collect the fitted (r, y*) pair from every model and draw the
# pairs as one connected trace, labelling each point with the model size.

# Seed the per-task series from 160m so traces follow its task order.
df_llc, df_loss = data.load_dfs("160m")  # I believe 160m has a full task list
tasks = df_llc.columns
task_xs = {t: [] for t in tasks}   # fitted exponent r per model
task_ys = {t: [] for t in tasks}   # fitted offset y* per model
task_siz = {t: [] for t in tasks}  # model-size label per point


for m_code, start_step, end_step in m_codes:
    msize = m_code.split("-")[0]
    df_llc, df_loss = data.load_dfs(m_code)

    # Not every model has a full task list
    for task in df_llc.columns:
        trace = data.trim_trace(df_llc, df_loss, task, start_step, end_step)
        result = fit.min_fit(trace.x, trace.y, fit.OffsetPowerLaw)
        pars = result.params_dict
        # setdefault keeps the loop from raising KeyError if a model has a
        # task that the 160m reference list does not contain.
        task_xs.setdefault(task, []).append(pars["r"])
        task_ys.setdefault(task, []).append(pars["y*"])
        task_siz.setdefault(task, []).append(msize.upper())


fig = go.Figure()

# Iterate over what was collected so late-discovered tasks are drawn as well.
# NOTE(review): `colors` comes from an earlier cell and is assumed to cover
# every task - confirm for tasks absent from the first model.
for task, xs in task_xs.items():
    sizes = task_siz[task]
    fig.add_trace(go.Scatter(
        x=xs,
        y=task_ys[task],
        customdata=sizes,
        marker=dict(
            color=colors[task],
            size=6,
        ),
        mode='markers+lines+text',
        name=task,
        text=sizes,
        textfont=dict(size=8),
        textposition='top right',
        hovertemplate="Model: %{customdata}<br><extra></extra>",
    ))
fig.update_xaxes(title_text="$r$")
fig.update_yaxes(title_text="$L^*$")
fig.update_layout(title="", width=800, height=600)


if SAVE:
    fname = "plots/parameters_all.pdf"
    fig.write_image(fname)

    # Higher-resolution PNG alongside the PDF.
    thumb = "plots/parameters_thumbnail_all.png"
    fig.write_image(thumb, scale=2)
    print(f"Done. See {fname}")
fig.show()


# Final array - ALL THE FITS

In [None]:
# Final fit figures, one figure per batch of models:
# Row: one per model in the batch
# Column 1: data and fit as-is; column 2: the same with the fitted parameters
# passed as `shift` (y-axis labelled L - L*)
function = fit.OffsetPowerLaw  # our chosen model

batches = [
    ("plots/models1.pdf", m_codes[:3]),
    ("plots/models2.pdf", m_codes[3:]),
]

# Axis titles do not depend on the task, so they are defined once here and
# applied once per subplot rather than on every task iteration.
x_title = r"$\text{Estimated and transformed LLC }\,\frac{1}{100}\hat{\lambda}$"
y_titles = {
    1: r"$\text{Loss }L$",
    2: r"$\text{Loss }L - L^*$",
}

for fname, codes in batches:

    # Both columns of a row carry the same heading.
    titles = []
    for m_code, start_step, end_step in codes:
        msize = m_code.split("-")[0]
        title = f"Pythia-{msize} on [{start_step//1000}k, {end_step//1000}k]"
        titles.extend([title, title])

    # Size the grid from the batch so a shorter or longer batch still fits.
    fig = make_subplots(
        rows=len(codes), cols=2,
        subplot_titles=titles,
        horizontal_spacing=0.1,
        vertical_spacing=0.1,
    )
    fig.update_layout(
        width=1200,
        height=410 * len(codes),
        legend_tracegroupgap=120,  # annoying - have to eyeball this
    )

    for row, (m_code, start_step, end_step) in enumerate(codes, start=1):
        msize = m_code.split("-")[0]
        df_llc, df_loss = data.load_dfs(m_code)
        tasks = df_llc.columns

        for col, y_title in y_titles.items():
            fig.update_xaxes(title_text=x_title, row=row, col=col)
            fig.update_yaxes(title_text=y_title, row=row, col=col)

        for task in tasks:
            color = colors[task]
            trace = data.trim_trace(df_llc, df_loss, task, start_step, end_step)
            result = fit.min_fit(trace.x, trace.y, function)
            # Legend text: fitted parameters, with y* shown as L*.
            func_desc = vis.dict2txt(result.params_dict, ",").replace("y*", "L*").replace(": ", ":")

            y_fit = result.f(trace.x)
            r2_score = fit.r2_score(trace.y, y_fit)

            # Plot the linear and logspace fits
            for col, shift in ((1, None), (2, result.params_dict)):
                subplot = dict(row=row, col=col)
                vis.plot_data(fig, *trace, color=color, shift=shift, subplot=subplot, showlegend=False)
                # Only the column-1 curve gets a legend entry, grouped by model.
                vis.plot_result(fig, trace.x, result, color=color, shift=shift, subplot=subplot,
                                showlegend=(col == 1), legendgroup=msize,
                                name=f"{task} {func_desc}: R2={r2_score:.4f}")

    if SAVE:
        fig.write_image(fname)
        print(f"Saved. See {fname}")
    fig.show()

# Alt view with table

In [None]:
# Alternative view of the final fit figures: the legend shows task names only,
# and the fitted parameters and R2 are collected into `report` for a table.
# Row: one per model in the batch
# Column 1: data and fit as-is; column 2: the same with the fitted parameters
# passed as `shift` (y-axis labelled L - L*)
function = fit.OffsetPowerLaw  # our chosen model

# NOTE(review): these are the same output paths as the "Final array" cell, so
# with SAVE enabled whichever cell runs last overwrites the other's PDFs -
# confirm that is intended.
batches = [
    ("plots/models1.pdf", m_codes[:3]),
    ("plots/models2.pdf", m_codes[3:]),
]

report = []  # store rows of the table

# Axis titles do not depend on the task, so they are defined once here and
# applied once per subplot rather than on every task iteration.
x_title = r"$\text{Estimated and transformed LLC }\,\frac{1}{100}\hat{\lambda}$"
y_titles = {
    1: r"$\text{Loss }L$",
    2: r"$\text{Loss }L - L^*$",
}

for fname, codes in batches:

    # Both columns of a row carry the same heading.
    titles = []
    for m_code, start_step, end_step in codes:
        msize = m_code.split("-")[0]
        title = f"Pythia-{msize} on [{start_step//1000}k, {end_step//1000}k]"
        titles.extend([title, title])

    # Row count and height both follow the batch size so they cannot disagree.
    fig = make_subplots(
        rows=len(codes), cols=2,
        subplot_titles=titles,
        horizontal_spacing=0.1,
        vertical_spacing=0.1,
    )
    fig.update_layout(
        width=1200,
        height=550 * len(codes),
    )

    for row, (m_code, start_step, end_step) in enumerate(codes, start=1):
        msize = m_code.split("-")[0]
        df_llc, df_loss = data.load_dfs(m_code)
        tasks = df_llc.columns

        for col, y_title in y_titles.items():
            fig.update_xaxes(title_text=x_title, row=row, col=col)
            fig.update_yaxes(title_text=y_title, row=row, col=col)

        for task in tasks:
            color = colors[task]
            trace = data.trim_trace(df_llc, df_loss, task, start_step, end_step)
            result = fit.min_fit(trace.x, trace.y, function)
            params = result.params_dict

            y_fit = result.f(trace.x)
            r2_score = fit.r2_score(trace.y, y_fit)

            # One table row per (model, task) fit.
            report.append({
                "Model": "Pythia-" + msize.upper(),
                "dataset": task,
                "L*": params["y*"],
                "c": params["c"],
                "r": params["r"],
                "R2": r2_score,
            })

            # Plot the linear and logspace fits
            for col, shift in ((1, None), (2, params)):
                subplot = dict(row=row, col=col)
                vis.plot_data(fig, *trace, color=color, shift=shift, subplot=subplot, showlegend=False)
                # Legend entries come from the top-left subplot only.
                vis.plot_result(fig, trace.x, result, color=color, shift=shift, subplot=subplot,
                                showlegend=(row == 1 and col == 1),
                                legendgroup=msize, name=task)

    if SAVE:
        fig.write_image(fname)
        print(f"Done. See {fname}")
    fig.show()

# Last expression of the cell: display the collected table.
pd.DataFrame(report)

In [None]:
# Render `report` as LaTeX: one table per model, each wrapped in a subfigure,
# laid out `columns` per row inside a single \scriptsize figure, then printed.
# The subfigure environment is provided by the LaTeX `subcaption` package.
df = pd.DataFrame(report)
latex_output = []
latex_output.append(r"""
\begin{figure}[htbp]
\centering
\scriptsize
""".strip())

grouped = df.groupby('Model', sort=False)  # sort=False keeps report order
columns = 2  # tables per row
# Share 90% of the text width between the tables in a row (0.45 each for 2).
subfig_width = f"{0.9 / columns:.2f}"
subfig_open = "\\begin{subfigure}[t]{" + subfig_width + "\\textwidth}"

for i, (model_name, group_df) in enumerate(grouped):

    group_df = group_df.drop('Model', axis=1)  # Don't want it in the table

    position = i % columns  # slot of this table within its row
    if position == 0 and i != 0:  # Start new row
        latex_output.append("\n\\vspace{1cm}")

    if position == 0:  # First table in row
        latex_output.append("\n" + subfig_open)
    else:  # Later tables in the row are pushed apart with \hfill
        latex_output.append("\\hfill\n" + subfig_open)

    latex_output.append(
        group_df.to_latex(
            index=False,  # no index column
            column_format='lrrrr',  # dataset left-aligned, 4 numeric columns right-aligned
            escape=True,  # escape LaTeX special characters in the cell text
            float_format=lambda x: '%.2f' % x,  # default number format
            formatters={
                'R2': lambda x: '%.4f' % x  # override for one column
            },
            longtable=False,
        )
    )
    latex_output.append(f"\\caption{{{model_name}}}")
    latex_output.append("\\end{subfigure}")

# End the figure
latex_output.append(r"""
\caption{Fit parameters and $R^2$ scores for each model and dataset.}
\label{tab:model_fit}
\end{figure}
""")

# Join all parts
final_latex = '\n'.join(latex_output)

# Fix R^2: the column is named "R2" so that escape=True leaves it alone; swap
# in the math form afterwards. This replaces every "R2" in the output text.
final_latex = final_latex.replace("R2", "$R^2$")
print(final_latex)
