For paper, probably want to use matplotlib instead of plotly

In [1]:
import os
import pickle
import numpy as np
from numpy.linalg import LinAlgError

# utils for plotting
import plotly.graph_objects as go
import plotly.colors as colors

In [3]:
score_dir = 'output/scores'
fig_dir = 'output/figures'
thin_list = ['full', 'st', 'kt'] #, 'rpcholesky']
method = 'krr'
kernel = 'gaussian'
ground_truth = 'sum_gauss'
logn_lo = 8
logn_hi = 18
n_trials = 100

In [5]:
results = []
names = []
for thin in thin_list:
    trials = (1 if thin == 'full' else n_trials)
    lo = logn_lo
    hi = logn_hi
    # hi = 10 if thin == 'rpcholesky' else logn_hi
    score_file = os.path.join(
        score_dir,
        f"{thin}-{method}-k={kernel}-gt={ground_truth}-logn={lo}_{hi}-t{trials}.pkl"
    )
    with open(score_file, 'rb') as f:
        result = pickle.load(f)
        
    for r in result:
        name = f"{thin}-{method}"
        if name not in names:
            names.append(name)
        r['name'] = name
        results.append(r)

FileNotFoundError: [Errno 2] No such file or directory: 'output/scores/full-krr-k=gaussian-gt=sum_gauss-logn=8_18-t1.pkl'

In [9]:
len(results)

14

## Test MSE

In [10]:
fig = go.Figure()

colors_list = colors.qualitative.Plotly * (
    len(thin_list) // len(colors.qualitative.Plotly) + 1
)
colors_used = set()

In [11]:
scale = 'log2'
baseline_loss = 0.01 # = noise**2

for result in results:
    name = result['name']
    color = colors_list[names.index(name)]

    if scale == 'log2':
        y = np.log2(np.abs(result["scores"]))
        hline = np.log2(np.abs(baseline_loss))

    elif scale == 'linear':
        hline = np.abs(baseline_loss)
        y = np.abs(result["scores"])

    trace = go.Box(
        x=[result['logn']]*len(result["scores"]),
        y=y,
        name=name,
        # opacity=0.5,
        legendgroup=name,
        line_color=color,
        offsetgroup=name,
        showlegend=color not in colors_used,
        boxmean=True,
    )

    fig.add_trace(trace)
    colors_used.add(color)

# add line for baseline loss
fig.add_hline(
    y=hline,
    line_dash="dash",
)

fig.update_yaxes(title_text=f"{scale}(test MSE)")
fig.update_xaxes(title_text="log2(n)", type='linear')
fig.update_layout(
    width=800,
    height=600,
    title=f"Test MSE vs n (kernel={kernel}, ground_truth={ground_truth})",
    boxmode='group'
)

In [12]:
# save fig to file
fig_file = os.path.join(
    fig_dir,
    f"test_mse-{method}-k={kernel}-gt={ground_truth}-logn={logn_lo}_{logn_hi}-t{n_trials}.png"
)
fig.write_image(fig_file)

## Excess Risk Scaling

In [13]:
trendline_data = {name : {'x':[], 'y':[], 'y_std':[]} for name in names}
for result in results:
    name = result["name"]    
    excess_risk = np.maximum(np.abs(result[f"scores"]) - np.abs(baseline_loss), 0)
    log_excess_risk = np.log2(excess_risk)

    trendline_data[name]['x'].append(result['logn'])
    # to avoid errors with taking log of negative numbers
    trendline_data[name]['y'].append( np.mean(log_excess_risk) )
    trendline_data[name]['y_std'].append(np.std(log_excess_risk))


divide by zero encountered in log2


invalid value encountered in subtract



In [14]:
fig = go.Figure()

for name in names:
    x = trendline_data[name]['x']
    y = trendline_data[name]['y']

    # add scatter
    scatter = go.Scatter(
        x=x,
        y=y,
        name=name,
        legendgroup=name,
        error_y=dict(
            type='data',
            array=trendline_data[name]['y_std'],
            visible=True,
        ),
        # use markers
        mode='markers',
        marker=dict(
            color=colors_list[names.index(name)],
        )
    )
    fig.add_trace(scatter)

    # add trendline
    try:
        z = np.polyfit(x,y,1)
        y_hat = np.poly1d(z)(x)
        fig.add_trace(go.Scatter(
            x=x,
            y=y_hat,
            name=name,
            legendgroup=name,
            showlegend=False,
            line=dict(
                dash='dash',
                color=scatter['marker']['color'],
            ),
        ))

        # add slope annotation
        fig.add_annotation(
            xref="paper", yref="paper",
            x=0, y=0.25 * names.index(name) / len(names),
            # x=0,y=0,
            text=f"{name}: n^{z[0]:.2f}",
            showarrow=False,
        )
    except LinAlgError:
        print(f"cannot fit trendline for {name}")

fig.update_layout(
    width=800,
    height=600,
    title=f'Excess MSE vs n (kernel={kernel}, ground_truth={ground_truth})',
)
# yaxis title
fig.update_yaxes(title_text=f'log2(excess MSE)')
# xaxis title
fig.update_xaxes(title_text='log2(n)')
fig.update_xaxes(title_text='log2(n)')
fig.show()

## Training times

In [15]:
fig = go.Figure()

colors_list = colors.qualitative.Plotly * (
    len(thin_list) // len(colors.qualitative.Plotly) + 1
)
colors_used = set()

In [16]:
scale = 'linear'
baseline_loss = 0.01 # = noise**2

for result in results:
    name = result['name']
    color = colors_list[names.index(name)]

    if scale == 'log2':
        y = np.log2(np.abs(result["train_times"]))
        hline = np.log2(np.abs(baseline_loss))

    elif scale == 'linear':
        hline = np.abs(baseline_loss)
        y = np.abs(result["train_times"])

    trace = go.Box(
        x=[result['logn']]*len(result["train_times"]),
        y=y,
        name=name,
        # opacity=0.5,
        legendgroup=name,
        line_color=color,
        offsetgroup=name,
        showlegend=color not in colors_used,
        boxmean=True,
    )

    fig.add_trace(trace)
    colors_used.add(color)

fig.update_yaxes(title_text=f"{scale}(train time)")
fig.update_xaxes(title_text="log2(n)", type='linear')
fig.update_layout(
    width=800,
    height=600,
    title=f"Train times vs n (kernel={kernel}, ground_truth={ground_truth})",
    boxmode='group'
)

In [17]:
# save fig to file
fig_file = os.path.join(
    fig_dir,
    f"train_time-{method}-k={kernel}-gt={ground_truth}-logn={logn_lo}_{logn_hi}-t{n_trials}.png"
)
fig.write_image(fig_file)

## Predict times

In [18]:
fig = go.Figure()

colors_list = colors.qualitative.Plotly * (
    len(thin_list) // len(colors.qualitative.Plotly) + 1
)
colors_used = set()

In [19]:
scale = 'linear'
baseline_loss = 0.01 # = noise**2

for result in results:
    name = result['name']
    color = colors_list[names.index(name)]

    if scale == 'log2':
        y = np.log2(np.abs(result["test_times"]))
        hline = np.log2(np.abs(baseline_loss))

    elif scale == 'linear':
        hline = np.abs(baseline_loss)
        y = np.abs(result["test_times"])

    trace = go.Box(
        x=[result['logn']]*len(result["test_times"]),
        y=y,
        name=name,
        # opacity=0.5,
        legendgroup=name,
        line_color=color,
        offsetgroup=name,
        showlegend=color not in colors_used,
        boxmean=True,
    )

    fig.add_trace(trace)
    colors_used.add(color)

fig.update_yaxes(title_text=f"{scale}(test time)")
fig.update_xaxes(title_text="log2(n)", type='linear')
fig.update_layout(
    width=800,
    height=600,
    title=f"Test times vs n (kernel={kernel}, ground_truth={ground_truth})",
    boxmode='group'
)

In [20]:
# save fig to file
fig_file = os.path.join(
    fig_dir,
    f"test_time-{method}-k={kernel}-gt={ground_truth}-logn={logn_lo}_{logn_hi}-t{n_trials}.png"
)
fig.write_image(fig_file)