In [None]:
import pickle
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
from matplotlib import cm

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
from scipy.spatial import distance_matrix
def smooth(x, y, h=1):
    """Gaussian-kernel weighted average of ``y`` over the 1-D locations ``x``.

    Output ``i`` is ``sum_j K_ij * y_j / sum_j K_ij`` with
    ``K_ij = exp(-(x_i - x_j)**2 / (2*h))``.

    Parameters
    ----------
    x : 1-D array-like (pandas Series, numpy array or list)
        Sample locations.
    y : array-like
        Values to smooth, one per entry of ``x``.
    h : float, default 1
        Kernel width. It divides the *squared* distance, so it acts like a
        variance rather than a standard deviation.

    Returns
    -------
    The smoothed values, one per entry of ``x``.
    """
    # np.asarray accepts Series, ndarrays and lists alike, whereas the former
    # ``x.values`` only worked for pandas objects.
    points = np.asarray(x, dtype=float).reshape(-1, 1)
    K = np.exp(-distance_matrix(points, points)**2/(2*h))
    # Dividing by the row sums of K normalises the weights of each output.
    return (K@y) / (K@np.ones_like(y))

In [None]:
# Load the train/test split. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open('../L=1/mnist.pkl', 'rb') as mnist_file:
    X_train, X_test, y_train, y_test = pickle.load(mnist_file)

In [None]:
# Load the experiment results. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open('results.pkl', 'rb') as results_file:
    results = pickle.load(results_file)
result_df = pd.DataFrame.from_dict(results)


def force(y, f):
    """Return ``1 - y*f``; positive wherever the margin ``y*f`` is below 1."""
    return 1 - y*f


def loss(y, f):
    """Mean over the last axis of the squared positive part of ``force(y, f)``."""
    return np.mean(np.maximum(0, force(y, f))**2, -1)


def N_del(y, f):
    """Count, over the last axis, the entries with ``force(y, f) >= 0``."""
    return np.sum(force(y, f) >= 0, -1)


# Per-row losses and the count of training points with non-negative force.
result_df['test_loss'] = result_df.y_test_hat.apply(lambda f: loss(y_test, f))
result_df['train_loss'] = result_df.y_train_hat.apply(lambda f: loss(y_train, f))
result_df['N_del'] = result_df.y_train_hat.apply(lambda f: N_del(y_train, f))

# Ratios used as plot axes further down.
result_df['P/N'] = result_df['P']/result_df['N']
result_df['N_del/N'] = result_df['N_del']/result_df['N']

result_df['P/h'] = result_df['P']/result_df['h']
result_df['N_del/h'] = result_df['N_del']/result_df['h']

In [None]:
# A row counts as "not yet fit" while its train loss exceeds this cutoff.
star_cutoff = 1e-2


def _largest_above_cutoff(step_rows, column):
    """Largest `column` value among the rows of one step with train_loss > star_cutoff."""
    above_cutoff = step_rows['train_loss'] > star_cutoff
    return step_rows.loc[above_cutoff, column].max()


rows_by_step = result_df.groupby('step')

# Per-step N* and h*, broadcast back onto every row of the matching step.
N_star = rows_by_step.apply(_largest_above_cutoff, 'N')
h_star = rows_by_step.apply(_largest_above_cutoff, 'h')

result_df['N_star'] = result_df['step'].map(N_star)
result_df['h_star'] = result_df['step'].map(h_star)

In [None]:
from matplotlib import colors as mcolors
from matplotlib.colors import LinearSegmentedColormap

# Anchor colours of the colormap, given as 0-255 RGB triples.
mei2019_rgb = np.array([
    (243, 232, 29),
    (245, 173, 47),
    (140, 193, 53),
    (50,  191, 133),
    (23,  167, 198),
    (36,  123, 235),
    (53,  69,  252),
    (52,  27,  203)
])
cmap = LinearSegmentedColormap.from_list('Mei2019', mei2019_rgb/255., N=256)

# cmap = cc.m_bmy

# Preview strip: two identical rows of a 0..1 ramp rendered through the colormap.
ramp = np.linspace(0, 1, 256)
gradient = np.vstack((ramp, ramp))
fig = plt.figure(figsize=(6,.5))
img = plt.imshow(gradient, aspect='auto', cmap=cmap)
title = plt.title('Colormap stolen from Mei2019')

# Logarithmic colour normalisation shared (and re-autoscaled) by the plotting cells below.
norm = mcolors.LogNorm()

In [None]:
SMALL_SIZE = 11
MEDIUM_SIZE = 12
BIGGER_SIZE = 14
BIGGEST_SIZE = 15

# (rc group, keyword settings) pairs, applied in order.
font_size_settings = [
    ('font',   {'size': SMALL_SIZE}),          # default text size
    ('axes',   {'titlesize': BIGGER_SIZE}),    # axes title
    ('axes',   {'labelsize': MEDIUM_SIZE}),    # x and y axis labels
    ('xtick',  {'labelsize': SMALL_SIZE}),     # x tick labels
    ('ytick',  {'labelsize': SMALL_SIZE}),     # y tick labels
    ('legend', {'fontsize': SMALL_SIZE}),      # legend text
    ('figure', {'titlesize': BIGGEST_SIZE}),   # figure title
]
for rc_group, rc_kwargs in font_size_settings:
    plt.rc(rc_group, **rc_kwargs)

In [None]:
# Train loss against h/P, one colour per training step; linear y-axis, log x-axis.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'h/P'
y_expr = 'train_loss'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the saved figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    color = cmap(norm(val))

    # Rows of this step, ordered by x so the smoothed curve is drawn left to right.
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

    # Kernel-smoothed trend in log(x), drawn underneath the points.
    y_sm = smooth(np.log(x), y, .001)
    plt.plot(x, y_sm, color=color, ls=':', zorder=-1)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.xscale('log')

plt.xlabel(r"$h/P$")
plt.ylabel(r"Train $\mathcal{L}$")
fig.savefig('plots/h_P_vs_train_loss_L=2_linear.pdf')

In [None]:
# Train loss against h/P, one colour per training step; log-log axes.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'h/P'
y_expr = 'train_loss'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the saved figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    color = cmap(norm(val))

    # Rows of this step, ordered by x so the smoothed curve is drawn left to right.
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

    # Kernel-smoothed trend in log(x), drawn underneath the points.
    y_sm = smooth(np.log(x), y, .01)
    plt.plot(x, y_sm, color=color, ls=':', zorder=-1)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')
# Horizontal marker at the train-loss cutoff used to define N* and h*.
plt.axhline(star_cutoff, c='k', ls=':')

plt.xlabel(r"$h/P$")
plt.ylabel(r"Train $\mathcal{L}$")
fig.savefig('plots/h_P_vs_train_loss_L=2_log.pdf')

In [None]:
# Train loss against h/h*, one colour per training step; log-log axes.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'h/h_star'
y_expr = 'train_loss'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the saved figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    color = cmap(norm(val))

    # Rows of this step, ordered by x so the smoothed curve is drawn left to right.
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

    # Kernel-smoothed trend in log(x); unlike the h/P plots it keeps the default z-order.
    y_sm = smooth(np.log(x), y, .01)
    plt.plot(x, y_sm, color=color, ls=':')

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')
# Vertical marker at h = h*.
plt.axvline(1, c='k', ls='--')

plt.xlabel(r"$h/h^*$")
plt.ylabel(r"Train $\mathcal{L}$")
fig.savefig('plots/h_h_star_vs_train_loss_L=2_log.pdf')

In [None]:
# Test loss against h/P, one colour per training step; log-log axes.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'h/P'
y_expr = 'test_loss'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the saved figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    color = cmap(norm(val))

    # Rows of this step, ordered by x so the smoothed curve is drawn left to right.
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

    # Kernel-smoothed trend in log(x), drawn underneath the points.
    y_sm = smooth(np.log(x), y, .0001)
    plt.plot(x, y_sm, color=color, ls=':', zorder=-1)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')

plt.xlabel(r"$h/P$")
plt.ylabel(r"Test $\mathcal{L}$")
fig.savefig('plots/h_P_vs_test_loss_L=2_log.pdf')

In [None]:
# Test loss against h/h*, one colour per training step; log-log axes.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'h/h_star'
y_expr = 'test_loss'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the saved figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    color = cmap(norm(val))

    # Rows of this step, ordered by x so the smoothed curve is drawn left to right.
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

    # Kernel-smoothed trend in log(x), drawn underneath the points.
    y_sm = smooth(np.log(x), y, .001)
    plt.plot(x, y_sm, color=color, ls=':', zorder=-1)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')
# Vertical marker at h = h*.
plt.axvline(1, c='k', ls='--')

plt.xlabel(r"$h/h^*$")
plt.ylabel(r"Test $\mathcal{L}$")
fig.savefig('plots/h_h_star_vs_test_loss_L=2_log.pdf')

In [None]:
# N_del/h against h/h*, one colour per training step; log-log axes, points only.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'h/h_star'
y_expr = 'N_del/h'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the saved figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')
# Reference lines at h = h* and N_del = h.
plt.axvline(1, c='k', ls='--', alpha=.7)
plt.axhline(1, c='k', ls='--', alpha=.7)

plt.xlabel(r"$h/h^*$")
plt.ylabel(r"$N_\Delta/h$")
fig.savefig('plots/h_h_star_vs_N_del_h_L=2.pdf')

In [None]:
# N_del/N against h/h*, one colour per training step; log-log axes, points only.
fig = plt.figure(figsize=(9,6))

# Defined here so the cell does not depend on `by` / `by_vals` left over from
# whichever plotting cell happened to run before it.
by = 'step'
x_expr = 'h/h_star'
y_expr = 'N_del/N'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the saved figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')
# Reference lines at h = h* and N_del = N.
plt.axvline(1, c='k', ls='--', alpha=.7)
plt.axhline(1, c='k', ls='--', alpha=.7)

plt.xlabel(r"$h/h^*$")
plt.ylabel(r"$N_\Delta/N$")
fig.savefig('plots/h_h_star_vs_N_del_N_L=2.pdf')

In [None]:
# N_del/h against train loss, one colour per training step; log-log axes, points only.
# This figure is displayed only; it is not written to disk.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'train_loss'
y_expr = 'N_del/h'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')
# Reference line at N_del = h.
plt.axhline(1, c='k', ls='--', alpha=.7)

plt.xlabel(r"Train $\mathcal{L}$")
plt.ylabel(r"$N_\Delta/h$")

In [None]:
# N_del/N against train loss, one colour per training step; log-log axes, points only.
# This figure is displayed only; it is not written to disk.
fig = plt.figure(figsize=(9,6))

by = 'step'
x_expr = 'train_loss'
y_expr = 'N_del/N'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

plt.yscale('log')
plt.xscale('log')
# Reference line at N_del = N.
plt.axhline(1, c='k', ls='--', alpha=.7)

plt.xlabel(r"Train $\mathcal{L}$")
plt.ylabel(r"$N_\Delta/N$")

In [None]:
# Test loss against N_del/h, one colour per training step; log-log axes.
# Points of each step are joined by a dotted line in order of increasing x.
fig = plt.figure(figsize=(9,6))

# Defined here so the cell does not depend on `by` / `by_vals` left over from
# whichever plotting cell happened to run before it.
by = 'step'
x_expr = 'N_del/h'
y_expr = 'test_loss'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    color = cmap(norm(val))

    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

    # No smoothing here: the raw values are connected directly.
    plt.plot(x, y, color=color, ls=':')

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')
plt.xlabel(x_expr)
plt.ylabel(y_expr)

plt.yscale('log')
plt.xscale('log')
# Reference line at N_del = h.
plt.axvline(1, c='k', ls='--', alpha=.7)

In [None]:
# Test loss against N_del/N, one colour per training step; log-log axes.
# Points of each step are joined by a dotted line in order of increasing x.
fig = plt.figure(figsize=(9,6))

# Defined here so the cell does not depend on `by` / `by_vals` left over from
# whichever plotting cell happened to run before it.
by = 'step'
x_expr = 'N_del/N'
y_expr = 'test_loss'

sm = cm.ScalarMappable(norm=norm, cmap=cmap)

# Every 5th distinct step, largest first.
by_vals = np.array(sorted(result_df.eval(by).unique(), reverse=False))[::5][::-1]
# Random z-order helps with visual clarity: shuffle the interior steps and draw
# the two extreme steps last. A locally seeded generator keeps the figure
# identical from run to run.
extremes = by_vals[[0, -1]]
interior = np.random.RandomState(0).permutation(by_vals[1:-1])
by_vals = np.append(interior, extremes)
norm.autoscale(by_vals)

for val in by_vals:
    color = cmap(norm(val))

    data = result_df.query(f'{by} == @val')
    data = data.iloc[np.argsort(data.eval(x_expr))]

    x = data.eval(x_expr)
    y = data.eval(y_expr)
    plt.scatter(x, y, c=data.eval(by), cmap=cmap, norm=norm, alpha=.7)

    # No smoothing here: the raw values are connected directly.
    plt.plot(x, y, color=color, ls=':')

# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')
plt.xlabel(x_expr)
plt.ylabel(y_expr)

plt.yscale('log')
plt.xscale('log')
# Reference line at N_del = N.
plt.axvline(1, c='k', ls='--', alpha=.7)

In [None]:
# Test loss against N_del/h for every row, coloured by training step.
fig, ax = plt.subplots(figsize=(9, 6))
rows_by_step = result_df.sort_values('step')

ratio = rows_by_step['N_del'] / rows_by_step['h']
test_loss_values = rows_by_step['test_loss']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
ax.plot(ratio, test_loss_values, color='none')
points = ax.scatter(ratio, test_loss_values, c=rows_by_step['step'],
                    cmap=cmap, norm=norm, alpha=.7)
fig.colorbar(points, ax=ax, label='Training steps')

ax.set_xlabel(r'$N_\Delta/h$')
ax.set_ylabel(r'Test $\mathcal{L}$')

ax.set_yscale('log')
ax.set_xscale('log')
ax.axvline(1, color='k', ls=':')

In [None]:
# N_del/h against P/h for every row, coloured by training step.
fig, ax = plt.subplots(figsize=(9, 6))

p_over_h = result_df['P'] / result_df['h']
n_del_over_h = result_df['N_del'] / result_df['h']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
ax.plot(p_over_h, n_del_over_h, color='none')
points = ax.scatter(p_over_h, n_del_over_h, c=result_df['step'],
                    cmap=cmap, norm=norm, alpha=.7)
fig.colorbar(points, ax=ax, label='Training steps')

ax.set_xlabel('P/h')
ax.set_ylabel(r'$N_\Delta/h$')

ax.set_yscale('log')
ax.set_xscale('log')
ax.axhline(1, color='k', ls=':')

In [None]:
# N_del/N against P/N for every row, coloured by training step.
fig, ax = plt.subplots(figsize=(9, 6))

p_over_n = result_df['P'] / result_df['N']
n_del_over_n = result_df['N_del'] / result_df['N']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
ax.plot(p_over_n, n_del_over_n, color='none')
points = ax.scatter(p_over_n, n_del_over_n, c=result_df['step'],
                    cmap=cmap, norm=norm, alpha=.7)
fig.colorbar(points, ax=ax, label='Training steps')

ax.set_xlabel('P/N')
ax.set_ylabel(r'$N_\Delta/N$')

ax.set_yscale('log')
ax.set_xscale('log')
ax.axhline(1, color='k', ls=':')

In [None]:
# N_del/N against P/N, restricted to rows with step > 1e5, coloured by training step.
plt.figure(figsize=(9,6))
data = result_df.query('step > 1e5')

x = data['P']/data['N']
y = data['N_del']/data['N']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
plt.plot(x, y, color='none')
plt.scatter(x, y, c=data['step'], cmap=cmap, norm=norm, alpha=.7)
plt.colorbar(label='Training steps')

plt.xlabel('P/N')
plt.ylabel(r'$N_\Delta/N$')

# `linthresh` is the current keyword (matplotlib >= 3.3); the per-axis
# spelling `linthreshy` has been removed and now raises.
plt.yscale('symlog', linthresh=1)
plt.xscale('log')
plt.axhline(1, color='k',ls=':')
plt.xlim(1, 500)
plt.ylim(0, 200)

In [None]:
# N_del/h against P/h, restricted to rows with step < 5, coloured by training step.
plt.figure(figsize=(9,6))
data = result_df.query('step < 5')

x = data['P']/data['h']
y = data['N_del']/data['h']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
plt.plot(x, y, color='none')
plt.scatter(x, y, c=data['step'], cmap=cmap, norm=norm, alpha=.7)
plt.colorbar(label='Training steps')

plt.xlabel('P/h')
plt.ylabel(r'$N_\Delta/h$')

# `linthresh` is the current keyword (matplotlib >= 3.3); the per-axis
# spelling `linthreshy` has been removed and now raises.
plt.yscale('symlog', linthresh=1.0)
plt.xscale('log')
plt.axhline(1, color='k',ls=':')
plt.xlim(1, 500)
plt.ylim(0, 200)

In [None]:
# Overlay on one set of axes:
#   rows with step < 5   plotted as (P/h, N_del/h)
#   rows with step > 1e5 plotted as (P/N, N_del/N)
plt.figure(figsize=(9,6))
data = result_df.query('step < 5')

x = data['P']/data['h']
y = data['N_del']/data['h']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
plt.plot(x, y, color='none')
plt.scatter(x, y, c=data['step'], cmap=cmap, norm=norm, alpha=.7)
# The colorbar is created from the first scatter; both scatters share `norm` and `cmap`.
plt.colorbar(label='Training steps')


data = result_df.query('step > 1e5')

x = data['P']/data['N']
y = data['N_del']/data['N']

# Same invisible-line trick for the second data set.
plt.plot(x, y, color='none')
plt.scatter(x, y, c=data['step'], cmap=cmap, norm=norm, alpha=.7)


# `linthresh` is the current keyword (matplotlib >= 3.3); the per-axis
# spelling `linthreshy` has been removed and now raises.
plt.yscale('symlog', linthresh=1.0)
plt.xscale('log')
plt.axhline(1, color='k',ls=':')
plt.xlim(1, 500)
plt.ylim(0, 200)

In [None]:
# Redefine N* per step as the N of the row with the largest test loss.
# This overwrites the `N_star` column computed earlier from the train-loss cutoff.
# Alternatives tried previously:
#   - smallest N_del/h among rows with N_del/h >= 1
#   - largest test loss among rows with N_del/h <= 1.


def _row_with_largest_test_loss(step_rows):
    """Return the last row of one step after sorting ascending by test_loss."""
    return step_rows.sort_values('test_loss').iloc[-1]


extremizing_rows = result_df.groupby('step').apply(_row_with_largest_test_loss)
N_star = extremizing_rows['N']
result_df['N_star'] = result_df['step'].map(N_star)

In [None]:
# N_del/h of each step's extremizing row against the step (log x-axis).
fig, ax = plt.subplots()
ax.plot(extremizing_rows.index, extremizing_rows['N_del/h'])
ax.set_xscale('log')
# Reference line at N_del = h.
ax.axhline(1, ls=':', c='k')

In [None]:
# N* N_del / N against N*/N for every row, coloured by training step.
plt.figure(figsize=(9,6))
data = result_df

x = data['N_star'] / data['N'] 
y = data['N_star'] * data['N_del'] / data['N']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
plt.plot(x, y, color='none')
plt.scatter(x, y, c=data['step'], cmap=cmap, norm=norm, alpha=.7)
plt.colorbar(label='Training steps')

# Labels describe what is actually plotted: the previous ones read N/N^* and
# Train L, neither of which matches x or y above.
# NOTE(review): if N/N^* and train loss were the intended quantities, change
# the data expressions instead.
plt.xlabel(r'$N^*/N$')
plt.ylabel(r'$N^* N_\Delta/N$')


plt.yscale('log')
plt.xscale('log')
plt.axvline(1, color='k',ls=':')

In [None]:
# N_del/h against N*/N for every row, coloured by training step.
plt.figure(figsize=(9,6))
data = result_df

x = data['N_star'] / data['N'] 
y = data['N_del'] / data['h']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
plt.plot(x, y, color='none')
plt.scatter(x, y, c=data['step'], cmap=cmap, norm=norm, alpha=.7)
plt.colorbar(label='Training steps')

# Labels describe what is actually plotted: the previous ones read N/N^* and
# Train L, neither of which matches x or y above.
# NOTE(review): if N/N^* and train loss were the intended quantities, change
# the data expressions instead.
plt.xlabel(r'$N^*/N$')
plt.ylabel(r'$N_\Delta/h$')


plt.yscale('log')
plt.xscale('log')
plt.axvline(1, color='k',ls=':')
plt.axhline(1, color='k',ls=':')

In [None]:
# Test loss against N/N* on a thinned sample of rows, coloured by training step.
fig, ax = plt.subplots(figsize=(9, 6))

# Every 20th row, walking backwards through the step-sorted frame.
thinned = result_df.sort_values('step')[::-20]
# Rescale the shared colour norm to the steps present in this sample.
norm.autoscale(thinned['step'].values)

n_over_n_star = thinned['N'] / thinned['N_star']
test_loss_values = thinned['test_loss']

# Invisible line so the limits are set correctly: matplotlib gets confused
# with scatters on log-scaled axes.
ax.plot(n_over_n_star, test_loss_values, color='none')
points = ax.scatter(n_over_n_star, test_loss_values, c=thinned['step'],
                    cmap=cmap, norm=norm, alpha=.7)
fig.colorbar(points, ax=ax, label='Training steps')

ax.set_xlabel(r'$N/N^*$')
ax.set_ylabel(r'Test $\mathcal{L}$')

ax.set_yscale('log')
ax.set_xscale('log')
ax.axvline(1, color='k', ls=':')

In [None]:
# N* against training step on log-log axes.
fig, ax = plt.subplots()
step_values = result_df['step']
n_star_values = result_df['N_star']

# Invisible line first so the log-scaled limits follow the data.
ax.plot(step_values, n_star_values, color='none')

ax.scatter(step_values, n_star_values)
ax.set_yscale('log')
ax.set_xscale('log')

# Hessian

In [None]:
# Rows at the smallest and at the largest recorded training step.
first_step = result_df['step'].min()
last_step = result_df['step'].max()

untrained = result_df[result_df['step'] == first_step]
trained = result_df[result_df['step'] == last_step]

In [None]:
# Train loss against N_del/h for the first and last step, plus one black marker
# per step at that step's selected row.
x = 'N_del/h'
y = 'train_loss'

plt.scatter(untrained[x], untrained[y])
plt.scatter(trained[x], trained[y])

for step in sorted(result_df['step'].unique()):
    df = result_df.query("step == @step")
    # Among rows with train loss above 5e-2 and N_del/h >= 1, take the one
    # with the smallest N_del/h.
    candidates = df.query('(train_loss > 5e-2) and (N_del/h >= 1)')
    if candidates.empty:
        # No row of this step satisfies both conditions; skip it rather than
        # failing on `.iloc[0]`.
        continue
    row = candidates.sort_values('N_del/h').iloc[0]
    plt.scatter(row[x], row[y], c='k')

plt.yscale('log')
plt.xscale('log')

In [None]:
# One spectrum curve per training step, taken from that step's selected row.
from matplotlib import cm
sm = cm.ScalarMappable(norm=norm, cmap=cmap)

steps = sorted(result_df['step'].unique(), reverse=False)
norm.autoscale(steps)

for step in steps:
    df = result_df.query("step == @step")
    # Among rows with train loss above 5e-2 and N_del/h >= 1, take the one
    # with the smallest N_del/h.
    candidates = df.query('(train_loss > 5e-2) and (N_del/h >= 1)')
    if candidates.empty:
        # No row of this step satisfies both conditions; skip it rather than
        # failing on `.iloc[0]`.
        continue
    row = candidates.sort_values('N_del/h').iloc[0]

    # Density histogram of log(sqrt(eigs0)) with Sturges binning.
    vals = np.sqrt(row.eigs0)
    hist, edges = np.histogram(np.log(vals), 'sturges', density=True)
    # Turn the histogram into a polyline: bin centres, plus one extra point a
    # bin width beyond each end where the density is pinned to zero.
    dx = np.mean(np.diff(edges))
    edges = np.concatenate((edges[[0]]-dx, edges[1:]/2 + edges[:-1]/2 , edges[[-1]]+dx))
    edges = np.exp(edges)  # back from log space to sqrt(lambda)
    hist = np.concatenate(([0], hist, [0]))

    plt.plot(edges, hist, c=sm.to_rgba(np.clip(step, norm.vmin, norm.vmax)), alpha=1.)
# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Training Steps')

# `linthresh` is the current keyword (matplotlib >= 3.3); the per-axis
# spelling `linthreshx` has been removed and now raises.
plt.xscale('symlog', linthresh=1e-1)
plt.xlim(0, None)
plt.ylim(0, None)
plt.xlabel(r'$\sqrt{\lambda}$')
plt.ylabel(r'$P\left(\sqrt{\lambda}\right)$')
plt.title("Hessian Spectrum as a Function of Training Steps")

In [None]:
# Spectrum curves for the first training step, coloured by train loss.
from matplotlib import cm
sm = cm.ScalarMappable(norm=norm, cmap=cmap, )

df = untrained.query('(N_del/h >= 1)')
# Log-spaced train-loss thresholds from max(5e-2, smallest loss) up to the largest loss.
losses = np.logspace(np.log10(max(5e-2, min(df.train_loss))), np.log10(df.train_loss.max()))
norm.autoscale(losses)

# Walk the thresholds downwards, starting from the fifth-largest.
# The loop variable is deliberately not called `loss`: that name is the
# notebook-level loss function and rebinding it here would break re-running
# the cell that computes train/test losses.
for loss_floor in losses[-5::-1]:
    # Row with the smallest N_del/h among those whose train loss reaches the threshold.
    row = df.query('(train_loss >= @loss_floor) ').sort_values('N_del/h').iloc[0]
    # Density histogram of log(sqrt(eigs0)) with Sturges binning.
    vals = np.sqrt(row.eigs0)
    hist, edges = np.histogram(np.log(vals), 'sturges', density=True)
    # Turn the histogram into a polyline: bin centres, plus one extra point a
    # bin width beyond each end where the density is pinned to zero.
    dx = np.mean(np.diff(edges))
    edges = np.concatenate((edges[[0]]-dx, edges[1:]/2 + edges[:-1]/2 , edges[[-1]]+dx))
    edges = np.exp(edges)  # back from log space to sqrt(lambda)
    hist = np.concatenate(([0], hist, [0]))

    plt.plot(edges, hist, c=sm.to_rgba(np.clip(row.train_loss, norm.vmin, norm.vmax)), alpha=1.)
# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Train Loss')

# `linthresh` is the current keyword (matplotlib >= 3.3); the per-axis
# spelling `linthreshx` has been removed and now raises.
plt.xscale('symlog', linthresh=1e-1)
plt.xlim(0, None)
plt.ylim(0, None)
plt.xlabel(r'$\sqrt{\lambda}$')
plt.ylabel(r'$P\left(\sqrt{\lambda}\right)$')
plt.title("Hessian Spectrum as a Function of Train Loss\nRandom Features")

In [None]:
# Spectrum curves for the last training step, coloured by train loss.
from matplotlib import cm
sm = cm.ScalarMappable(norm=norm, cmap=cmap)

df = trained.query('(N_del/h >= 1)')
# Log-spaced train-loss thresholds from max(5e-2, smallest loss) up to the largest loss.
losses = np.logspace(np.log10(max(5e-2, min(df.train_loss))), np.log10(df.train_loss.max()))
norm.autoscale(losses)

# Walk the thresholds downwards, starting from the fifth-largest.
# The loop variable is deliberately not called `loss`: that name is the
# notebook-level loss function and rebinding it here would break re-running
# the cell that computes train/test losses.
for loss_floor in losses[-5::-1]:
    # Row with the smallest N_del/h among those whose train loss reaches the threshold.
    row = df.query('(train_loss >= @loss_floor) ').sort_values('N_del/h').iloc[0]
    # Density histogram of log(sqrt(eigs0)) with Sturges binning.
    vals = np.sqrt(row.eigs0)
    hist, edges = np.histogram(np.log(vals), 'sturges', density=True)
    # Turn the histogram into a polyline: bin centres, plus one extra point a
    # bin width beyond each end where the density is pinned to zero.
    dx = np.mean(np.diff(edges))
    edges = np.concatenate((edges[[0]]-dx, edges[1:]/2 + edges[:-1]/2 , edges[[-1]]+dx))
    edges = np.exp(edges)  # back from log space to sqrt(lambda)
    hist = np.concatenate(([0], hist, [0]))

    plt.plot(edges, hist, c=sm.to_rgba(np.clip(row.train_loss, norm.vmin, norm.vmax)), alpha=1.)
# `sm` is not attached to any Axes, so name the Axes the colorbar takes space from.
plt.colorbar(sm, ax=plt.gca(), label='Train Loss')

# `linthresh` is the current keyword (matplotlib >= 3.3); the per-axis
# spelling `linthreshx` has been removed and now raises.
plt.xscale('symlog', linthresh=1e-1)
plt.xlim(0, None)
plt.ylim(0, None)
plt.xlabel(r'$\sqrt{\lambda}$')
plt.ylabel(r'$P\left(\sqrt{\lambda}\right)$')
plt.title("Hessian Spectrum as a Function of Train Loss\nTrained Features")