In [None]:
# Post-Processing
import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf

from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.svm import LinearSVC as SVM

import tqdm.auto as tqdm

%config InlineBackend.figure_format = 'svg'

In [None]:
# Directory whose entries are the saved models; each entry name is parsed for
# hyper-parameters by the post-processing loop.
model_dir = 'models'
# os.listdir returns entries in arbitrary order; sort so repeated runs visit
# the models in the same order.
model_names = sorted(os.listdir(model_dir))

# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
# The context manager closes the file handle that a bare open() would leak.
with open('mnist.pkl', 'rb') as mnist_file:
    X_train, X_test, y_train, y_test = pickle.load(mnist_file)

# Regularisation strength; turned into the SVM's C via C = 1/(N*lamb).
lamb = 1e-13

# Accumulates one record (dict) per evaluated checkpoint.
results = []

In [None]:
# For every saved model and every checkpoint of that model: load the weights,
# extract the 'intermediate'-layer features, fit a linear SVM on them and
# record the SVM decision values on the train and test sets.
for model_name in tqdm.tqdm(model_names):
    model_path = os.path.join(model_dir, model_name)

    # The first three '-'-separated fields of the directory name each carry an
    # integer after their first underscore: P, d and N, in that order.
    name_fields = model_name.split('-')
    P, d, N = (int(field.split('_')[1]) for field in name_fields[:3])

    full_model = tf.keras.models.load_model(os.path.join(model_path, 'MODEL'))

    # Checkpoint prefixes: drop the trailing '.index' from each '*.ckpt.index'
    # file. Slicing by suffix length (instead of str.find) stays correct even if
    # '.index' also occurs earlier in the file name.
    index_suffix = '.index'
    weight_names = sorted(
        p[:-len(index_suffix)]
        for p in os.listdir(model_path)
        if p.endswith('.ckpt' + index_suffix)
    )

    # C depends only on N and lamb, so compute it once per model.
    C = 1/(N*lamb)

    for weight_name in tqdm.tqdm(weight_names, leave=False):
        # Integer after the first underscore of the part before the first '.'.
        step = int(weight_name.split('.')[0].split('_')[1])

        weight_path = os.path.join(model_path, weight_name)
        full_model.load_weights(weight_path)

        # Sub-model mapping d-dimensional inputs to the 'intermediate' layer output.
        intermed_model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(d, name='inputs'),
            full_model.get_layer('intermediate')
        ])

        train_features = intermed_model(X_train)
        test_features = intermed_model(X_test)

        svm = SVM(penalty='l2', loss='squared_hinge', dual=False, fit_intercept=False, C=C)
        svm = svm.fit(train_features, y_train)

        y_train_hat = svm.decision_function(train_features)
        y_test_hat = svm.decision_function(test_features)

        # 'step' must be stored: the plotting cells colour and filter by it.
        result = {
            "P": P,
            "N": N,
            "d": d,
            "step": step,
            "lambda": lamb,
            "C": C,
            "y_train_hat": y_train_hat,
            "y_test_hat": y_test_hat,
        }
        results.append(result)
        
# pickle.dump(results, open('results.pkl', 'wb'))

In [None]:
# Rebind `results` to the records stored in results.pkl.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
# The context manager closes the file handle that a bare open() would leak.
with open('results.pkl', 'rb') as results_file:
    results = pickle.load(results_file)

In [None]:
import pandas as pd

In [None]:
# One row per record in `results`; the record keys become the columns.
result_df = pd.DataFrame(results)

In [None]:
# Load the train/test split stored in mnist.pkl.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
# The context manager closes the file handle that a bare open() would leak.
with open('mnist.pkl', 'rb') as mnist_file:
    X_train, X_test, y_train, y_test = pickle.load(mnist_file)

In [None]:
def force(y, f):
    """Return ``1 - y*f`` element-wise for labels ``y`` and decision values ``f``."""
    return 1 - y*f


def loss(y, f):
    """Return the mean over the last axis of ``max(0, 1 - y*f)**2``."""
    hinge = np.maximum(0, force(y, f))
    return np.mean(hinge**2, -1)


def N_del(y, f):
    """Count, along the last axis, the entries where ``1 - y*f >= 0``."""
    return np.sum(force(y, f) >= 0, -1)


def N_correct(y, f):
    """Count, along the last axis, the entries where ``y*f > 0``."""
    return np.sum(y*f > 0, -1)


def N_incorrect(y, f):
    """Count, along the last axis, the entries where ``y*f < 0``."""
    return np.sum(y*f < 0, -1)

In [None]:
# Per-record metrics computed from the stored decision values and the labels.
test_scores = result_df['y_test_hat']
train_scores = result_df['y_train_hat']
result_df['test_loss'] = test_scores.apply(lambda scores: loss(y_test, scores))
result_df['train_loss'] = train_scores.apply(lambda scores: loss(y_train, scores))
result_df['N_del'] = train_scores.apply(lambda scores: N_del(y_train, scores))

# Ratios between N, P and N_del used as plot axes.
n_col = result_df['N']
p_col = result_df['P']
n_del_col = result_df['N_del']
result_df['N/P'] = n_col / p_col
result_df['P/N'] = p_col / n_col
result_df['N_del/P'] = n_del_col / p_col
result_df['N_del/N'] = n_del_col / n_col


In [None]:
from matplotlib import colors as mcolors
from matplotlib.colors import LinearSegmentedColormap

# Eight RGB anchor colours given on a 0-255 scale, divided by 255 to get floats.
mei2019_rgb = np.array([
    (243, 232, 29),
    (245, 173, 47),
    (140, 193, 53),
    (50,  191, 133),
    (23,  167, 198),
    (36,  123, 235),
    (53,  69,  252),
    (52,  27,  203),
]) / 255.
cmap = LinearSegmentedColormap.from_list('Mei2019', mei2019_rgb)

# Preview strip: two identical rows of a 0 -> 1 ramp drawn with the colormap.
ramp = np.linspace(0, 1, 256)
gradient = np.vstack((ramp, ramp))
fig = plt.figure(figsize=(6, .5))
img = plt.imshow(gradient, aspect='auto', cmap=cmap)
title = plt.title('Colormap stolen from Mei2019')

# Logarithmic colour normalisation, passed as `norm=` to the scatter plots.
norm = mcolors.LogNorm()

In [None]:
# train_loss against N_del/N, coloured by step (linear axes).
result_df.plot.scatter(x='N_del/N', y='train_loss', c='step', cmap=cmap, norm=norm, alpha=.5)

In [None]:
# train_loss against N_del/N, coloured by step (log y-axis).
ax = result_df.plot.scatter(x='N_del/N', y='train_loss', c='step', cmap=cmap, norm=norm, alpha=.5)
ax.set_yscale('log')

In [None]:
# train_loss against N_del/N, coloured by step (log x-axis).
ax = result_df.plot.scatter(x='N_del/N', y='train_loss', c='step', cmap=cmap, norm=norm, alpha=.5)
ax.set_xscale('log')

In [None]:
# train_loss against N_del/N, coloured by step (log-log axes).
ax = result_df.plot.scatter(x='N_del/N', y='train_loss', c='step', cmap=cmap, norm=norm, alpha=.5)
ax.set_yscale('log')
ax.set_xscale('log')

In [None]:
# train_loss against P/N, coloured by step (linear axes).
result_df.plot.scatter(x='P/N', y='train_loss', c='step', cmap=cmap, norm=norm, alpha=.5)

In [None]:
# test_loss against N_del/N on log-log axes, coloured by step, with a dotted
# vertical marker at N_del/N = 1.
ax = result_df.plot.scatter(x='N_del/N', y='test_loss', c='step', cmap=cmap, norm=norm, alpha=.5)
ax.set_yscale('log')
ax.set_xscale('log')
ax.axvline(1, color='k', ls=':')

In [None]:
# Same test_loss plot restricted to records with step < 1e3.
early_steps = result_df['step'] < 1e3
ax = result_df.loc[early_steps].plot.scatter(
    x='N_del/N', y='test_loss', c='step', cmap=cmap, norm=norm, alpha=.5
)
ax.set_yscale('log')
ax.set_xscale('log')
ax.axvline(1, color='k', ls=':')

In [None]:
# Same test_loss plot restricted to records with step > 1e3.
late_steps = result_df['step'] > 1e3
ax = result_df.loc[late_steps].plot.scatter(
    x='N_del/N', y='test_loss', c='step', cmap=cmap, norm=norm, alpha=.5
)
ax.set_yscale('log')
ax.set_xscale('log')
ax.axvline(1, color='k', ls=':')

In [None]:
# Same test_loss plot restricted to records with step > 1e5.
final_steps = result_df['step'] > 1e5
ax = result_df.loc[final_steps].plot.scatter(
    x='N_del/N', y='test_loss', c='step', cmap=cmap, norm=norm, alpha=.5
)
ax.set_yscale('log')
ax.set_xscale('log')
ax.axvline(1, color='k', ls=':')

In [None]:
# N_del/N against P/N on log-log axes, coloured by step, with a dotted
# horizontal marker at N_del/N = 1.
ax = result_df.plot.scatter(x='P/N', y='N_del/N', c='step', cmap=cmap, norm=norm, alpha=.5)
ax.set_yscale('log')
ax.set_xscale('log')
ax.axhline(1, color='k', ls=':')

Why does the threshold $N_\Delta/N = 1$ persist even throughout training?
- Maybe it doesn't, but the change in $N_{eff}$ is linear rather than exponential, so it doesn't show up on the log-log plots?