In [1]:
import argparse
import concurrent.futures
import json
import os
import subprocess
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import gin
import numpy as np
import pandas as pd
import seaborn as sns
import yaml

import lib_analysis
from lib_plot import *

In [2]:
from jupyter_dash import JupyterDash
import dash_html_components as html
import dash_core_components as dcc

# Stylesheet URL list handed to JupyterDash(external_stylesheets=...) when the
# dashboard apps in this notebook are created.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

  and should_run_async(code)


In [3]:
# Load the results of the 'toy' problem from ../data. Later cells index the
# result by column ("indep", "kernel", "lambda", "weights"), so it is
# presumably a pandas DataFrame -- confirm against lib_analysis.read_problem.
# NOTE(review): the recorded outputs of the cells that use this frame show
# "KeyError: 'indep'", so the frame loaded at run time apparently had no
# 'indep' column -- verify the data directory / read_problem output.
TOY_DF = lib_analysis.read_problem(Path('../data'), 'toy')

In [4]:
def build_independence_tab(problem_df, indep):
    """Build one Dash tab with the four summary figures for one ``indep`` value.

    Args:
        problem_df: Results frame containing an ``"indep"`` column plus the
            columns consumed by the plotting helpers called below.
        indep: Value of the ``"indep"`` column selecting the rows to plot.

    Returns:
        A ``dcc.Tab`` labelled ``"<indep> independence"`` whose children are
        the norm, cosine, diversity-loss and accuracy graphs, in that order.
    """
    # Plot from a private copy of the selected rows so the source frame stays
    # untouched regardless of what the plotting helpers do with their input.
    df = problem_df[problem_df["indep"] == indep].copy()

    figs = [
        end_to_end_plot(
            go.Figure(),
            expand_network(df, "norm"),
            "norm",
            ["network", "kernel"],
            "Norms of weights",
        ),
        end_to_end_plot(
            go.Figure(), df, "cos", ["kernel"], "Cos of weights"
        ),
        end_to_end_plot(
            go.Figure(),
            split_train_test(df, "diversity_loss"),
            "diversity_loss",
            ["kernel", "distribution"],
            "Diversity loss",
        ),
        end_to_end_plot(
            go.Figure(),
            split_train_test(expand_network(df, "test_accuracy"), "accuracy"),
            "accuracy",
            ["kernel", "distribution", "network"],
            "Accuracy",
        ),
    ]

    return dcc.Tab(
        label=f"{indep} independence",
        children=[dcc.Graph(figure=f) for f in figs],
    )


# One tab per distinct value of the "indep" column, in sorted order.
tabs = [
    build_independence_tab(TOY_DF, indep)
    for indep in sorted(TOY_DF["indep"].unique())
]

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([
    dcc.Tabs(tabs)
])
app.run_server(mode="inline")


KeyError: 'indep'

In [6]:
#!/usr/bin/env python3
import seaborn as sns
import concurrent.futures
import argparse
import numpy as np
import subprocess
import os
from pathlib import Path
from typing import List, Optional
import json
import pandas as pd
import matplotlib.pyplot as plt
# A 'figure.max_open_warning' of 0 switches off matplotlib's warning about the
# number of figures that are open at the same time.
plt.rcParams.update({'figure.max_open_warning': 0})


n_samples = 20

sigma_c = 0.4
sigma_s = 0.4

# Test split: feature `c` is centred on 2 * label, while feature `s` is
# centred on 2 * z, where z is a random sign drawn separately from the label.
np.random.seed(0)
y = np.random.choice([1, -1], size=(n_samples))
c = np.random.normal(2 * y, scale=sigma_c, size=(n_samples))
z = np.random.choice([1, -1], size=(n_samples))
s = np.random.normal(2 * z, scale=sigma_s, size=(n_samples))
test_data = pd.DataFrame({'c': c, 's': s, 'label': y}).assign(distribution='test')

# Train split: both features are centred on 2 * label. The generator is
# re-seeded with the same value and the first two draws are issued in the same
# order, so `y` and `c` repeat the values used for the test split.
np.random.seed(0)
y = np.random.choice([1, -1], size=(n_samples))
c = np.random.normal(2 * y, scale=sigma_c, size=(n_samples))
s = np.random.normal(2 * y, scale=sigma_s, size=(n_samples))
train_data = pd.DataFrame({'c': c, 's': s, 'label': y}).assign(distribution='train')

# Test rows first, then train rows; each part keeps its own 0..n_samples-1 index.
data = pd.concat([test_data, train_data])
    
def eval_linear_function(a: float, b: float, c: float) -> Tuple[np.ndarray, np.ndarray]:
    """Sample points on the line ``a * x + b * y + c = 0`` for plotting.

    The x-window starts at ``[-3, 3]`` and is halved until at most five of the
    sampled y-values exceed 10 in magnitude, or until the half-width drops
    below 0.0001, so that steep lines stay inside a plottable range.

    Args:
        a: Coefficient of x.
        b: Coefficient of y.
        c: Constant term.

    Returns:
        A tuple ``(xs, ys)`` of equally long arrays with the sampled
        coordinates. For a vertical line (``b == 0`` and ``a != 0``) every
        entry of ``xs`` equals ``-c / a`` and ``ys`` spans ``[-3, 3]``.
    """
    lim = 3
    if b == 0 and a != 0:
        # Vertical line: solving for y would divide by zero. x is fixed at
        # -c / a and y is unconstrained, so sample y over the initial window.
        ys = np.linspace(-lim, lim)
        xs = np.full_like(ys, -c / a)
        return (xs, ys)
    while True:
        xs = np.linspace(-lim, lim)
        ys = - 1 / b * (a * xs + c)
        if np.sum(np.absolute(ys) > 10.0) <= 5 or lim < 0.0001:
            break
        else:
            # Too many points fall outside |y| <= 10: zoom in around x = 0.
            lim /= 2
    return (xs, ys)


prob = 'toy'

# Directory that receives the decision-boundary images written by plot_weights.
os.makedirs('decision_boundaries', exist_ok=True)

# Aggregate every (indep, kernel, lambda) group with DataFrame.sample, keep
# only the 'weights' column, and turn the group keys back into regular columns.
# NOTE(review): sample() presumably draws one random entry per group from
# numpy's global random state -- confirm this is the intended selection.
df = (
    TOY_DF
    .groupby(['indep', 'kernel', 'lambda'])
    .agg(pd.DataFrame.sample)
    [['weights']]
    .reset_index()
)

def plot_weights(row: pd.Series):
    """Plot the sample points with both networks' decision lines and save the figure.

    Args:
        row: One row of the sampled-weights frame. It must provide
            ``'weights'`` (a mapping with entries ``"0"`` and ``"1"``, each
            holding that network's parameters keyed by variable name) and the
            ``'indep'``, ``'kernel'`` and ``'lambda'`` values used in the
            title and the file name.

    The scatter plot is drawn from the module-level ``data`` frame and the
    image is written to
    ``decision_boundaries/<indep>_<kernel>_lambda<lambda>.png``.
    """
    per_network = row['weights']

    def boundary(params, first_kernel, second_kernel, second_bias):
        # Scale the first kernel by the [0][0] entry of the second kernel and
        # read the line coefficients from rows 0 and 1 of the result.
        # NOTE(review): only the second layer's bias enters the offset; a
        # first-layer bias, if the model has one, is not used -- confirm.
        scaled = params[first_kernel] * params[second_kernel][0][0]
        return eval_linear_function(
            a=scaled[0][0], b=scaled[1][0], c=params[second_bias]
        )

    xs_0, ys_0 = boundary(
        per_network["0"], 'dense/kernel:0', "dense_1/kernel:0", "dense_1/bias:0"
    )
    xs_1, ys_1 = boundary(
        per_network["1"], 'dense_2/kernel:0', "dense_3/kernel:0", "dense_3/bias:0"
    )

    plot = sns.relplot(x='c', y='s', hue='label', style='distribution', legend='full', data=data)

    # The line whose first sampled y-value is larger is drawn in yellow and the
    # other one in red, whichever network it belongs to.
    style_0, style_1 = ('y-', 'r-') if ys_0[0] > ys_1[0] else ('r-', 'y-')
    plt.plot(xs_0, ys_0, style_0)
    plt.plot(xs_1, ys_1, style_1)

    indep, kernel, lambda_ = row['indep'], row['kernel'], row['lambda']
    plot.set(title=f"decision boundaries for {indep}; {kernel}; lambda={lambda_}")
    # NOTE(review): the figure is left open after saving; one figure per row
    # therefore stays alive until the kernel releases it.
    plot.savefig(f'decision_boundaries/{indep}_{kernel}_lambda{lambda_}.png')


# Render and save one figure per sampled configuration.
df.apply(plot_weights, axis=1)

  and should_run_async(code)


KeyError: 'indep'

In [3]:
# Short column names used by the plotting cells for the gin-style config keys.
mnist_column_aliases = {
    "BiasedMnistProblem.training_data_label_correlation": "label_correlation",
    "BiasedMnistProblem.base_model": "model",
}

# NOTE(review): the recorded output of this cell is "KeyError: 'weights'", so
# the loaded frame apparently had no 'weights' column at run time -- verify.
MNIST_DF = lib_analysis.read_problem(Path('../data'), 'biased_mnist')

# Convert the stored weights once, then derive the 'cos' and 'norm' columns
# from the converted values.
weights_np = MNIST_DF['weights'].apply(lib_analysis._weights_to_numpy)
MNIST_DF['weights'] = weights_np
MNIST_DF[['cos', 'norm']] = weights_np.apply(lib_analysis._process_weights_for_cos_and_norm)

MNIST_DF = MNIST_DF.rename(columns=mnist_column_aliases)

KeyError: 'weights'

In [15]:
# Same column rename as at the end of the MNIST loading cell. With its default
# settings DataFrame.rename skips labels that are not present, so applying it
# to an already renamed frame leaves the frame unchanged.
MNIST_DF = MNIST_DF.rename(columns={
    "BiasedMnistProblem.training_data_label_correlation": "label_correlation",
    "BiasedMnistProblem.base_model": "model",
})

In [16]:
# One entry per figure: (plotted column, grouping columns, title, preparation
# step applied to the filtered frame right before plotting).
figure_specs = [
    ("norm", ["network", "kernel"], "Norms of weights",
     lambda frame: expand_network(frame, "norm")),
    ("cos", ["kernel"], "Cos of weights",
     lambda frame: frame),
    ("diversity_loss", ["kernel", "distribution"], "Diversity loss",
     lambda frame: split_train_test(frame, "diversity_loss")),
    ("accuracy", ["kernel", "distribution", "network"], "Accuracy",
     lambda frame: split_train_test(expand_network(frame, "test_accuracy"), "accuracy")),
]

label_corr = [0.9, 0.999]
top_level_tabs = []

# Outer tabs: one per label correlation. Inner tabs: one per "indep" value.
for l_corr in label_corr:
    tabs = []
    for indep in sorted(MNIST_DF["indep"].unique()):
        df = (
            MNIST_DF[MNIST_DF["indep"] == indep]
            .loc[lambda rows: rows['label_correlation'] == l_corr]
            .copy()
        )

        figs = []
        for column, groups, title, prepare in figure_specs:
            # A fresh grouping list is passed on every call, as a literal would be.
            fig = end_to_end_plot(go.Figure(), prepare(df), column, list(groups), title)
            figs.append(fig)

        tabs.append(dcc.Tab(
            label=f"{indep} independence",
            children=[dcc.Graph(figure=f) for f in figs],
        ))

    tl_tab = dcc.Tab(label=f"{l_corr} label correlation", children=[dcc.Tabs(children=tabs)])
    top_level_tabs.append(tl_tab)


app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([
    dcc.Tabs(top_level_tabs)
])
app.run_server(mode="inline")
