In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local pde_nn) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# %matplotlib 

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

import pde_nn

In [None]:
# Network used by the manual training cells below (verbose output enabled).
net = pde_nn.DiffusionNN(verbose = True)

In [None]:
# Reset the network and add two layers via add_layer(100, tf.nn.elu).
# Re-running this cell starts from a clean graph because of initialize().
net.initialize() # to reset graph for each run of the cell
print(net.X.size)  # element count of net.X (presumably the input points -- confirm in pde_nn)
net.add_layer(100, tf.nn.elu)
# net.add_layer(100, tf.nn.sigmoid)
net.add_layer(100, tf.nn.elu)

# Front cover

In [None]:
from mpl_toolkits.mplot3d import axes3d

def f(x, t):
    """Evaluate sin(pi*x) * exp(-pi^2 * t) elementwise."""
    return np.sin(np.pi*x) * np.exp(-np.pi**2 * t)

N = 100

# Sample the surface on a grid: 60 points in x on [0, 1], 40 points in t on [0, 0.5].
x = np.linspace(0,1,60)
t = np.linspace(0,0.5,40)
X,T = np.meshgrid(x,t)
Z = f(X,T)

# Colour each grid node by its height, using viridis scaled to the range of Z.
norm = plt.Normalize(Z.min(), Z.max())
colors = plt.cm.viridis(norm(Z))
rcount, ccount, _ = colors.shape

# Draw the surface at full grid resolution with the precomputed colours.
fig = plt.figure(figsize = [16,13], frameon = False)
ax = fig.add_subplot(111, projection = '3d')
surf = ax.plot_surface(
    X, T, Z,
    rcount=rcount, ccount=ccount,
    facecolors=colors, shade=False,
)
surf.set_facecolor((0,0,0,0))

# Remove ticks and the axis frame so only the surface itself is rendered.
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
plt.axis('off')

# Fix the camera (elevation 30, azimuth 40) and write the figure to disk.
ax.view_init(30, 40)
fig.savefig('figures/front_cover.pdf')

In [None]:
# Train `net` with Adam. The two positional arguments are presumably the
# learning rate and the iteration count (objective() below calls run() with
# learning_rate=..., iterations=...) -- confirm against pde_nn.
# opt_kwargs = dict(momentum = 0.0,  use_nesterov = False)
opt_kwargs = {} # dict(beta1 = 0.9)
net.run(0.0001, 10000, optimizer = tf.train.AdamOptimizer, **opt_kwargs)

In [None]:
# Train the same `net` with plain gradient descent; 0.05 and 10000 are
# presumably learning rate and iteration count -- confirm against pde_nn.
# NOTE(review): `net` is not re-initialized here, so the outcome depends on
# whether the Adam cell above was run first.
opt_kwargs = {} # dict(momentum = 0.5,  use_nesterov = True)
net.run(0.05, 10000, optimizer = tf.train.GradientDescentOptimizer, **opt_kwargs)

In [None]:
# Create a figure, let the network draw its error curves, then save it.
# plot_error() presumably draws on the current figure -- confirm in pde_nn.
fig = plt.figure()
net.plot_error()
fig.savefig('figures/error.pdf')

# Run a hyperparameter search with hyperopt

In [None]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


In [None]:
i = 0  # number of objective() evaluations started so far; printed as progress


def objective(params):
    """Train one DiffusionNN for a sampled hyperparameter set (hyperopt objective).

    Parameters
    ----------
    params : dict
        One sample from the search space with keys:
        ``first_layer`` (width of the first layer),
        ``last_layer_rel`` (the last width is ``10 + (first - 10) * last_layer_rel``),
        ``n_hidden`` (one less than the number of layers),
        ``act_func`` (activation passed to ``add_layer``),
        ``log_learning_rate`` (learning rate is ``10**value``) and
        ``optimizer`` (dict with the optimizer class under ``'type'``; all
        remaining entries are forwarded to ``net.run`` as keyword arguments).

    Returns
    -------
    dict
        ``loss`` (value returned by ``net.run``, with NaN replaced by ``inf``),
        ``params`` (the input dict), ``net`` (the trained network) and
        ``status`` (``STATUS_OK``).
    """
    global i
    print(i)
    i = i + 1
    net = pde_nn.DiffusionNN(verbose = False)
    net.initialize() # to reset global graph variables

    first = params['first_layer']
    last_rel = params['last_layer_rel']
    last = 10 + (first - 10)*last_rel
    n_hidden = params['n_hidden'] + 1

    # Log-spaced widths from `first` down to `last`, truncated to integers so
    # the network is built with exactly the widths that add_layer_columns()
    # reports for the same parameters (previously floats were passed on).
    layer_sizes = np.logspace(np.log10(first), np.log10(last), n_hidden, dtype = int)

    for size in layer_sizes:
        net.add_layer(int(size), params['act_func'])

    print(params)
    # Copy before pop() so the caller's (hyperopt's) params dict keeps 'type'.
    p = params['optimizer'].copy()
    opt = p.pop('type')
    opt_kwargs = p
    learning_rate = 10**params['log_learning_rate']
    final_cost = net.run(learning_rate = learning_rate,
                         iterations = 10000,
                         optimizer = opt, **opt_kwargs)
    print(final_cost, net.mse[-1])
    # A NaN loss would break the comparison of trials; rank diverged runs last.
    if np.isnan(final_cost):
        final_cost = np.inf
    return {'loss':final_cost, 'params':params, 'net':net, 'status': STATUS_OK}
   

### Adam optimizer

In [None]:
# Search space for the Adam runs; objective() reads every key below.
# log_learning_rate is the base-10 exponent of the learning rate, and the
# optimizer choice has a single option (Adam, no extra keyword arguments).
hyperopt_grid = {
    'log_learning_rate' : hp.quniform('log_learning_rate', -5, -1, 1),
    'first_layer'   : hp.quniform('first_layer', 10, 128, 2),
    'last_layer_rel'    : hp.quniform('last_layer_rel', 0.1, 1, 0.01),
    'n_hidden'      : hp.randint('n_hidden', 4), # +1 in the code
    'act_func'      : hp.choice('act_func', [tf.nn.sigmoid, tf.nn.tanh, tf.nn.relu]),
    'optimizer'     : hp.choice('optimizer', [{'type': tf.train.AdamOptimizer},
                                             ])
}       
# Run 200 TPE-guided evaluations of objective(); every evaluation trains a
# network for 10000 iterations, so this cell is expensive to re-run.
trials = Trials()
results = fmin(objective, hyperopt_grid, algo = tpe.suggest,
              trials = trials, max_evals = 200)

### Momentum optimizer

In [None]:
# Search space for the momentum-optimizer runs. Identical to hyperopt_grid
# except for the optimizer entry, which additionally samples `momentum` and
# `use_nesterov`; objective() forwards both to net.run() as keyword arguments.
# NOTE(review): the momentum range is given as 0..1 in steps of 0.1 --
# confirm whether the end value 1.0 is intended.
hyperopt_grid_momentum = {
    'log_learning_rate' : hp.quniform('log_learning_rate', -5, -1, 1),
    'first_layer'   : hp.quniform('first_layer', 10, 128, 2),
    'last_layer_rel'    : hp.quniform('last_layer_rel', 0.1, 1, 0.01),
    'n_hidden'      : hp.randint('n_hidden', 4), # +1 in the code
    'act_func'      : hp.choice('act_func', [tf.nn.sigmoid, tf.nn.tanh, tf.nn.relu]),
    'optimizer'     : hp.choice('optimizer', [ {'type': tf.train.MomentumOptimizer, 
                                               'momentum': hp.quniform('momentum', 0, 1, 0.1), 
                                               'use_nesterov': hp.choice('use_nesterov', [False, True])}
                                             ])}       

In [None]:

# Run 200 TPE-guided evaluations of objective() over the momentum search
# space; per-trial result dicts are collected in trials_mom.
trials_mom = Trials()
results_mom = fmin(objective, hyperopt_grid_momentum, algo = tpe.suggest,
              trials = trials_mom, max_evals = 200)

In [None]:
# Helper function
def add_layer_columns(df):
    """Add ``layers`` and ``total_layer_size`` columns to ``df`` in place.

    For every row the layer widths are log-spaced from ``first_layer`` to
    ``10 + (first_layer - 10) * last_layer_rel`` over ``n_hidden + 1`` layers
    and truncated to integers. ``layers`` holds the resulting array and
    ``total_layer_size`` its sum. Nothing is returned.
    """
    widest = df['first_layer']
    narrowest = 10 + (widest - 10) * df['last_layer_rel']
    layer_counts = df['n_hidden'] + 1

    widths_per_row = [
        np.logspace(np.log10(start), np.log10(stop), count, dtype = int)
        for start, stop, count in zip(widest.values,
                                      narrowest.values,
                                      layer_counts.values)
    ]

    df['layers'] = widths_per_row
    df['total_layer_size'] = [np.sum(widths) for widths in widths_per_row]

# Data into dataframes

In [None]:
# One row per momentum trial: the result dict returned by objective(), widened
# with one column per sampled hyperparameter, then the derived columns.
df_mom = pd.DataFrame(trials_mom.results)
df_mom = pd.concat(
    [df_mom, pd.DataFrame(df_mom['params'].tolist())],
    axis = 1, sort = False,
).assign(
    # Readable activation name and the last recorded MSE of each trained net.
    act_func_name = lambda frame: frame['act_func'].map(lambda fn: fn.__name__),
    mse = lambda frame: frame['net'].map(lambda trained: trained.mse[-1]),
)
add_layer_columns(df_mom)
# Positional index of the trial with the lowest loss (used by the next cell).
i = df_mom.loss.values.argmin()

In [None]:
# Cost and MSE histories of the momentum trial at position `i`
# (set to the lowest-loss trial by the cell that builds df_mom).
plt.semilogy(df_mom.iloc[i]['net'].cost)
plt.semilogy(df_mom.iloc[i]['net'].mse)

# Print info on the best Adam net for each activation function

In [None]:

# NOTE(review): `res` is only built by a cell further down the notebook, so
# this cell fails on a fresh kernel unless that cell has been run first.
df_adam = pd.DataFrame(res)
add_layer_columns(df_adam)
df_adam['act_func_name'] =  df_adam['act_func'].apply(lambda x:x.__name__) 

# Print the lowest-loss Adam run for each activation function.


print('name,  size,  learning_rate')
for name, item in df_adam.groupby('act_func_name'):
    i = np.argmin(item.loss.values)  # position of the lowest loss within this group
    best = item.iloc[i]
    print('=============')
    print(name)
    print('=============')
    print('size', best['total_layer_size'])
    print('nhid', best['n_hidden'])
    print('lamb', best['log_learning_rate'])  # base-10 exponent of the learning rate
    print('cost', best['loss'])
    print('mse ', best['mse'])

# gather best nets of momentum-optimizer, and print info

In [None]:
# For each activation function, pick the momentum trial with the lowest loss,
# remember its name and trained network, and print its hyperparameters.
names_mom = []
best_nets_mom = []
print('name,  size,  learning_rate')
for name, item in df_mom.groupby('act_func_name'):
    i = np.argmin(item.loss.values)  # position of the lowest loss within this group
    best = item.iloc[i]
    names_mom.append(name)
    best_nets_mom.append(best['net'])
    print('=============')
    print(name)
    print('=============')
    print('size', best['total_layer_size'])
    print('nhid', best['n_hidden'])
    print('mome', best['optimizer']['momentum'])
    print('use?', best['optimizer']['use_nesterov'])
    print('lamb', best['log_learning_rate'])  # base-10 exponent of the learning rate
    print('cost', best['loss'])
    print('mse ', best['mse'])
best_nets_mom

# Cost as a function of momentum

In [None]:
# Expand the per-trial optimizer dicts into columns and attach each trial's loss.
temp = df_mom['optimizer'].apply(pd.Series).assign(loss = df_mom['loss'])

# One scatter series per Nesterov setting: final cost against momentum.
for nest, item in temp.groupby('use_nesterov'):
    print(nest)
    plt.semilogy(
        item['momentum'], item['loss'], 'o',
        label = 'Nesterov = {}'.format(nest),
    )

plt.xlabel('Momentum')
plt.ylabel('Cost')
plt.legend()
plt.tight_layout()
plt.savefig('figures/nesterov_mom.pdf')

In [None]:
# Loss of every row in `df`, in row order.
# NOTE(review): `df` is only created by a later cell (pd.DataFrame(res)); this
# cell relies on out-of-order execution.
plt.semilogy(df['loss'], 'o')

In [None]:
# Expand the 'params' dicts of `df` into one column per hyperparameter.
# NOTE(review): needs a `df` that has a 'params' column; the `df` built from
# `res` further down has no such column visible here -- confirm which `df`
# this cell was run against.
new_df = pd.DataFrame(list(df['params']))
new_df

In [None]:
# Load MSE values from mse.txt (presumably one value per Adam trial, saved
# from the original run -- confirm) and plot them on a log scale.
mse = np.loadtxt('mse.txt')
mse_best = np.argmin(mse)  # index of the smallest MSE
plt.semilogy(mse, 'o')

In [None]:
 
    
    
import pandas as pd

losses = trials.losses()
loss_best = np.argmin(losses)  # index of the Adam trial with the lowest loss

# Build one flat record per Adam trial: the sampled hyperparameters plus the
# loss, the MSE loaded from mse.txt and the derived layer sizes.
#
# The original cell iterated over the IPython output-cache entry `_104`, which
# does not exist on a fresh kernel. Its inline comment named `trials.results`
# as the source, so that is read directly here.
# NOTE(review): confirm that Out[104] was indeed `trials.results`.
temp = trials.results  # the original cell bound `temp` to the same object
res = []

for m, r in zip(mse, trials.results):
    res.append(r['params'].copy())
    res[-1]['loss'] = r['loss']
    res[-1]['mse'] = m

    # add info on layers (same formula as in objective())
    first   = res[-1]['first_layer']
    gamma_s = res[-1]['last_layer_rel']
    last = 10 + (first - 10)*gamma_s

    n_hidden = res[-1]['n_hidden'] + 1
    layer_sizes = np.logspace(np.log10(first), np.log10(last), n_hidden,dtype=int)
    res[-1]['layers'] = list(layer_sizes)

In [None]:
# Adam trials as a DataFrame; add_layer_columns() then (re)writes the
# 'layers' column and adds 'total_layer_size' in place.
df = pd.DataFrame(res)
   
add_layer_columns(df)

In [None]:
# Display the full Adam results frame.
df

In [None]:
# NOTE(review): `act_funcs` is not defined anywhere in the visible notebook;
# this cell raises NameError on a fresh kernel unless it is defined elsewhere.
act_funcs

# Regenerate best network for Adam, as we didn't save networks at first run

In [None]:
# Re-run objective() for the lowest-loss Adam trial of each activation
# function so that the trained network is available in the result dict.
best_outs = {}
for name, item in df.groupby('act_func'):
    # Index label of the lowest-loss row; used as a position into
    # trials.results, i.e. this assumes `df` rows are in trial order.
    i_best = (item.sort_values(by = 'loss').iloc[0].name)
    
    # `p` is the dict stored inside trials.results, so the assignment below
    # modifies the stored trial parameters in place.
    p = trials.results[i_best]['params']
    p['optimizer']['type'] = tf.train.AdamOptimizer
    best_outs[name] = objective(p)

# Run one more time to also capture the network
Returning the trained network from the objective was not implemented at the time of the initial run.

In [None]:
# Re-run objective() with the parameters of the overall lowest-loss Adam
# trial (`loss_best`). `p` is the dict stored in trials.results, so setting
# the optimizer type modifies the stored trial parameters in place.
p = trials.results[loss_best]['params']
p['optimizer']['type'] = tf.train.AdamOptimizer
out = objective(p)

In [None]:
# NOTE(review): `_190` is an IPython output-cache entry from the original
# session; it does not exist on a fresh kernel and overrides the `out`
# computed by the previous cell.
out = _190 # how it actually was run

# Best runs

In [None]:
# Global matplotlib font size for all figures drawn from here on.
plt.rcParams.update({'font.size':14})

def plot_costs_and_mse(names, best_nets, filename = 'best', title=None):
    """Plot the cost and MSE histories of several networks, one panel each.

    Parameters
    ----------
    names : sequence of str
        Panel titles, one per network.
    best_nets : sequence
        Networks exposing ``cost`` and ``mse`` sequences; paired with
        ``names`` by position.
    filename : str
        Stem of the output file; the figure is saved to
        ``figures/<filename>.pdf``.
    title : str, optional
        Figure-level title; omitted when falsy.
    """
    # squeeze=False always yields a 2-D array of axes. With the default, a
    # single network (len(names) == 1) would return a bare Axes object and
    # both the zip() and the axes[0] / axes[-1] indexing below would fail.
    fig, axes = plt.subplots(len(names), figsize = [5,1 + 2*len(names)],
                             sharex = True, squeeze = False)
    axes = axes.ravel()
    if title:
        fig.suptitle(title)

    for ax, name, net in zip(axes, names, best_nets):
        ax.semilogy(net.cost, linewidth = 1)
        ax.semilogy(net.mse, linewidth = 1)

        ax.set_title(name)
        ax.set_ylabel('Error')

    # One legend (top panel) and one x label (bottom panel) serve all panels,
    # since the x axis is shared.
    axes[0].legend(['cost','mse'], loc = 'upper left')
    axes[-1].set_xlabel('Iteration')
    fig.tight_layout()
    fig.savefig('figures/{}.pdf'.format(filename))

# Collect the regenerated Adam networks. The keys of best_outs are the
# activation functions themselves (it was filled from a groupby on
# 'act_func'), so names_adam holds function objects rather than strings.
best_nets_adam = []
names_adam = []

for name, out in  best_outs.items():
    best_nets_adam.append(out['net'])
    names_adam.append(name)
# One figure per optimizer: figures/best_adam.pdf and figures/best_mom.pdf.
plot_costs_and_mse(names_adam, best_nets_adam, filename = 'best_adam')
plot_costs_and_mse(names_mom, best_nets_mom, filename = 'best_mom')


# Best run at $x \sim 0.5$

In [None]:
plt.rcParams.update({'font.size':14})
plt.figure()

# Reference curve sin(pi*x) * exp(-pi^2 * t) at the x value stored in column
# 10 of the network's X grid (presumably x ~ 0.5, per the axis label below).
# NOTE(review): index 2 into best_nets_adam depends on the iteration order of
# best_outs -- confirm it selects the intended activation function.
x = best_nets_adam[2].X[0,10]
t = np.linspace(0,1,20)
analytic = np.sin(np.pi * x) * np.exp(-np.pi**2 * t)

# Every 8th stored solution mesh, each drawn in its own viridis colour.
meshes = best_nets_adam[2].meshes[::8]

col = np.linspace(0,1,len(meshes)+1 )[:-1]

ax = plt.gca()
for c, mesh in zip(col, meshes):
    ax.semilogy(t, mesh[:,10], linewidth = 1.5, color = plt.cm.viridis(c))

ax.semilogy(t, analytic, '--', linewidth = 2, color = 'k', label = 'Analytical')
ax.legend()

ax.set_xlabel('Time (t)')
# Raw string: '\s' is an invalid escape sequence in a normal string literal.
ax.set_ylabel(r'Value at $x \sim 0.5$')

import matplotlib as mpl
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Colour bar mapping the curve colours to the iteration range 0..10000
# (10000 is the iteration count used in objective()).
cmap = mpl.cm.viridis
norm = mpl.colors.Normalize(vmin=0, vmax=10000)
divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.05)
cb1 = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
                                norm=norm,
                                orientation='vertical')
cb1.set_label('Iteration')

plt.tight_layout()
plt.savefig('figures/best_sol_convergence.pdf')

# Make images for animation

In [None]:

# 20 x 20 grid on the unit square; every stored mesh is drawn on it.
x = np.linspace(0,1,20)
t = np.linspace(0,1,20)
X,T = np.meshgrid(x,t)

# One PNG per stored mesh, numbered with four zero-padded digits.
for i,mesh in enumerate(net.meshes):
    fig, ax = plt.subplots(subplot_kw = {'projection': '3d'})

    ax.plot_surface(X, T, mesh)
    ax.set_title('{:0>4}'.format(i))
    ax.set_zlim([0,1])

    fig.savefig('figures/temp{:0>4}.png'.format(i))
    # Close each frame right away so figures do not pile up in memory.
    plt.close(fig)

# Hist plots

In [None]:
# Global matplotlib font size for the histogram figures.
plt.rcParams.update({'font.size':14})

def plot_hist(df,  ax=None, sharex = False):
    """Draw step histograms of log10(loss), one per activation function.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``loss`` and ``act_func_name``.
    ax : matplotlib Axes, optional
        Axes to draw on; a new figure is created when omitted.
    sharex : bool
        When True the x label is left out (for panels that share their
        x axis with a panel below).
    """
    if ax is None:
        # No target axes supplied: draw on a fresh figure.
        plt.figure()
        ax = plt.gca()

    # 20 bin edges covering losses from 1e-5 up to about 10**1.2.
    bins1 = np.linspace(-5,1.2,20)

    for act, item in df.groupby('act_func_name'):
        ax.hist(np.log10(item['loss']), bins = bins1,  label = act, histtype='step')

    # The data are log10 values, so label the ticks as powers of ten.
    ticks = np.arange(-5,2)
    ax.set_xticks(ticks)
    ax.set_xticklabels(["$10^{{{}}}$".format(i) for i in ticks])
    if not sharex:
        ax.set_xlabel('Cost')
    ax.set_ylabel('Count')

    ax.legend(loc = 'upper center')
    
# Rebuild the Adam frame from `res` and add the activation-name column that
# plot_hist() groups by.
df = pd.DataFrame(res)
df['act_func_name'] = df['act_func'].apply(lambda x:x.__name__)

# Two stacked panels sharing the x axis: Adam on top, momentum below.
fig,[ax1,ax2] = plt.subplots(2,1, figsize = [5,6], sharex = True)
plot_hist(df,  ax = ax1, sharex = True)  # sharex=True: no x label on the top panel
ax1.set_title('Adam')
plot_hist(df_mom,  ax = ax2)
ax2.set_title('Momentum')
plt.savefig('figures/hyper_hist.pdf')

# Create tables

In [None]:
# LaTeX table of the five Adam trials with the lowest loss.
df = pd.DataFrame(res)
df = df.sort_values(by = 'loss')
print(df[['act_func', 'layers', 'log_learning_rate', 'loss', 'mse']].head(5).to_latex(escape = None))

In [None]:
# LaTeX table of the five Adam trials with the lowest MSE.
df = pd.DataFrame(res)
df = df.sort_values(by = 'mse')
print(df[['act_func', 'layers', 'log_learning_rate', 'loss', 'mse']].head(5).to_latex(escape = None))

In [None]:
# LaTeX table of the five momentum trials with the lowest loss.
print(df_mom.sort_values(by='loss')[['act_func_name', 'layers', 'log_learning_rate', 'loss', 'mse']].head(5).to_latex(escape = None))

In [None]:
# LaTeX table of the five momentum trials with the lowest MSE.
print(df_mom.sort_values(by='mse')[['act_func_name', 'layers', 'log_learning_rate', 'loss', 'mse']].head(5).to_latex(escape = None))