# Train a neural network.

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split

Set seeds

In [None]:
# Seed NumPy and TensorFlow with the same value so repeated runs draw the
# same random numbers.
seed = 1
np.random.seed(seed)
tf.random.set_seed(seed)

Set parameters

In [None]:
# CSV file holding the training samples (x columns followed by y columns).
input_file = 'training.csv'

# Column names: network inputs first, then network outputs.
inputs = ['mu', 'angle', 'threshold']
outputs = ['low_speed', 'high_speed']

# The input/output dimensions of the network follow from the column lists.
input_size, output_size = len(inputs), len(outputs)

Read data

In [None]:
# Load the table and overwrite its column names with the expected ones
# (inputs first, then outputs).
data = pd.read_csv(input_file)
data.columns = [*inputs, *outputs]

# Split the table into the input matrix x and the output matrix y.
x, y = (np.array(data[columns]) for columns in (inputs, outputs))

Build a normalization preprocessing layer adapted to the input data

In [None]:
# Create the normalization layer and adapt it to every row of x.
# NOTE(review): newer TensorFlow releases may expose this layer outside the
# `experimental` namespace - confirm against the installed version.
preprocessing = keras.layers.experimental.preprocessing
normalizer = preprocessing.Normalization()
normalizer.adapt(x)

Penalize overpredictions more than underpredictions

In [None]:
def asymmetric_mse(y_true, y_pred):
    """Mean squared error that weights overpredictions more heavily.

    Every squared error whose prediction exceeds its target is multiplied
    by 5; all other squared errors keep weight 1.  The weighting is applied
    per element, so a single overprediction somewhere in the batch does not
    scale up the loss of every sample in that batch.

    Args:
        y_true: Target values.
        y_pred: Predicted values, same shape as ``y_true``.

    Returns:
        The weighted squared error averaged over the last axis, i.e. one
        loss value per sample (the same reduction ``keras.losses.mse`` uses).
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Targets may arrive in a different float dtype than the predictions.
    y_true = tf.cast(y_true, y_pred.dtype)
    overpredicted = tf.cast(tf.greater(y_pred, y_true), y_pred.dtype)
    # 5.0 where the prediction is too high, 1.0 everywhere else.
    weights = 1.0 + 4.0 * overpredicted
    return tf.reduce_mean(weights * tf.square(y_pred - y_true), axis=-1)

Base model

In [None]:
# model = keras.models.Sequential()
# model.add(keras.layers.Dense(input_size, activation='linear'))
# #model.add(keras.layers.Dense(20,activation='relu'))
# model.add(keras.layers.Dense(4,activation='relu'))
# #model.add(keras.layers.Dropout(0.05,training=True))
# model.add(keras.layers.Dense(output_size,activation='relu'))
# model.compile(loss='mse',optimizer='adam')

Build and compile model

In [None]:
# Functional-API network:
#   input -> normalizer -> (Dense + Dropout) x 2 -> Dense output
hidden_units = 100
dropout_rate = 0.1

input_shape = x.shape[1:]
full_model_input = keras.Input(shape=input_shape)
normalized_input = normalizer(full_model_input)

# The Dropout layers are called with training=True; see
# https://github.com/keras-team/keras/issues/9412#issuecomment-366487249
dense_layer1 = keras.layers.Dense(hidden_units, activation='relu')(normalized_input)
dropout_layer1 = keras.layers.Dropout(dropout_rate)(dense_layer1, training=True)
dense_layer2 = keras.layers.Dense(hidden_units, activation='relu')(dropout_layer1)
dropout_layer2 = keras.layers.Dropout(dropout_rate)(dense_layer2, training=True)

full_model_output = keras.layers.Dense(output_size, activation='relu')(dropout_layer2)
full_model = keras.Model(inputs=full_model_input, outputs=full_model_output)
full_model.compile(optimizer='adam', loss='mse', metrics=['mse'])

Split data

In [None]:
# Shuffle the samples and hold out 33% of them for validation.
validation_fraction = 0.33
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=validation_fraction, shuffle=True
)

Train model

In [None]:
# Fit without console output and keep the returned object, whose `history`
# holds the per-epoch training and validation values.
fit_options = dict(
    epochs=700,
    batch_size=10,
    verbose=0,
    validation_data=(x_val, y_val),  # check validation
)
model_output = full_model.fit(x_train, y_train, **fit_options)

View training

In [None]:
# Plot the training and validation curves of the loss and of the mse metric,
# one log-scaled figure per quantity.  Each figure is created right before
# it is drawn on, so no empty figure is left open after the last plot.
for metric in ('loss', 'mse'):
    plt.figure()
    plt.yscale('log')
    plt.title(metric)
    plt.plot(model_output.history[metric], label='train')
    plt.plot(model_output.history['val_' + metric], label='validation')
    plt.legend()

View output

In [None]:
# plot speed vs angle given mu, threshold
mu = 0.009 # set mu
thresh = 4 # set threshold

# Select the matching training rows once and reuse the mask for x and y.
# mu is compared with np.isclose because 0.009 is read as 0.0090..01.
selected = np.isclose(x[:, 0], mu) & (x[:, 2] == thresh)
plot_x = x[selected]
plot_y = y[selected]

# this is not generic enough...
pred_x = np.array([[mu, angle, thresh] for angle in np.linspace(0, 165, 165)])
pred = full_model.predict(pred_x)

plt.title('mu = %s, thresh = %s' % (mu, thresh))
plt.xlabel('angle (degrees)')
plt.ylabel('speed (m/s)')
# One labelled line per output column, so the legend lists the training
# curves as well and tells the two predicted outputs apart.
for column, name in enumerate(outputs):
    plt.plot(plot_x[:, 1], plot_y[:, column], marker='o', label='train %s' % name)
for column, name in enumerate(outputs):
    plt.plot(pred_x[:, 1], pred[:, column], label='pred %s' % name)
plt.legend()

In [None]:
# plot speed vs angle given mu, threshold
mu = 0.009 # set mu
thresh = 4 # set threshold

# The selection and the prediction grid do not change between passes, so
# they are built once, outside the sampling loop.
# mu is compared with np.isclose because 0.009 is read as 0.0090..01.
selected = np.isclose(x[:, 0], mu) & (x[:, 2] == thresh)
plot_x = x[selected]
# this is not generic enough...
pred_x = np.array([[mu, angle, thresh] for angle in np.linspace(0, 165, 165)])

# Scatter the second output column of five separate predict() calls.
for _ in range(5):
    pred = full_model.predict(pred_x)
    plt.scatter(pred_x[:, 1], pred[..., 1])
plt.plot(plot_x[:, 1], y[selected, 1], label='train')
# Without this call the 'train' label is never shown.
plt.legend()

In [None]:
# Predict the outputs for a single (mu, angle, threshold) point.
query_point = [[0.009, 90, 4]]
full_model.predict(query_point)

In [None]:
# Predictive mean and variance from repeated stochastic forward passes, following
# http://www.cs.ox.ac.uk/people/yarin.gal/website/blog_3d801aa532c1ce.html

# Read the dropout probability from the model itself so that tau always uses
# the rate the network was actually built with.
dropout_prob = next(
    layer.rate
    for layer in full_model.layers
    if isinstance(layer, keras.layers.Dropout)
)
T = 100  # number of forward passes
input_x = [[1,1,1]]
l = 1 # how to set???
N = len(x[...,0])
weight_decay = 0.01 # maybe?

# Every predict() call contributes one sample for the same input.
probs = [full_model.predict(input_x) for _ in range(T)]
predictive_mean = np.mean(probs, axis=0)
predictive_variance = np.var(probs, axis=0)
tau = l**2 * (1 - dropout_prob) / (2 * N * weight_decay)
# Add the inverse of tau to the sample variance.
predictive_variance += tau**-1
print(predictive_mean, predictive_variance)

In [None]:
# Histogram of the second output column over 1000 copies of one input row.
repeated_input = [[0.009, 150, 4]] * 1000
high_speed_samples = full_model.predict(repeated_input)[..., 1]
plt.hist(high_speed_samples)
plt.xlabel('high velocity prediction')
plt.ylabel('number of predictions')
plt.title('mu = 0.009, angle = 150, threshold = 4')

In [None]:
#weights
# Histogram rows 0-2 of the first weight array of layer 2 (presumably the
# first Dense layer - confirm against the model definition), one subplot each
# in a 2x2 grid.
fig = plt.figure()
kernel = full_model.layers[2].get_weights()[0]
for row_index in range(3):
    plt.subplot(2, 2, row_index + 1)
    plt.hist(kernel[row_index])
plt.show()

Save model

In [None]:
# Write the trained network to disk.
model_path = "2_speed_network_dropout.h5"
full_model.save(model_path)

Find largest divergence between prediction and training data

In [None]:
# Signed difference (prediction minus target) for every row of x.
divergence = full_model.predict(x) - y
# Largest signed difference in output column 1.
# NOTE(review): this is not an absolute value, so only the largest
# overprediction is reported - confirm that is the intent.
max_divergence = max(row[1] for row in divergence)
print(max_divergence)
#print(max_divergence, x[np.where(divergence == max_divergence)[0]])

Make a lot of plots

In [None]:
# only plot the high prediction...
mus = data['mu'].unique()
angles = data['angle'].unique()
thresholds = data['threshold'].unique()

# savefig() does not create missing directories, so make sure it exists.
os.makedirs('dropout_plots', exist_ok=True)

# The sample count passed to linspace must be an integer.
angle_grid = np.linspace(angles.min(), angles.max(), int(angles.max()))

# plot speed vs angle given mu, threshold: one saved figure per pair
for mu in mus:
    for threshold in thresholds:
        # Select the matching training rows once and reuse the mask for y.
        selected = np.isclose(x[:, 0], mu) & (x[:, 2] == threshold)
        plot_x = x[selected]
        pred_x = np.array([[mu, angle, threshold] for angle in angle_grid])

        fig = plt.figure()
        plt.title('mu: %.3f, threshold: %.2f' % (mu, threshold))
        plt.plot(plot_x[:, 1], y[selected, 1], label='training')
        plt.legend()
        # Overlay the second output column of 20 separate predict() calls.
        for _ in range(20):
            pred = full_model.predict(pred_x)
            # alpha has to be a number; the string '0.5' is rejected.
            plt.scatter(pred_x[:, 1], pred[..., 1], c='grey', alpha=0.5)
        plt.savefig('dropout_plots/mu-%.3f_threshold-%.2f.png' % (mu,threshold))
        plt.close()