## Version Description:
 * In this version, we predict only the __R600 Hole Diameter__ (`R600_HD`) in TZ6 using a [Bayesian Neural Network with MC DropOut](https://github.com/valyome/Neural-Networks-with-MC-Dropout/).
 * LTR data is used for training and testing the ML model. 
 * The input features are `TZ_Flow`, `MIXP`, `AMBP`, `AMBT` and the TZ6 CLR Restrictors.

In [1]:
# Widen the notebook container to 95% of the browser window.
# `display` and `HTML` are taken from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np

# Configure numpy array printing so wide arrays fit on a single notebook line:
# up to 30 edge items per dimension, lines up to 1000 characters, and every float
# rendered through the "%.6g" formatter below.
np.set_printoptions(
    precision=3,
    edgeitems=30,
    linewidth=1000,
    formatter={"float": lambda value: "%.6g" % value},
)
from functools import reduce

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg' 
# 'seaborn-whitegrid' was renamed to 'seaborn-v0_8-whitegrid' in Matplotlib 3.6 and the
# old name was removed afterwards. Pick whichever spelling this Matplotlib ships, and
# fall back to the default style (cosmetic only) if neither is available.
whitegrid_style = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid') if name in plt.style.available),
    'default',
)
plt.style.use(whitegrid_style)

### Loading Data

In [4]:
# Predictor columns; their order here is the column order of the model input arrays.
input_features = [
    'TZ_Flow', 'MIXP', 'AMBP', 'AMBT',
    'R610_HS1', 'R611_HS1', 'R612_HS1', 'R613_HS1',
]
# Columns to load: the 'HoV' label first, then the predictors, with the 'R600_HD' target last.
usecols = ['HoV', *input_features, 'R600_HD']

In [None]:
# Load only the columns listed in `usecols`, then select them by that list so the frame's
# column order follows `usecols` rather than the order in the file.
ltr_path = '../../data/LTR_dataset.csv'
LTR_df = pd.read_csv(ltr_path, usecols=usecols).loc[:, usecols]
LTR_df.head()

Unnamed: 0,HoV,TZ_Flow,MIXP,AMBP,AMBT,R610_HS1,R611_HS1,R612_HS1,R613_HS1,R600_HD
0,A1,887.134249,2600.0,101401.6,299.386667,131,136,120,120,148
1,A2,886.76405,2600.0,101576.3,298.448667,131,136,120,120,149
2,A3,926.224856,2606.1928,102136.6035,297.109024,131,136,114,120,152
3,A4,915.139474,2599.8998,103195.6642,295.060027,131,136,120,120,154
4,A5,891.635528,2600.0,102856.2,294.755833,145,153,130,130,148


In [None]:
# Working frame for modelling: same rows as LTR_df but without the 'HoV' column, which
# holds categorical labels. LTR_df itself is left untouched.
# (No scaling is applied in this cell; the values are still the raw ones.)
df = LTR_df.drop(columns=['HoV'])

In [None]:
# `df` still holds the raw, unscaled values at this point (only 'HoV' was dropped), so the
# label says that instead of claiming the data was rescaled.
print('Input Dataframe (unscaled, HoV dropped):')
df

Rescaled Input Dataframe:


Unnamed: 0,TZ_Flow,MIXP,AMBP,AMBT,R610_HS1,R611_HS1,R612_HS1,R613_HS1,R600_HD
0,887.134249,2600.0,101401.6,299.386667,131,136,120,120,148
1,886.76405,2600.0,101576.3,298.448667,131,136,120,120,149
2,926.224856,2606.1928,102136.6035,297.109024,131,136,114,120,152
3,915.139474,2599.8998,103195.6642,295.060027,131,136,120,120,154
4,891.635528,2600.0,102856.2,294.755833,145,153,130,130,148
5,893.684842,2600.0,101325.0,293.15,131,136,120,120,148
6,889.523713,2600.0,100650.4,294.146833,131,136,120,120,148
7,928.258087,2601.856275,100356.9848,295.465578,131,140,120,125,153
8,924.676503,2595.41505,100252.7728,297.643472,131,136,120,125,152
9,889.218419,2600.0,102484.2,295.5075,131,136,120,120,150


### Train and Test Data Split:

In [None]:
# Reproducible random 75% / 25% split of the rows into training and test sets.
np.random.seed(13)
sample = np.random.choice(df.index, size=int(len(df)*0.75), replace=False)
# `sample` contains index *labels* (drawn from df.index), so rows are selected by label
# with .loc. Positional .iloc only returns the same rows while the index happens to be
# the default 0..n-1 range.
train_data, test_data = df.loc[sample], df.drop(sample)

print("Number of training samples:", len(train_data))
print("\nNumber of testing samples:", len(test_data))

# Corresponding rows of the full frame (which still has 'HoV'), again looked up by label.
train_data_idx = train_data.index.values.tolist()
df_train = LTR_df.loc[train_data_idx]
test_data_idx = test_data.index.values.tolist()
df_test = LTR_df.loc[test_data_idx]

Number of training samples: 25

Number of testing samples: 9


In [None]:
# Pull the underlying numpy arrays out of the train/test frames.
train_array, test_array = train_data.values, test_data.values

# The last column is the response and everything before it is a predictor. Slicing with
# `-1:` (not `-1`) keeps the response two-dimensional, with a single column.
x_train = train_array[:, :-1]
y_train = train_array[:, -1:]
x_test = test_array[:, :-1]
y_test = test_array[:, -1:]

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(25, 8) (25, 1) (9, 8) (9, 1)


In [None]:
# Preview the first five rows of the training inputs.
x_train[:5]

array([[924.677, 2595.42, 100253, 297.643, 131, 136, 120, 125],
       [928.258, 2601.86, 100357, 295.466, 131, 140, 120, 125],
       [890.916, 2600, 101099, 296.279, 131, 136, 120, 120],
       [885.623, 2600, 101984, 293.818, 145, 153, 130, 130],
       [898.155, 2600, 100118, 297.262, 131, 136, 120, 120]])

### Bayesian Neural Network using MC DropOut

In [None]:
import BNN_MonteCarlo_Dropout

Using TensorFlow backend.


In [None]:
# Fit the MC-Dropout network on the training split.
# NOTE(review): n_hidden=[24, 12] presumably means two hidden layers of 24 and 12 units, and
# normalize=True presumably makes the network scale the data internally; confirm against
# BNN_MonteCarlo_Dropout.net.
bnn = BNN_MonteCarlo_Dropout.net(
    x_train,
    y_train,
    n_epochs=2000,
    n_hidden=[24, 12],
    normalize=True,
)

In [None]:
# Evaluate on the held-out rows (~25% of the data) and unpack the four values returned
# by predict.
prediction = bnn.predict(x_test, y_test)
rmse, Yt_hat, MC_pred, predictive_variance = prediction

In [None]:
# Show the dimensions of the prediction array returned by bnn.predict.
print('Shape of Predictions:',Yt_hat.shape)

In [None]:
# Length of the third axis of Yt_hat.
Yt_hat.shape[2]

In [None]:
# Re-arrange the raw predictions into a 2-D array in which row j holds every value
# predicted for point j, i.e. y_preds[j][i] == Yt_hat[i][0][j].
y_preds = np.zeros((Yt_hat.shape[2], Yt_hat.shape[0]))
y_preds[:] = Yt_hat[:, 0, :].T

# Per-row summary statistics, kept as lists with one entry per row of y_preds.
y_means = list(y_preds.mean(axis=1))  # mean of each row
y_std = list(y_preds.std(axis=1))     # standard deviation of each row (numpy default ddof=0)
y_mins = list(y_preds.min(axis=1))    # smallest value in each row
y_maxes = list(y_preds.max(axis=1))   # largest value in each row

In [None]:
# Report the two dimensions of y_preds: the number of rows, then the length of the first row.
n_points = len(y_preds)
print('Length of Y_Predictions:', n_points)
n_draws = len(y_preds[0])
print('Nr. of Predictions per test point:', n_draws)

In [None]:
# Mean of each row of y_preds (one value per predicted point).
y_means

In [None]:
# Standard deviation of each row of y_preds (one value per predicted point).
y_std

In [None]:
# Smallest value in each row of y_preds (one value per predicted point).
y_mins

In [None]:
# Largest value in each row of y_preds (one value per predicted point).
y_maxes

In [None]:
# Manual check: take element 0 of every row of y_preds (i.e. the first column) and
# average those values. Note that this is a mean down a column, whereas the entries of
# y_means are means along each row.
x = [row[0] for row in y_preds]
print('First Column elements:',x)
print('Mean:',np.mean(x))

In [None]:
# HoV labels and true 'R600_HD' values of the held-out rows, as numpy arrays.
HoV_test_data, y_true = (df_test[column].values for column in ('HoV', 'R600_HD'))

In [None]:
# Summarise the test set: how many HoVs, which ones, and their true target values.
n_test_hovs = len(HoV_test_data)
print('No. of Test HoVs:', n_test_hovs, '\nHoVs of Test Points:', HoV_test_data)
print('Test Points-Org Y:', y_true)

### Plot BNN Predictions with Mean and Error Bars

In [None]:
from bokeh.models import HoverTool,ColumnDataSource,Label,Range1d
from bokeh.plotting import figure, show, output_file,output_notebook
from bokeh.models.markers import CircleX
# Configure Bokeh to render plots inline in the notebook.
output_notebook()

In [None]:
# Build the error-bar coordinates: for every test HoV an (x, x) pair, plus the matching
# (low, high) y-pairs for the mean +/- one standard deviation bar and the min-to-max bar.
y_xs = []
ys_std = []
y_ys = []

# The loop variable is named `hov` (not `x`) so it does not overwrite the notebook-level
# list `x` created by the manual-check cell.
for hov, y_mean, std, min_dia, max_dia in zip(HoV_test_data, y_means, y_std, y_mins, y_maxes):
    y_xs.append((hov, hov))
    ys_std.append((y_mean - std, y_mean + std))
    # mean - (mean - min) and mean + (max - mean) are simply min and max; use them
    # directly to avoid needless floating-point round-off.
    y_ys.append((min_dia, max_dia))

In [None]:
import os

# Figure with the test HoVs as a categorical x-axis.
# NOTE(review): `legend=` and `HoverTool(names=...)` are replaced by `legend_label=` and
# `renderers=` in newer Bokeh releases; left unchanged because the installed Bokeh
# version is not visible here. Confirm before upgrading.
f = figure(x_range=HoV_test_data, title='BNN MC DropOut - Dia Predictions with Confidence Interval', width=1200)

# One hover tool per glyph group; `names` restricts each tool to glyphs carrying that name.
dot_hover_tooltips = HoverTool(names=['DiaPoints'],tooltips=[("HoV","@x"),("Dia", "@y mm")])
whisker_hover_tooltips = HoverTool(names=['Whiskers'],tooltips=[("Dia", "@y mm")])
multiline_hover_tooltips = HoverTool(names=['LinePlot'],tooltips=[("Dia", "$y mm")])

f.add_tools(dot_hover_tooltips,whisker_hover_tooltips,multiline_hover_tooltips)

f.xaxis.axis_label = 'Head of Versions'
f.yaxis.axis_label = 'Diameter Predictions (mm)'

# Vertical bars: thick = mean +/- std. dev., thin = min-to-max range of the predictions.
f.multi_line(y_xs, ys_std, color='blue', line_width = 3, legend = 'Mean Prediction +/- Std.Dev')
f.multi_line(y_xs, y_ys, color='deepskyblue',name='LinePlot',legend = 'Mean Prediction +/- Min & Max Value')
# Markers: true value (red diamond) and mean prediction (blue circle) for each test HoV.
f.diamond(HoV_test_data, y_true, color='red', size=12, line_alpha=0,name='DiaPoints', legend = 'True Dia')
f.circle(HoV_test_data, y_means, color='blue', size=9, line_alpha=0,name='DiaPoints', legend = 'Predicted Dia (Mean)')

# whiskers (almost-0 height rects simpler than segments), one pair per min-to-max bar
for whisker_xs, whisker_ys in zip(y_xs, y_ys):
    f.rect(whisker_xs, whisker_ys, 0.2, 0.001, line_color="deepskyblue",name='Whiskers')

f.y_range=Range1d(135, 165)
f.legend.location = "top_center"
f.legend.click_policy="hide"

# Make sure the target directory exists so that saving the HTML file does not fail
# on a fresh checkout.
output_path = 'plots/BNN_MCDropOut_v2_LTR_LTR_Predictions.html'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_file(output_path, mode='inline')

In [None]:
# Render the figure built in the previous cell.
# NOTE(review): since output_file(...) was called earlier, this is also expected to write
# the HTML file; confirm against the installed Bokeh version.
show(f)