In [1]:
import numpy as np
import pandas as pd
import math
from datetime import datetime
import CMAPSAuxFunctions

from data_handler_VALVE import ValveDataHandler
from tunable_model import SequenceTunableModelRegression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn import metrics

from keras.models import Sequential, Model
from keras.layers import Dense, Input, Dropout, Reshape, Conv2D, Flatten, MaxPooling2D
from keras.optimizers import Adam, SGD
from keras.callbacks import LearningRateScheduler
from keras import backend as K
from keras import regularizers

Using TensorFlow backend.


<h1> Create Data Handler </h1>

In [2]:
features = ['timestamp', 'externalControllerOutput', 'pressureValveInlet', 'pressureValveOutlet', 'mediumTemperature', 'rodDisplacement', 'disturbedMediumFlow', 'selectedFault', 'faultType', 'faultIntensity']
selected_indices = np.array([3,4,5,6,7])
selected_features = list(features[i] for i in selected_indices-1)

window_size = 35
window_stride = 15

# min = 2018-02-14 18:59:20
# max = 2018-08-19 18:28:20
time_start = "2018-02-14 18:59:20"
time_end = "2018-04-19 18:28:20"

# Either anomaly, classification or regression
problem = 'anomaly'
#problem = 'classification'
#problem = 'regression'

vHandler = ValveDataHandler(time_start, time_end, selected_features = selected_features,
                            sequence_length = window_size, sequence_stride = window_stride,
                            problem = problem)

init


In [3]:
print(selected_features)

['pressureValveInlet', 'pressureValveOutlet', 'mediumTemperature', 'rodDisplacement', 'disturbedMediumFlow']


<h1> Keras Model </h1>

In [4]:
#np.set_printoptions(threshold=np.nan)

K.clear_session()
lambda_regularization = 0.20

def create_ANN_model(input_shape, problem):
    
    #Create a sequential model
    model = Sequential()
    
    #Add the layers for the model
    model.add(Dense(20, input_dim = input_shape, activation = 'relu', kernel_initializer = 'glorot_normal', 
                    kernel_regularizer = regularizers.l2(lambda_regularization), name = 'fc1'))
    
    if (problem == 'classification'):
        model.add(Dense(20, activation = 'softmax', name = 'out'))
    elif (problem == 'regression'):
        model.add(Dense(1, activation = 'linear', name = 'out'))
        
    #model.add(Dense(1, activation = 'softmax', name = 'out'))
    
    return model

<h1>Tunable Model </h1>

In [5]:
#scaler = MinMaxScaler(feature_range = (-1, 1))
scaler = StandardScaler()

In [6]:
def get_compiled_model(shape, problem):
    
    K.clear_session()
    
    if (problem == 'classification' or problem == 'anomaly'):
        # Parameters for the model
        # Default:  optimizer = SGD(lr = 0.01, momentum = 0.0, decay = 0.0, nesterov = False)
        optimizer = SGD(lr = 0.01, momentum = 0.0, decay = 0.0, nesterov = False)
        loss_function = 'categorical_crossentropy'
        metrics = ['accuracy']
    elif (problem == 'regression'):
        # Parameters for the model
        # Default: optimizer = Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = None, decay = 0.0, amsgrad = False)
        optimizer = Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = None, decay = 0.0, amsgrad = False)
        loss_function = 'mean_squared_error'
        metrics = ['mse']
        
    model = None
    
    # Create and compile the model
    model = create_ANN_model(shape, problem)
    model.compile(optimizer = optimizer, loss = loss_function, metrics = metrics)
    
    return model

In [7]:
num_features = len(selected_features)
input_shape = num_features * window_size

model = get_compiled_model(input_shape, problem)
tModel = SequenceTunableModelRegression('ANN_Model', model, lib_type = 'keras', data_handler = vHandler, data_scaler = scaler)

<h1> Loading Data from MySQL Database </h1>

In [8]:
#vHandler.connect_to_db("remoteAdmin","remoteAdmin","169.236.181.40:3306","damadics")

In [9]:
# Extract data from database
#vHandler.extract_data_from_db()
# vHandler.extract_data_from_db()

<h1> Loading Data from Local .csv File </h1>

In [10]:
# Extract data from .csv file. If loading from .csv, don't forget to uncomment .extract_data_from_db() in data_handler_VALVE'''
vHandler._df = pd.read_csv('valve_dataset.csv', sep = ',')

vHandler._df['status'] = vHandler._df['selectedFault'].apply(lambda valve: 0 if valve == 20 else 1)

vHandler._X = vHandler._df[selected_features].values
vHandler._y = vHandler._df['status'].values

In [11]:
print(vHandler._X)
print(vHandler._y)

[[8.76840e-01 6.50832e-01 2.14553e-01 1.59000e-03 1.00000e+00]
 [8.50576e-01 6.43717e-01 2.15373e-01 3.69939e-01 1.00000e+00]
 [8.49537e-01 6.47472e-01 2.14781e-01 7.32446e-01 2.22181e-01]
 ...
 [8.47251e-01 6.43362e-01 2.14299e-01 1.00000e+00 0.00000e+00]
 [8.49342e-01 6.43664e-01 2.15743e-01 1.00000e+00 3.34000e-04]
 [8.49701e-01 6.43186e-01 2.17503e-01 1.00000e+00 0.00000e+00]]
[0 0 0 ... 1 1 1]


In [12]:
#vHandler.load_data(cross_validation_ratio = 0.3, test_ratio = 0.2, unroll = True)
tModel.load_data(unroll = True, verbose = 0, cross_validation_ratio = 0.20, test_ratio = 0.20)

Loading data from database
Splitting into samples: 0:00:01.143167
counter: 130
Splitting into samples: 0:00:01.298360
counter: 130
Number of defective valves in cross-validation set: 6 out of 26.
Number of defective valves in test set: 9 out of 26.

Train, cv, and test splitting: 0:00:00.000770
Sequence length 35
Sequence stride 15
X_train len 78
X_crossVal len 26
X_test len 26
y_train len 78
y_crossVal len 26
y_test len 26
X_train[0]
(473, 5)
[[8.49007e-01 6.44780e-01 2.14126e-01 9.98019e-01 0.00000e+00]
 [8.51450e-01 6.47268e-01 2.15850e-01 1.00000e+00 1.62400e-03]
 [8.51063e-01 6.47137e-01 2.15307e-01 1.00000e+00 2.30900e-03]
 ...
 [8.51652e-01 6.44600e-01 2.17084e-01 9.99500e-01 1.10200e-03]
 [8.51125e-01 6.43791e-01 2.16319e-01 9.98465e-01 0.00000e+00]
 [8.50684e-01 6.45809e-01 2.15217e-01 1.00000e+00 4.78000e-04]]
X_crossVal[0]
(35, 5)
[[8.51446e-01 6.53266e-01 2.13839e-01 1.00000e+00 0.00000e+00]
 [8.47908e-01 6.49438e-01 2.14251e-01 1.00000e+00 0.00000e+00]
 [8.49886e-01 6.4795

In [13]:
vHandler.print_data(True)

Printing shapes

Training data (X, y)
(10562, 175)
(10562, 1)
Cross-Validation data (X, y)
(26, 175)
(26, 1)
Testing data (X, y)
(26, 175)
(26, 1)
Printing first 5 elements

Training data (X, y)
[[8.49007e-01 6.44780e-01 2.14126e-01 9.98019e-01 0.00000e+00 8.51450e-01
  6.47268e-01 2.15850e-01 1.00000e+00 1.62400e-03 8.51063e-01 6.47137e-01
  2.15307e-01 1.00000e+00 2.30900e-03 8.47950e-01 6.52469e-01 2.15426e-01
  1.00000e+00 4.58000e-04 8.51919e-01 6.53102e-01 2.15469e-01 1.00000e+00
  1.98800e-03 8.55039e-01 6.57101e-01 2.14184e-01 1.00000e+00 0.00000e+00
  8.51859e-01 6.54560e-01 2.16143e-01 9.98756e-01 0.00000e+00 8.48592e-01
  6.52224e-01 2.16725e-01 9.98825e-01 1.35800e-03 8.51566e-01 6.49168e-01
  2.14419e-01 9.98857e-01 0.00000e+00 8.46856e-01 6.45162e-01 2.14128e-01
  1.00000e+00 0.00000e+00 8.49803e-01 6.41118e-01 2.15417e-01 9.98886e-01
  3.73000e-04 8.49963e-01 6.46695e-01 2.14937e-01 1.00000e+00 0.00000e+00
  8.47459e-01 6.47689e-01 2.16048e-01 1.00000e+00 0.00000e+00 8.4

<h1> Classification Algorithm Testing </h1>

In [14]:
from sklearn.svm import SVC
# Support Vector Classifier
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC
# SVC(C=1.0, kernel=’rbf’, degree=3, gamma=’auto_deprecated’,
#     coef0=0.0, shrinking=True, probability=False,
#     tol=0.001, cache_size=200, class_weight=None,
#     verbose=False, max_iter=-1, decision_function_shape=’ovr’,
#     random_state=None)

algorithms = list()

svc = SVC(gamma = 'auto', verbose = True)
algorithms.append(svc)

In [15]:
crossVal_data, test_data = list(), list()
for algo in algorithms:
    
    start_time = datetime.now()
    algo.fit(tModel._X_train, np.ravel(tModel._y_train))
    print('Time Elapsed: {}'.format(datetime.now() - start_time))
    
    y_pred_crossVal = algo.predict(tModel.data_handler._X_crossVal)
    y_pred_test = algo.predict(tModel.data_handler._X_test)
    
    crossVal_data.append(({
        'Accuracy': metrics.accuracy_score(tModel._y_crossVal, y_pred_crossVal),
        'Precision': metrics.precision_score(tModel._y_crossVal, y_pred_crossVal),
        'Recall': metrics.recall_score(tModel._y_crossVal, y_pred_crossVal)
    }))
    test_data.append(({
        'Accuracy': metrics.accuracy_score(tModel._y_crossVal, y_pred_test),
        'Precision': metrics.precision_score(tModel._y_crossVal, y_pred_test),
        'Recall': metrics.recall_score(tModel._y_crossVal, y_pred_test)
    }))
    
    print('Algorithm Done')

[LibSVM]Time Elapsed: 0:00:31.349094
Algorithm Done


In [16]:
crossVal_results = pd.DataFrame(data = crossVal_data, columns = ['Accuracy', 'Precision', 'Recall'],
                       index = ['SVC'])
test_results = pd.DataFrame(data = test_data, columns = ['Accuracy', 'Precision', 'Recall'],
                       index = ['SVC'])

In [17]:
print(crossVal_results)

     Accuracy  Precision  Recall
SVC  0.730769        0.0     0.0


In [18]:
print(test_results)

     Accuracy  Precision    Recall
SVC  0.769231        0.5  0.166667


In [19]:
y_pred_crossVal

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [20]:
np.ravel(tModel._y_crossVal)

array([0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 1., 0., 0.])

In [21]:
true_positive, true_negative, false_positive, false_negative = 0,0,0,0

for i in range(len(y_pred_crossVal)):
    if y_pred_crossVal[i] == 0:
        if (y_pred_crossVal[i] == tModel._y_crossVal[i]):
            true_negative += 1
        else:
            false_negative += 1
    else:
        if (y_pred_crossVal[i] == tModel._y_crossVal[i]):
            true_positive += 1
        else:
            false_positive += 1
            
print(true_positive)
print(false_positive)
print(true_negative)
print(false_negative)

0
1
19
6
