In [1]:
from math import pi
import keras
import keras.backend as K
from keras.layers import LSTM,Dropout,Dense,TimeDistributed,Conv1D,MaxPooling1D,Flatten
from keras.models import Sequential
import tensorflow as tf
import gc
#from numba import jit
from IPython.display import display, clear_output
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import sys
sns.set_style("whitegrid")
import pyarrow.parquet as pq
import pandas as pd
import numpy as np
from tqdm import tqdm
from keras.models import save_model

In [2]:
# This is NN LSTM Model creation
def model_lstm(input_shape):
    # The shape was explained above, must have this order
    inp = Input(shape=(input_shape[1], input_shape[2],))
    # This is the LSTM layer
    # Bidirecional implies that the 160 chunks are calculated in both ways, 0 to 159 and 159 to zero
    # although it appear that just 0 to 159 way matter, I have tested with and without, and tha later worked best
    # 128 and 64 are the number of cells used, too many can overfit and too few can underfit
    x = Bidirectional(LSTM(128, return_sequences=True))(inp)
    # The second LSTM can give more fire power to the model, but can overfit it too
    x = Bidirectional(LSTM(64, return_sequences=True))(x)
    # A intermediate full connected (Dense) can help to deal with nonlinears outputs
    x = Dense(64, activation="relu")(x)
    # A binnary classification as this must finish with shape (1,)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    # Pay attention in the addition of matthews_correlation metric in the compilation, it is a success factor key
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[matthews_correlation])
    
    return model

In [20]:
def datetime_to_seconds(dt):
    return round(dt.microsecond * 1e-6 + dt.second + dt.minute * 60, 3)

In [21]:
def pulses_to_frequency(simulation_data):
#Dane s ustawione w kolumnach, dlatego iterujemy po szerokosci, dodatkowo dane
#dla jednej symulacji zajmuj 4 kolumny dlatego dzielimy przez 4 i mamy liczbe symulacji
    temp_drive_column = []
    temp_load_column = []
    drive_count = 0
    load_count = 0
    frequency_drive = 0
    frequency_load = 0
    angular_velocity_drive = 0
    angular_velocity_load = 0
    
    for sim_id in range(1, int(simulation_data.shape[1]/4)):
        tacho_drive_column_name = 'TachoDriveSchaft_' + str(sim_id)
        tacho_load_column_name = 'TachoLoadSchaft_' + str(sim_id)
        for sample_id in range(0,len(simulation_data)):
            
            #DLA TACHO DRIVE SHAFT
            if simulation_data[tacho_drive_column_name][sample_id] == 1:
                #jezeli mamy 1 to znaczy wykonany zostal jeden pelny obrót i liczymy predkosc
                drive_count += 1
                #TODO: zalozenie ze jest 1 bitowy, mozna zweryfkowac w simulinku i ewentualnie zamienic na
                #predkosc katowa
                frequency_drive = 1 / (drive_count * 0.1)
                angular_velocity_drive = frequency_drive * 2 * pi
                drive_count = 0
            elif simulation_data[tacho_drive_column_name][sample_id] == 0:
                #jezeli mamy 0 to zliczamy 0.1 sekundy bo co tyle są kolejne pomiary
                drive_count += 1
                
            #DLA TACHO LOAD SHAFT
            if simulation_data[tacho_load_column_name][sample_id] == 1:
                #jezeli mamy 1 to znaczy wykonany zostal jeden pelny obrót i liczymy predkosc
                load_count += 1
                #TODO: zalozenie ze jest 1 bitowy, mozna zweryfkowac w simulinku i ewentualnie zamienic na
                #predkosc katowa
                frequency_load = 1 / (load_count * 0.1)
                angular_velocity_load = frequency_load * 2 * pi
                load_count = 0
            elif simulation_data[tacho_load_column_name][sample_id] == 0:
                #jezeli mamy 0 to zliczamy 0.1 sekundy bo co tyle są kolejne pomiary
                load_count += 1

            temp_drive_column.append(angular_velocity_drive)
            temp_load_column.append(angular_velocity_load)
        simulation_data[tacho_drive_column_name] = temp_drive_column
        simulation_data[tacho_load_column_name] = temp_load_column
        temp_drive_column = []
        temp_load_column = []
    return simulation_data
        

In [22]:
def simulation_data_time_to_float(simulation_data):
#Dane s ustawione w kolumnach, dlatego iterujemy po szerokosci, dodatkowo dane
#dla jednej symulacji zajmuj 4 kolumny dlatego dzielimy przez 4 i mamy liczbe symulacji
    temp_float_time = []
    for sim_id in range(1, int(simulation_data.shape[1]/4)):
        time_column_name = 'Time_' + str(sim_id)
        for time_id in range(0,len(simulation_data)):
            temp_float_time.append(datetime_to_seconds(simulation_data[time_column_name][time_id]))
        simulation_data[time_column_name] = temp_float_time
        temp_float_time = []
    return simulation_data
        

In [23]:
number_of_simulations = 208

In [24]:
train_set = pq.read_pandas('BigData.parquet').to_pandas()

In [25]:
import datetime
import time
#DT = datetime.datetime(2016,01,30,15,16,19,234000) #trailing zeros are required

In [26]:
train = simulation_data_time_to_float(train_set)

In [27]:
train = pulses_to_frequency(train)

In [28]:
print(train)

     Time_1  Vibration_1  TachoDriveSchaft_1  TachoLoadSchaft_1  Time_2  \
0       0.0    -2.752889                   0           0.000000     0.0   
1       0.1    -2.048814                   0           0.000000     0.1   
2       0.2    -1.189470                   0           0.000000     0.2   
3       0.3    -0.504057                   0           0.000000     0.3   
4       0.4    -0.299780                   0           0.000000     0.4   
..      ...          ...                 ...                ...     ...   
296    29.6    -0.994195                   0           0.106383    29.6   
297    29.7    -0.541429                   0           0.106383    29.7   
298    29.8     0.296970                   0           0.106383    29.8   
299    29.9     0.806616                   0           0.106383    29.9   
300    30.0     0.480334                   0           0.106383    30.0   

     Vibration_2  TachoDriveSchaft_2  TachoLoadSchaft_2  Time_3  Vibration_3  \
0      -0.293971   

In [29]:
#usuniecie kolumn z czasem
for sim_id in range(1, int(round((train.shape[1]/4)+1,0))):
    train.drop([ 'Time_' + str(sim_id)], axis = 1, inplace = True) 

In [30]:
print(train)

     Vibration_1  TachoDriveSchaft_1  TachoLoadSchaft_1  Vibration_2  \
0      -2.752889                   0           0.000000    -0.293971   
1      -2.048814                   0           0.000000    -0.638681   
2      -1.189470                   0           0.000000    -1.389994   
3      -0.504057                   0           0.000000    -1.701221   
4      -0.299780                   0           0.000000    -1.120579   
..           ...                 ...                ...          ...   
296    -0.994195                   0           0.106383     0.040170   
297    -0.541429                   0           0.106383     0.747791   
298     0.296970                   0           0.106383     0.616453   
299     0.806616                   0           0.106383    -0.338532   
300     0.480334                   0           0.106383    -1.375580   

     TachoDriveSchaft_2  TachoLoadSchaft_2  Vibration_3  TachoDriveSchaft_3  \
0                     0           0.106383    -0.747718 

In [31]:
X_train = train.values

In [32]:
type(X_train)

numpy.ndarray

In [33]:
np.shape(X_train)

(301, 624)

In [34]:
#Shape 0 jest nieparzysty, dlatego tutaj usuwamy ostatni rekord, żeby móc potem zrobić reshape
X_train = np.delete(X_train, -1, 0)

In [35]:
np.shape(X_train)

(300, 624)

In [36]:
n_signals = 3
X_trained = X_train.reshape((int(X_train.shape[1]/n_signals), X_train.shape[0], n_signals))

In [37]:
print(X_trained)

[[[[-2.75288908e+00  0.00000000e+00  0.00000000e+00]
   [-2.93970624e-01  0.00000000e+00  1.06382979e-01]
   [-7.47717548e-01  0.00000000e+00  1.06382979e-01]
   ...
   [-3.03832992e-01  4.25894378e-03  2.18818381e-02]
   [ 1.69374382e+00  4.25894378e-03  2.18818381e-02]
   [ 6.96167008e-01  4.25894378e-03  5.28262018e-03]]

  [[-3.14678092e-01  4.25894378e-03  5.28262018e-03]
   [-5.75101046e-01  4.25894378e-03  5.28262018e-03]
   [ 7.09316409e-01  4.25894378e-03  1.02145046e-02]
   ...
   [ 2.37359251e-01  4.25894378e-03  2.18340611e-02]
   [ 6.54183509e-01  4.25894378e-03  1.30208333e-02]
   [ 7.38995067e-01  4.25894378e-03  1.30208333e-02]]

  [[-2.71850033e-01  4.25894378e-03  1.30208333e-02]
   [-5.32272987e-01  4.25894378e-03  1.30208333e-02]
   [ 7.52144467e-01  4.25894378e-03  7.46825990e-03]
   ...
   [ 7.38857871e-01  4.25894378e-03  7.46825990e-03]
   [ 1.15568213e+00  4.25894378e-03  1.30208333e-02]
   [ 1.04234664e+00  4.25894378e-03  1.30208333e-02]]

  ...

  [[ 7.61814

In [38]:
fault_data = pd.read_csv('FaultCodes.csv')

In [39]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(fault_data)

     SensorDrift  ShaftWear  ToothFault  FaultCode
0              1          0           0          1
1              1          0           0          1
2              1          1           0          3
3              1          1           0          3
4              1          1           0          3
5              1          1           0          3
6              1          1           0          3
7              1          1           0          3
8              1          1           0          3
9              1          1           0          3
10             1          1           0          3
11             1          1           0          3
12             1          0           1          5
13             1          1           1          7
14             0          0           1          4
15             0          0           0          0
16             0          0           0          0
17             0          0           0          0
18             0          0    

In [40]:
y_train = np.array(fault_data.iloc[:, 2].values)

In [41]:
print(y_train)

[0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1
 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0
 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0
 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1]


In [42]:
# define model
n_outputs = 1
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_signals)))
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
model.add(TimeDistributed(Dropout(0.5)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(100))
#model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='sigmoid'))

In [43]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.AUC()])

In [44]:
verbose, epochs, batch_size = True, 30, 16
model.fit(X_trained, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fa8d0414400>

In [45]:
model.save('PredictFaultData_ToothFault');



INFO:tensorflow:Assets written to: PredictFaultData_ToothFault/assets


INFO:tensorflow:Assets written to: PredictFaultData_ToothFault/assets


In [46]:
y_train = np.array(fault_data.iloc[:, 1].values)

In [47]:
# define model
n_outputs = 1
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_signals)))
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
model.add(TimeDistributed(Dropout(0.5)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(100))
#model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='sigmoid'))

In [48]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.AUC()])

In [49]:
verbose, epochs, batch_size = True, 30, 16
model.fit(X_trained, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fa8c0f72220>

In [50]:
model.save('PredictFaultData_ShaftWear');



INFO:tensorflow:Assets written to: PredictFaultData_ShaftWear/assets


INFO:tensorflow:Assets written to: PredictFaultData_ShaftWear/assets


In [51]:
y_train = np.array(fault_data.iloc[:, 0].values)

In [52]:
# define model
n_outputs = 1
model = Sequential()
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_signals)))
model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
model.add(TimeDistributed(Dropout(0.5)))
model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(100))
#model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='sigmoid'))

In [53]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.AUC()])

In [54]:
verbose, epochs, batch_size = True, 30, 16
model.fit(X_trained, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fa8a3eff160>

In [55]:
model.save('PredictFaultData_ToothFault');



INFO:tensorflow:Assets written to: PredictFaultData_ToothFault/assets


INFO:tensorflow:Assets written to: PredictFaultData_ToothFault/assets
