## Importing libraries

In [1]:
import datetime
from math import ceil
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
# from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
telemetry_df = pd.read_csv('azure/PdM_telemetry.csv') #Reading telemetry data (Volt,rpm,pressure,vibration)
telemetry_df.head()

In [3]:
telemetry_df['datetime'].min(), telemetry_df['datetime'].max()

In [4]:
telemetry_df['machineID'].value_counts()

In [5]:
telemetry_df[['volt', 'rotate', 'pressure', 'vibration']].describe()

In [6]:
telemetry_df.info()

In [7]:
failures = pd.read_csv('azure/PdM_failures.csv')
failures.head()

In [8]:
failures = failures.sort_values(by='datetime')
failures.reset_index(inplace=True, drop=True)
failures.head()

In [9]:
failures['failure'].value_counts()

In [10]:
# Left-join the failure records onto the telemetry rows; rows without a matching
# (datetime, machineID) failure record get NaN in the 'failure' column.
telemetry_with_failure_df = telemetry_df.merge(failures, on=['datetime', 'machineID'], how='left')
# Label only the 'failure' column. Filling the whole frame would also turn any
# missing sensor reading into the string 'No Failure'.
telemetry_with_failure_df['failure'] = telemetry_with_failure_df['failure'].fillna('No Failure')

In [11]:
telemetry_with_failure_df['failure'].value_counts()

In [12]:
machine_info = pd.read_csv('azure/PdM_machines.csv')
machine_info.head()

In [13]:
telemetry_with_failure_df = machine_info.merge(right=telemetry_with_failure_df, on=['machineID'], how='left')
telemetry_with_failure_df.head(-10)

In [14]:
telemetry_with_failure_df['model'].value_counts()

In [15]:
def strToDatetime(date_array, format):
    """Parse every string of ``date_array`` into a ``datetime.datetime``.

    Parameters
    ----------
    date_array : iterable of str
        Date strings to parse.
    format : str
        ``strptime`` format applied to every element.

    Returns
    -------
    list of datetime.datetime
        Parsed values, in the same order as the input.
    """
    parse = datetime.datetime.strptime
    return [parse(text, format) for text in date_array]

In [16]:
datetime_column = strToDatetime(telemetry_with_failure_df['datetime'], '%Y-%m-%d %H:%M:%S')

In [17]:
telemetry_with_failure_df['datetime'] = datetime_column

In [18]:
telemetry_with_failure_df['datetime'].min(), telemetry_with_failure_df['datetime'].max()

In [19]:
machine_ids = np.arange(1, 101)
len(machine_ids)

In [20]:
def hours_estimation(machine_id, data=None):
    """Return one machine's rows with an added ``hours_to_fail`` column.

    The counter is built by walking the rows from last to first: a row whose
    ``failure`` value is not ``'No Failure'`` gets 0, and every other row gets
    one more than the row that follows it. Rows after the last recorded failure
    therefore count back from the end of the data (the final row gets 1).

    Parameters
    ----------
    machine_id : int
        Value matched against the ``machineID`` column.
    data : pandas.DataFrame, optional
        Frame with ``machineID`` and ``failure`` columns. Defaults to the
        notebook-level ``telemetry_with_failure_df``.

    Returns
    -------
    pandas.DataFrame
        A new frame (index reset to 0..n-1) holding the machine's rows plus
        ``hours_to_fail``. The source frame is not modified.
    """
    source = telemetry_with_failure_df if data is None else data
    # reset_index returns a new frame, so the column assignment below never
    # writes into a filtered view of the source.
    df = source[source['machineID'] == machine_id].reset_index(drop=True)

    rul = []
    hours = 0
    # Walk backwards through plain Python values instead of per-element
    # Series lookups.
    for label in reversed(df['failure'].tolist()):
        hours = hours + 1 if label == 'No Failure' else 0
        rul.append(hours)
    rul.reverse()

    df['hours_to_fail'] = rul
    return df

In [21]:
# One labelled frame per machine, in the order given by machine_ids.
dfs = [hours_estimation(machine_id) for machine_id in machine_ids]

In [22]:
# Stack the per-machine frames with a single concat. Growing a frame inside a
# loop re-copies all accumulated rows on every iteration.
telemetry = pd.concat(dfs, axis=0)

In [23]:
print(telemetry.shape[0], telemetry_with_failure_df.shape[0])

In [24]:
# Express the remaining time in seconds (3600 s per hour) and drop the hourly column.
telemetry['seconds_to_fail'] = telemetry['hours_to_fail'].mul(3600)
telemetry.drop(columns=['hours_to_fail'], inplace=True)
telemetry.head()

In [25]:
# sns.heatmap(telemetry.corr(), annot=True).figure.set_size_inches(12, 9)

In [26]:
telemetry.columns

In [27]:
# Convert each datetime value to a POSIX timestamp (float seconds).
datetimes = telemetry['datetime']
timestamps = list(map(datetime.datetime.timestamp, datetimes))
datetimes.shape[0], len(timestamps)

In [28]:
telemetry['timestamp'] = timestamps
#telemetry.drop('datetime', axis=1, inplace=True)
#telemetry = telemetry[['timestamp', 'machineID', 'model', 'age', 'volt', 'rotate', 'pressure', 'vibration', 'failure', 'seconds_to_fail']]
telemetry

In [29]:
sns.boxplot(x=telemetry['model'], y=telemetry['seconds_to_fail'], order=['model1', 'model2', 'model3', 'model4']).figure.set_size_inches(12, 9)

In [30]:
telemetry['model'].value_counts()

## Select the machine model to analyse (change the model name in the next cell)

In [31]:
model3_data = telemetry[telemetry['model'] == 'model3'].reset_index(drop=True)
model3_data

In [32]:
def create_time_step(data, machine_id):
    """Return the rows of one machine with a 1-based ``time_step`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``machineID`` column.
    machine_id : int
        Value matched against ``machineID``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the matching rows (original index kept) and a
        ``time_step`` column numbered 1..n in row order. ``data`` is not modified.
    """
    machine_rows = data[data['machineID'] == machine_id]
    # assign() builds a new frame, so nothing is written into a filtered
    # slice of ``data``.
    return machine_rows.assign(time_step=np.arange(1, len(machine_rows) + 1))

In [33]:
# Build one time-stepped frame per machine present in the selected model's data.
machineIDs = model3_data['machineID'].unique()
dataframes_with_time_step = [create_time_step(model3_data, machine_id) for machine_id in machineIDs]
len(dataframes_with_time_step)

In [34]:
# Recombine the per-machine frames with one concat instead of growing a frame
# inside a loop, which re-copies the accumulated rows every iteration.
model3_data = pd.concat(dataframes_with_time_step)

In [35]:
#model3_data = model3_data[['age', 'volt', 'rotate', 'pressure', 'vibration', 'seconds_to_fail']]
model3_data.drop(['model', 'failure', 'datetime', 'timestamp', 'machineID'], axis=1, inplace=True)
model3_data

In [36]:
scaler = MinMaxScaler()

# Fit the scaler on the training rows only (the first 80% of rows, the same
# fraction used by the split below) so min/max statistics of the held-out rows
# do not leak into training. Held-out values may therefore fall slightly
# outside [0, 1].
n_fit_rows = ceil(model3_data.shape[0] * 0.8)
scaler.fit(model3_data.iloc[:n_fit_rows])

normalized_telemetry = pd.DataFrame(data=scaler.transform(model3_data), columns=model3_data.columns)
normalized_telemetry.head()

In [37]:
# Order-preserving 80/20 split (no shuffling): the first 80% of rows are used
# for training and the remaining rows for testing.
total_rows = normalized_telemetry.shape[0]
train_size = ceil(total_rows * 0.8)
# Take the remainder rather than a second ceil(), so the two sizes always add
# up to the number of rows and match the actual slices.
test_size = total_rows - train_size
print(f'Train size: {train_size}')
print(f'Test size: {test_size}')

In [38]:
# scaler = MinMaxScaler()

# normalized_telemetry = pd.DataFrame(data=scaler.fit_transform(model3_data), columns=model3_data.columns)
# normalized_telemetry.head()

In [39]:
# Positional split: the leading train_size rows train, the rest test.
# The test part is re-indexed from 0.
train_set = normalized_telemetry.iloc[:train_size]
test_set = normalized_telemetry.iloc[train_size:].reset_index(drop=True)

train_set.shape, test_set.shape

In [40]:
# train_size = ceil(normalized_telemetry.shape[0] * 0.7)
# test_and_val_size = ceil((normalized_telemetry.shape[0] * 0.3) / 2)
# print(f'Train size: {train_size}')
# print(f'Test and validation size: {test_and_val_size}')

In [41]:
# Separate the features from the target ('seconds_to_fail') as NumPy arrays.
X_train = train_set.drop(columns='seconds_to_fail').to_numpy()
y_train = train_set['seconds_to_fail'].to_numpy()

X_test = test_set.drop(columns='seconds_to_fail').to_numpy()
y_test = test_set['seconds_to_fail'].to_numpy()

# X_valid = val_set.drop('seconds_to_fail', axis=1)
# X_valid = np.array(X_valid)
# y_valid = val_set['seconds_to_fail']
# y_valid = np.array(y_valid)

In [42]:
def print_plot(y_test, predict, ylim=(0, 0.7)):
    """Print MAE and MSE, then scatter-plot actual against predicted values.

    Parameters
    ----------
    y_test : array-like
        Ground-truth target values.
    predict : array-like
        Predicted values. Any shape with the same number of elements as
        ``y_test`` is accepted (for example an ``(n, 1)`` model output).
    ylim : tuple of float or None, optional
        Y-axis limits for the plot. Defaults to ``(0, 0.7)``. Points outside
        the limits are not drawn; pass ``None`` to let matplotlib autoscale.
    """
    # Flatten both inputs so the metrics and both scatter series see 1-D data.
    y_true = np.asarray(y_test).ravel()
    y_pred = np.asarray(predict).ravel()

    error = mean_absolute_error(y_true, y_pred)
    print(f'MAE:{error}')
    mse = mean_squared_error(y_true, y_pred)
    print(f'MSE:{mse}')

    X = np.arange(y_true.shape[0])
    plt.scatter(X, y_true, color='red', s=1, label='actual')
    plt.scatter(X, y_pred, color='blue', s=1, label='predicted')
    if ylim is not None:
        plt.ylim(ylim)
    plt.legend()
    plt.show()

In [43]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

# Bidirectional LSTM

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
# Add is imported from tensorflow.keras (not the standalone keras package) so
# all Keras objects in this notebook come from the same package.
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Add

# Local aliases so the reshaping below does not touch X_train / X_test.
train_data = X_train
train_labels = y_train
test_data = X_test
test_labels = y_test

# Reshape to (samples, 1, features): each row becomes a sequence of length 1.
n_features = train_data.shape[1]
train_data = np.reshape(train_data, (train_data.shape[0], 1, n_features))
test_data = np.reshape(test_data, (test_data.shape[0], 1, n_features))

# Create the BiLSTM model; the input width follows the data instead of a literal.
model = Sequential()
model.add(Bidirectional(LSTM(64, activation='relu'), input_shape=(None, n_features)))
model.add(Dense(1))  # Output layer with a single neuron for forecasting

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(train_data, train_labels, epochs=10, batch_size=32)

# Evaluate the model
mse = model.evaluate(test_data, test_labels)
print(f"Mean Squared Error: {mse}")

# Make predictions
predict = model.predict(test_data)
print_plot(y_test, predict)

In [None]:
print(model.summary())

# LSTM (RNN) with skip connection

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Add

# Local aliases so the reshaping below does not touch X_train / X_test.
train_data = X_train
train_labels = y_train
test_data = X_test
test_labels = y_test

# Reshape to (samples, 1, features): each row becomes a sequence of length 1.
n_features = train_data.shape[1]
train_data = np.reshape(train_data, (train_data.shape[0], 1, n_features))
test_data = np.reshape(test_data, (test_data.shape[0], 1, n_features))

# Define the input layer
input_layer = Input(shape=(None, n_features))

# LSTM layer with skip connection. The LSTM emits n_features units per step so
# its output can be added element-wise to the raw input.
lstm_layer = LSTM(n_features, activation='relu', return_sequences=True)(input_layer)
skip_layer = Add()([input_layer, lstm_layer])

# Define the output layers
output_layer = LSTM(64, activation='relu')(skip_layer)
output_layer = Dense(1)(output_layer)

# Create and compile the model
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(train_data, train_labels, epochs=10, batch_size=32)

# Make predictions
predict = model.predict(test_data)
print_plot(y_test, predict)

In [None]:
print(model.summary())

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Local aliases for the split produced earlier in the notebook.
train_data, train_labels = X_train, y_train
test_data, test_labels = X_test, y_test

# Stacked LSTM: the first layer returns the full sequence for the second one.
model = Sequential([
    LSTM(64, activation='relu', return_sequences=True, input_shape=(None, 6)),
    LSTM(64, activation='relu'),
    Dense(1),  # single regression output
])
model.compile(optimizer='adam', loss='mse')

# Reshape to (samples, 1, features): each row becomes a sequence of length 1.
train_data = train_data.reshape(train_data.shape[0], 1, train_data.shape[1])
test_data = test_data.reshape(test_data.shape[0], 1, test_data.shape[1])

# Fit, then predict on the held-out rows and report the errors.
model.fit(train_data, train_labels, epochs=10, batch_size=32)
predict = model.predict(test_data)
print_plot(y_test, predict)

In [None]:
print(model.summary())

In [None]:
## CNN model 

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

def create_cnn_model(input_shape):
    """Build and compile a small 1D-CNN regressor.

    The network is Conv1D(32 filters, kernel 3, ReLU) -> MaxPooling1D(2) ->
    Flatten -> Dense(64, ReLU) -> Dense(1), compiled with the Adam optimizer
    and a mean-squared-error loss.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, passed to the first convolution layer.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    layers = [
        Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(1),  # single node for regression
    ]
    cnn = Sequential(layers)
    cnn.compile(optimizer='adam', loss='mean_squared_error')
    return cnn


In [None]:
# NOTE(review): when the cells are run top to bottom, `model` here is still the
# stacked LSTM built in the previous section; the CNN is only created further
# down by create_cnn_model(). This therefore summarises the LSTM, not the CNN.
print(model.summary())

In [None]:
# # Make predictions
# predictions = model.predict(test_data)
# print_plot(y_test, predict)

In [None]:
# Prepare the data for the CNN and train it (numpy is already imported as np).
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Give the features a trailing channel axis: (samples, features, 1).
# reshape(..., -1, 1) is idempotent, so re-running this cell does not keep
# appending axes the way a repeated expand_dims would.
X_train = X_train.reshape(X_train.shape[0], -1, 1)
X_test = X_test.reshape(X_test.shape[0], -1, 1)

# Create the CNN model
input_shape = X_train.shape[1:]
model = create_cnn_model(input_shape)

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# # Evaluate the model on the test set
# loss = model.evaluate(X_test, y_test)
# print("Test Loss:", loss)


In [None]:
X_test.shape

In [None]:
# Make predictions
predict = model.predict(X_test)
print_plot(y_test, predict)

In [None]:
print(model.summary())

In [None]:
# Resnet replica on 1D tabular dataset

In [None]:
# Import everything from tensorflow.keras so these layers come from the same
# package as the Conv1D / Flatten / Dense layers used by the other models.
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Add, Conv1D, Dense, Flatten, Input

Input_layer = Input(shape=input_shape)
x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(Input_layer)
x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(x)
# First skip connection: add the block input to the convolution output.
# NOTE(review): x has 32 channels; this presumably relies on the input having a
# single channel that Keras broadcasts - confirm against input_shape.
y = Add()([x, Input_layer])
temp = x
x = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(y)
# Second skip connection. Its output z must feed the head; the previous code
# flattened x instead, which left this connection unused.
z = Add()([x, temp])
x = Flatten()(z)
x = Dense(64, activation='relu')(x)
x = Dense(1)(x)  # Output layer (1 node for regression)

In [None]:
model = Model(inputs=Input_layer, outputs=x)

In [None]:
print(model.summary())

In [None]:
model.compile(optimizer='adam', loss='mean_squared_error')
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)


In [None]:
# Make predictions
test_data = X_test
predict = model.predict(test_data)
print_plot(y_test, predict)

In [None]:
#LSTM 

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Local aliases for the train/test split (real telemetry, not random data).
train_data = X_train
train_labels = y_train
test_data = X_test
test_labels = y_test

# Reshape to (samples, 1, features): each row becomes a sequence of length 1.
# Axis 1 holds the feature count whether X_train is still 2-D or already has
# the trailing channel axis added for the CNN.
n_features = train_data.shape[1]
train_data = np.reshape(train_data, (train_data.shape[0], 1, n_features))
test_data = np.reshape(test_data, (test_data.shape[0], 1, n_features))

# Create the LSTM model; the input width follows the data instead of a literal.
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(None, n_features)))
model.add(Dense(1))  # Output layer with a single neuron for forecasting

# Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model, then predict on the held-out rows and report the errors.
model.fit(train_data, train_labels, epochs=10, batch_size=32)
predict = model.predict(test_data)
print_plot(y_test, predict)

In [None]:
print(model.summary())

In [None]:
from tensorflow.keras.layers import Reshape

# Local aliases for the split produced earlier in the notebook.
train_data, train_labels = X_train, y_train
test_data, test_labels = X_test, y_test

# CNN + LSTM hybrid: convolutional feature extractor whose 64-unit dense output
# is reshaped into a one-step sequence for the LSTM.
model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(6, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Reshape((1, 64)),  # (batch, 64) -> (batch, 1, 64)
    LSTM(64, activation='relu'),
    Dense(1),  # single regression output
])
model.compile(optimizer='adam', loss='mse')

# Reshape to (samples, features, 1) for the Conv1D input.
train_data = train_data.reshape(train_data.shape[0], train_data.shape[1], 1)
test_data = test_data.reshape(test_data.shape[0], test_data.shape[1], 1)

# Fit, then predict on the held-out rows and report the errors.
model.fit(train_data, train_labels, epochs=10, batch_size=32)
predict = model.predict(test_data)
print_plot(y_test, predict)

In [None]:
print(model.summary())