DATA PROCESSING

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical

data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,Wellhead_Temp. (C),Wellhead_Press (psi),MMCFD- gas,BOPD (barrel of oil produced per day),BWPD (barrel of water produced per day),BSW basic solid and water(%),CO2 mol. (%) @ 25C & 1 Atm,Gas Grav,CR-corrosion defect,leak_status
0,51.088486,1364.601753,6.784434,404.673699,293.125995,5.265676,1.038911,0.601146,0.144365,1.0
1,51.096512,1474.635955,11.280674,364.764774,208.47237,5.331766,0.961022,0.618705,0.135026,0.0
2,57.631303,1233.116598,8.786931,414.652564,228.530184,5.69627,1.295514,0.712029,0.154846,1.0
3,55.413455,1564.958133,10.793411,511.357422,161.711604,6.073352,0.801719,0.640946,0.108198,0.0
4,36.821261,1500.806676,12.748623,537.593023,185.303039,4.867634,1.049567,0.644026,0.114479,0.0


In [3]:
print(data.isnull().sum())

Wellhead_Temp. (C)                         0
Wellhead_Press (psi)                       0
MMCFD- gas                                 0
BOPD (barrel of oil produced per day)      0
BWPD (barrel of water produced per day)    0
BSW basic solid and water(%)               0
CO2 mol. (%) @ 25C & 1 Atm                 0
Gas Grav                                   0
CR-corrosion defect                        0
leak_status                                0
dtype: int64


FEATURES AND TARGETS

In [4]:
X = data.drop('leak_status', axis=1)
y = data['leak_status']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print('done')

done


RESHAPE DATA FOR LSTM

In [5]:
X_train_lstm = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test_lstm = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

print('done')

done


CONVERT LABELS TO CATEGORICAL ONE-HOT ENCODING

In [6]:
y_train_lstm = to_categorical(y_train)
y_test_lstm = to_categorical(y_test)

LSTM MODEL

In [7]:
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam


def build_model(hp):
    model = Sequential()
    model.add(LSTM(units=hp.Int('units', min_value=32, max_value=256, step=32), input_shape=(1, X_train.shape[1])))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=3,
    directory='my_dir',
    project_name='LSTM_tuning'
)

# Run hyperparameter search
tuner.search(X_train_lstm, y_train_lstm, epochs=10, validation_data=(X_test_lstm, y_test_lstm))

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters:")
print(f"Units: {best_hps.get('units')}")
print(f"Learning Rate: {best_hps.get('learning_rate')}")

Trial 10 Complete [00h 00m 52s]
val_accuracy: 0.8192222317059835

Best val_accuracy So Far: 0.859000007311503
Total elapsed time: 00h 12m 04s
Best Hyperparameters:
Units: 224
Learning Rate: 0.001


TRAIN MODEL

In [8]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_lstm, y_train_lstm, epochs=10, validation_data=(X_test_lstm, y_test_lstm))

Epoch 1/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - accuracy: 0.6528 - loss: 0.9410 - val_accuracy: 0.8297 - val_loss: 0.4590
Epoch 2/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.8248 - loss: 0.4301 - val_accuracy: 0.8507 - val_loss: 0.3702
Epoch 3/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8355 - loss: 0.3827 - val_accuracy: 0.8543 - val_loss: 0.3548
Epoch 4/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8423 - loss: 0.3619 - val_accuracy: 0.8560 - val_loss: 0.3572
Epoch 5/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.8485 - loss: 0.3605 - val_accuracy: 0.8533 - val_loss: 0.3534
Epoch 6/10
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8514 - loss: 0.3488 - val_accuracy: 0.8580 - val_loss: 0.3523
Epoch 7/10
[1m219/219[

SAVE MODEL

In [17]:
model.save('lstm_model.keras')

print('done')

done


MODEL EVALUATION

In [19]:
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import roc_auc_score

loaded_model = load_model('lstm_model.keras')

y_pred_proba = loaded_model.predict(X_test_lstm)
y_pred = np.argmax(y_pred_proba, axis=1)  # Predicted classes
y_true = np.argmax(y_test_lstm, axis=1)   # True classes

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


CONFUSION MATRIX

In [20]:
from sklearn.metrics import confusion_matrix, classification_report


conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_matrix)


Confusion Matrix:
[[1466   70    0    0]
 [ 117  419   53    0]
 [   0   83  455   55]
 [   0    0   53  229]]


CLASSIFICATION REPORT

In [21]:
class_report = classification_report(y_true, y_pred, target_names=['Normal', 'Minor Leak', 'Moderate Leak', 'Severe Leak'])
print("Classification Report:")
print(class_report)

Classification Report:
               precision    recall  f1-score   support

       Normal       0.93      0.95      0.94      1536
   Minor Leak       0.73      0.71      0.72       589
Moderate Leak       0.81      0.77      0.79       593
  Severe Leak       0.81      0.81      0.81       282

     accuracy                           0.86      3000
    macro avg       0.82      0.81      0.81      3000
 weighted avg       0.85      0.86      0.85      3000



ROC_AUC SCORE

In [22]:
roc_auc = roc_auc_score(y_test_lstm, y_pred_proba, multi_class='ovr')
print("ROC-AUC Score:", roc_auc)

ROC-AUC Score: 0.967558416956429
