In [13]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import os


# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [14]:
df = pd.read_csv('generated_dataset.csv')

# Binning Target
percentile = 0.8
threshold = df['CR-corrosion defect'].quantile(percentile)
df['Leak_Status'] = np.where(df['CR-corrosion defect'] > threshold, 'Leak', 'No Leak')

le = LabelEncoder()
y = le.fit_transform(df['Leak_Status'])

X = df.drop(['CR-corrosion defect', 'Leak_Status'], axis=1)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# LSTM expects 3D input: (samples, timesteps, features)
# For tabular data, use timesteps=1
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# One-hot encode target for Keras
y_cat = to_categorical(y)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_cat, test_size=0.2, random_state=42)

In [15]:

model = Sequential([
    LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(y_cat.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop]
)

Epoch 1/50


  super().__init__(**kwargs)


[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.7797 - loss: 0.5476 - val_accuracy: 0.7923 - val_loss: 0.3801
Epoch 2/50
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8125 - loss: 0.3639 - val_accuracy: 0.8877 - val_loss: 0.3014
Epoch 3/50
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8954 - loss: 0.2912 - val_accuracy: 0.9338 - val_loss: 0.2083
Epoch 4/50
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9230 - loss: 0.2146 - val_accuracy: 0.9587 - val_loss: 0.1701
Epoch 5/50
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9305 - loss: 0.1966 - val_accuracy: 0.9636 - val_loss: 0.1560
Epoch 6/50
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9370 - loss: 0.1843 - val_accuracy: 0.9605 - val_loss: 0.1484
Epoch 7/50
[1m206/206[0m [32m━━━━━━━

In [16]:

loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Predict and classification report
from sklearn.metrics import classification_report

y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print(classification_report(y_true, y_pred_classes, target_names=le.classes_))

[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9593 - loss: 0.1343
Test Accuracy: 0.9704
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
              precision    recall  f1-score   support

        Leak       0.95      0.89      0.92       377
     No Leak       0.97      0.99      0.98      1682

    accuracy                           0.97      2059
   macro avg       0.96      0.94      0.95      2059
weighted avg       0.97      0.97      0.97      2059



In [17]:
model.save("lstm_leak_detection_model.keras")

In [18]:
from tensorflow.keras.models import load_model


# Load the model and scaler for prediction
model = load_model("lstm_leak_detection_model.keras", compile=False)

# Example prediction
example = pd.DataFrame([{
    'Wellhead Temp. (C)': 85,
    'Wellhead Press (psi)': 2500,
    'MMCFD- gas': 12,
    'BOPD (barrel of oil produced per day)': 500,
    'BWPD (barrel of water produced per day)': 200,
    'BSW - basic solid and water (%)': 5,
    'CO2 mol. (%) @ 25 C & 1 Atm.': 2.5,
    'Gas Grav.': 0.7
}])

example_scaled = scaler.transform(example).reshape((1, 1, X.shape[1]))
pred = model.predict(example_scaled)

# Display probability for each class in percentage
for label, prob in zip(le.classes_, pred[0]):
    print(f"Probability of {label}: {prob * 100:.2f}%")

# Display predicted class as before
pred_label = le.inverse_transform([np.argmax(pred)])
print("Predicted Leak Status:", pred_label[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
Probability of Leak: 1.81%
Probability of No Leak: 98.19%
Predicted Leak Status: No Leak
