In [1]:
# Import required libraries
import tensorflow as tf

import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib


In [9]:
# Load the dataset
file_path = "/content/pipeline_leak_detection_with_timestamp.csv"  # Change this if needed
df = pd.read_csv("pipeline_leak_detection_with_timestamp.csv")

# Convert Timestamp to datetime
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

# Sort by time (important for LSTM)
df = df.sort_values("Timestamp")

# Select Features & Target
X = df.drop(columns=["Leakage", "Timestamp"])  # Drop timestamp
y = df["Leakage"]

# Standardize the Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Save scaler for future use
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']

In [10]:
# Function to create time-series sequences for LSTM
def create_sequences(X, y, time_steps=10):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i + time_steps])
        ys.append(y[i + time_steps])
    return np.array(Xs), np.array(ys)

# Define time step (how many previous data points to use for prediction)
time_steps = 10

# Reshape data
X_lstm, y_lstm = create_sequences(X_scaled, y, time_steps)

# Train-Test Split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X_lstm, y_lstm, test_size=0.2, random_state=42, stratify=y_lstm)

# Print shape of final data
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (7992, 10, 3), y_train shape: (7992,)
X_test shape: (1998, 10, 3), y_test shape: (1998,)


In [11]:
# Build LSTM Model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(time_steps, X_train.shape[2])),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Binary Classification
])

# Compile Model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Print Model Summary
model.summary()


  super().__init__(**kwargs)


In [12]:
# Train the Model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=30, batch_size=64, verbose=1)


Epoch 1/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.8751 - loss: 0.4416 - val_accuracy: 0.9199 - val_loss: 0.2797
Epoch 2/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.9212 - loss: 0.2801 - val_accuracy: 0.9199 - val_loss: 0.2800
Epoch 3/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.9210 - loss: 0.2788 - val_accuracy: 0.9199 - val_loss: 0.2793
Epoch 4/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.9181 - loss: 0.2878 - val_accuracy: 0.9199 - val_loss: 0.2791
Epoch 5/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9206 - loss: 0.2786 - val_accuracy: 0.9199 - val_loss: 0.2800
Epoch 6/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.9202 - loss: 0.2819 - val_accuracy: 0.9199 - val_loss: 0.2795
Epoch 7/30
[1m125/125

In [13]:
# Predict on Test Set
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Accuracy Score
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
Model Accuracy: 0.9199


In [14]:
model.save("lstm_pipeline_leak_model.h5")

