In [23]:
# Load and Preprocess the Dataset

import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv("all_logs.csv")

# Create features
df["log_duration"] = np.log1p(df["duration_s"])
df["mem_mb"] = df["mem_kb_max"] / 1024
df["timestamp"] = pd.to_datetime(df["timestamp"])
df["hour"] = df["timestamp"].dt.hour
df["dayofweek"] = df["timestamp"].dt.dayofweek
df["tag_code"] = df["tag"].map({"build": 0, "lint": 1, "test": 2})
df["status_label"] = df["status"].map({"pass": 1, "fail": 0})

# Select features and labels
features = ["log_duration", "cpu_pct_avg", "mem_mb", "hour", "dayofweek", "tag_code"]
X = df[features].values
y = df["status_label"].values

In [24]:
# Standardize and Split the Data

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [25]:
# Decision Tree

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score, classification_report

dt = DecisionTreeClassifier(max_depth=5)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

print(classification_report(y_test, y_pred_dt))
print("F1-score:", f1_score(y_test, y_pred_dt))

              precision    recall  f1-score   support

           0       0.75      0.50      0.60        18
           1       0.91      0.97      0.94        95

    accuracy                           0.89       113
   macro avg       0.83      0.73      0.77       113
weighted avg       0.89      0.89      0.88       113

F1-score: 0.9387755102040817


In [26]:
# Random Forest

from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

print(classification_report(y_test, y_pred_rf))
print("F1-score (RF):", f1_score(y_test, y_pred_rf))

              precision    recall  f1-score   support

           0       1.00      0.56      0.71        18
           1       0.92      1.00      0.96        95

    accuracy                           0.93       113
   macro avg       0.96      0.78      0.84       113
weighted avg       0.93      0.93      0.92       113

F1-score (RF): 0.9595959595959596


In [27]:
#  Logistic Regression

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

print(classification_report(y_test, y_pred_lr))
print("F1-score:", f1_score(y_test, y_pred_lr))

              precision    recall  f1-score   support

           0       0.83      0.28      0.42        18
           1       0.88      0.99      0.93        95

    accuracy                           0.88       113
   macro avg       0.86      0.63      0.67       113
weighted avg       0.87      0.88      0.85       113

F1-score: 0.9306930693069307


In [28]:
# XGBoost

from xgboost import XGBClassifier

xgb = XGBClassifier(n_estimators=100, max_depth=4, use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

print(classification_report(y_test, y_pred_xgb))
print("F1-score:", f1_score(y_test, y_pred_xgb))

              precision    recall  f1-score   support

           0       0.92      0.61      0.73        18
           1       0.93      0.99      0.96        95

    accuracy                           0.93       113
   macro avg       0.92      0.80      0.85       113
weighted avg       0.93      0.93      0.92       113

F1-score: 0.9591836734693877


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [29]:
# Autoencoder

from keras.models import Model
from keras.layers import Input, Dense
from keras import regularizers

input_dim = X_scaled.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(4, activation='relu', activity_regularizer=regularizers.l1(1e-5))(input_layer)
decoded = Dense(input_dim, activation='linear')(encoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=16, validation_split=0.1, verbose=0)

reconstructions = autoencoder.predict(X_scaled)
mse = np.mean(np.power(X_scaled - reconstructions, 2), axis=1)
threshold = np.percentile(mse[y == 1], 95)
y_pred_ae = (mse <= threshold).astype(int)

print(classification_report(y, y_pred_ae))
print("F1-score (Autoencoder):", f1_score(y, y_pred_ae))

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
              precision    recall  f1-score   support

           0       0.25      0.09      0.13        90
           1       0.85      0.95      0.89       473

    accuracy                           0.81       563
   macro avg       0.55      0.52      0.51       563
weighted avg       0.75      0.81      0.77       563

F1-score (Autoencoder): 0.8944223107569721


In [30]:
# LSTM (Sequential)

from keras.models import Sequential
from keras.layers import LSTM, Dense, Masking
from sklearn.utils import shuffle

# Group logs per pipeline
seq_len = 3
tag_map = {"build": 0, "lint": 1, "test": 2}
X_seq, y_seq = [], []


for pid, group in df.groupby("pipeline_id"):
    group = group.sort_values("timestamp")
    feat_df = pd.DataFrame({
        "log_duration": np.log1p(group["duration_s"]),
        "cpu_pct_avg": group["cpu_pct_avg"],
        "mem_mb": group["mem_kb_max"] / 1024,
        "tag_code": group["tag"].map(tag_map)
    })
    arr = feat_df.values
    if arr.shape[0] < seq_len:
        arr = np.vstack([arr, np.zeros((seq_len - arr.shape[0], arr.shape[1]))])
    else:
        arr = arr[:seq_len]
    label = int((group["status_label"].mean() == 1))
    X_seq.append(arr)
    y_seq.append(label)

X_seq, y_seq = shuffle(np.array(X_seq), np.array(y_seq), random_state=42)

# Train/test split for sequence
split = int(0.8 * len(X_seq))
X_train_seq, X_test_seq = X_seq[:split], X_seq[split:]
y_train_seq, y_test_seq = y_seq[:split], y_seq[split:]

# Train LSTM
model = Sequential([
    Masking(mask_value=0.0, input_shape=(seq_len, X_seq.shape[2])),
    LSTM(64),
    Dense(1, activation="sigmoid")
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(X_train_seq, y_train_seq, epochs=20, batch_size=16, validation_split=0.1, verbose=1)

# Predict
y_pred_lstm = (model.predict(X_test_seq) > 0.5).astype(int).flatten()

print(classification_report(y_test_seq, y_pred_lstm, target_names=["fail","pass"]))
print("F1-score (LSTM):", f1_score(y_test_seq, y_pred_lstm))

Epoch 1/20


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step - accuracy: 0.5295 - loss: 0.6593 - val_accuracy: 0.7143 - val_loss: 0.7732
Epoch 2/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.7784 - loss: 0.6007 - val_accuracy: 0.7143 - val_loss: 0.7497
Epoch 3/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.8103 - loss: 0.5636 - val_accuracy: 0.7143 - val_loss: 0.7341
Epoch 4/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8335 - loss: 0.5377 - val_accuracy: 0.7143 - val_loss: 0.7140
Epoch 5/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.7984 - loss: 0.5682 - val_accuracy: 0.7143 - val_loss: 0.6954
Epoch 6/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8317 - loss: 0.5363 - val_accuracy: 0.7143 - val_loss: 0.6814
Epoch 7/20
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [31]:
print("Decision Tree:", f1_score(y_test, y_pred_dt))
print("Random Forest:", f1_score(y_test, y_pred_rf))
print("Logistic Regression:", f1_score(y_test, y_pred_lr))
print("XGBoost:", f1_score(y_test, y_pred_xgb))
print("Autoencoder:", f1_score(y, y_pred_ae))  # Full set
print("LSTM:", f1_score(y_test_seq, y_pred_lstm))

Decision Tree: 0.9387755102040817
Random Forest: 0.9595959595959596
Logistic Regression: 0.9306930693069307
XGBoost: 0.9591836734693877
Autoencoder: 0.8944223107569721
LSTM: 0.8333333333333334


In [32]:
import os
import joblib

# Ensure directory exists
os.makedirs("models", exist_ok=True)

# Random forest - Save both model and scaler
joblib.dump(rf, "models/rf_model.joblib")
joblib.dump(scaler, "models/rf_scaler.joblib")

# LSTM - model
model.save("models/lstm_model.keras")