In [1]:
# get_ipython().magic('matplotlib inline')

# %tensorflow_version 2.x
import tensorflow as tf

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Bidirectional

from tensorflow.python.keras import backend as K

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
# # user-defined libraries
# import utilities.datapreprocessing as dp
# import utilities.performancemetrics as pm
# import utilities.simpleplots as sp

from numpy.random import seed
seed(1)

SEED = 123  # used to help randomly select the data points
DATA_SPLIT_PCT = 0.2

from pylab import rcParams
rcParams['figure.figsize'] = 8, 6
plt.rcParams.update({'font.size': 22})

In [2]:
df = pd.read_csv("Imputed_New-Dataset_Aiops_3.csv")
df.head(n=5)  # visualize the data.

Unnamed: 0,hostid,clock,Download_Speed,Reach_Time,Time_to_First_Byte,HTTP_Probe_Duration,Page_Load_Time,DNS_Query_Time,Status_ID,Failed_step_of_scenario_WEB_HEALTH_CHECK,...,Available_memory_in_percent,Memory_utilization,System_local_time,Space_Available,Boot_Space_Available,Available_memory,Total_memory,website_events,server_events,event_value
0,10640,10/1/2024 18:19,58357.4652,0.00559,0.00526,2.073,0.001847,25.8,1,1.0,...,67.063513,32.936487,1727786986,12294438912,719982592,673267712.0,1003925504,0,0,20.279284
1,10640,10/1/2024 18:20,60076.61089,0.00526,0.00506,1.949,0.001725,26.0,1,1.0,...,67.065145,32.934855,1727787046,12294438912,719982592,673284096.0,1003925504,0,0,23.100124
2,10640,10/1/2024 18:21,55257.10544,0.00496,0.00472,1.582,0.001656,25.0,1,1.0,...,67.066777,32.933223,1727787106,12294438912,719982592,673300480.0,1003925504,0,0,25.4
3,10640,10/1/2024 18:22,51892.74293,0.00513,0.00487,1.702,0.001843,24.0,1,1.0,...,67.068409,32.931591,1727787166,12294438912,719982592,673316864.0,1003925504,0,0,22.475529
4,10640,10/1/2024 18:23,58481.3342,0.00495,0.00568,2.148,0.001566,14.0,1,1.0,...,67.069633,32.930367,1727787226,12294434816,719982592,673329152.0,1003925504,0,0,25.0


In [19]:
# 1. Load event mappings
def load_event_mappings():
    website_events = {
        0: 'nan',
        1: '65.2.33.65 is not available',
        2: '3.110.32.172 is not available',
        3: '52.66.107.192 is not available',
        4: '13.126.209.90 is not available',
        5: '65.2.168.33 is not available',
        6: '13.127.178.245 is not available',
        7: '13.126.11.21 is not available',
        8: 'Reach Time is High for 13.126.11.21 (More than 0.5 Seconds)',
        9: 'Download Speed is Low for 13.126.11.21',
        10: 'Download Speed is Low for 13.126.11.21 (Less than 1000 Kbps)',
        11: 'Page Load Time is High for 13.126.11.21 (More than 0.1 Seconds)',
        12: 'HTTP Probe Duration is High for 13.126.11.21 (More than 10 milliseconds)',
        13: 'DNS Query Time is High for 13.126.11.21 (More than 40 milliseconds)',
        14: '13.233.233.130 is not available',
        15: 'DNS Query Time is High for 13.233.233.130 (More than 40 milliseconds)',
        16: 'Reach Time is High for 13.233.233.130 (More than 0.5 Seconds)',
        17: 'Download Speed is Low for 13.233.233.130 (Less than 1000 Kbps)',
        18: 'Page Load Time is High for 13.233.233.130 (More than 0.1 Seconds)',
        19: 'HTTP Probe Duration is High for 13.233.233.130 (More than 10 milliseconds)',
        20: 'DNS Query Time is High for 13.233.233.130 (More than 100 milliseconds)',
        21: 'Download Speed is Low for 13.233.233.130 (Less than 500 Kbps)',
        22: 'HTTP Probe Duration is High for 13.233.233.130 (More than 100 milliseconds)',
        23: 'Page Load Time is High for 13.233.233.130 (More than 0.5 Seconds)'
    }

    server_events = {
        0: 'nan',
        1: 'Number of installed packages has been changed',
        2: 'High CPU utilization (over 90% for 5m)',
        3: 'Load average is too high (per CPU load over 1.5 for 5m)',
        4: 'Zabbix agent is not available (for 3m)',
        5: 'application-server has been restarted (uptime < 10m)',
        6: '/etc/passwd has been changed'
    }

    return website_events, server_events

In [3]:
# Prepare features (X) and targets (y)
# Remove target columns and any non-numeric columns
feature_columns = df.select_dtypes(include=[np.number]).columns
feature_columns = feature_columns.drop(['website_events', 'server_events'], errors='ignore')

# Convert to numpy arrays
input_X = df[feature_columns].values
input_y_website = df['website_events'].values
input_y_server = df['server_events'].values

# Get number of features
n_features = input_X.shape[1]

print("Number of features:", n_features)
print("Shape of input_X:", input_X.shape)
print("Shape of input_y_website:", input_y_website.shape)
print("Shape of input_y_server:", input_y_server.shape)

# Split the data into train and test sets
X_train, X_test, y_website_train, y_website_test, y_server_train, y_server_test = train_test_split(
    input_X, 
    input_y_website, 
    input_y_server, 
    test_size=0.2, 
    random_state=42
)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define TIMESTEPS for LSTM
TIMESTEPS = 5  # You can adjust this value
N_FEATURES = n_features

print("\nTraining data shapes:")
print("X_train_scaled:", X_train_scaled.shape)
print("y_website_train:", y_website_train.shape)
print("y_server_train:", y_server_train.shape)

Number of features: 28
Shape of input_X: (20192, 28)
Shape of input_y_website: (20192,)
Shape of input_y_server: (20192,)

Training data shapes:
X_train_scaled: (16153, 28)
y_website_train: (16153,)
y_server_train: (16153,)


In [7]:
# Define input_y for website events
input_y = df['website_events'].values

# Define temporalize function
def temporalize(X, y, lookback):
    output_X = []
    output_y = []
    for i in range(len(X) - lookback):
        t = []
        for j in range(lookback):
            t.append(X[i + j])
        output_X.append(t)
        output_y.append(y[i + lookback])
    return np.array(output_X), np.array(output_y)

# Use the function
lookback = 5
X, y = temporalize(X=input_X, y=input_y, lookback=lookback)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    np.array(X),
    np.array(y),
    test_size=DATA_SPLIT_PCT,
    random_state=SEED)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train,
    y_train,
    test_size=DATA_SPLIT_PCT,
    random_state=SEED)

In [9]:
X_train.shape

TIMESTEPS = X_train.shape[1]  # equal to the lookback
N_FEATURES = X_train.shape[2]  # 

In [10]:
# Create flatten and scale functions
def flatten(X):
    flattened_X = np.empty((X.shape[0], X.shape[2]))  # (samples, features)
    for i in range(X.shape[0]):
        flattened_X[i] = X[i, -1, :]
    return flattened_X

def scale(X, scaler):
    for i in range(X.shape[0]):
        X[i, :, :] = scaler.transform(X[i, :, :])
    return X

# Initialize and fit scaler
scaler = StandardScaler().fit(flatten(X_train))
X_train_scaled = scale(X_train, scaler)

In [11]:
# Scale validation and test sets using the same scaler
X_valid_scaled = scale(X_valid, scaler)
X_test_scaled = scale(X_test, scaler)

# Print shapes to verify
print("Scaled shapes:")
print("X_train_scaled:", X_train_scaled.shape)
print("X_valid_scaled:", X_valid_scaled.shape)
print("X_test_scaled:", X_test_scaled.shape)

Scaled shapes:
X_train_scaled: (12919, 5, 28)
X_valid_scaled: (3230, 5, 28)
X_test_scaled: (4038, 5, 28)


In [12]:
# Prepare features (X) and targets (y)
# Remove target columns and any non-numeric columns
feature_columns = df.select_dtypes(include=[np.number]).columns
feature_columns = feature_columns.drop(['website_events', 'server_events'], errors='ignore')

# Convert to numpy arrays
input_X = df[feature_columns].values
input_y_website = df['website_events'].values
input_y_server = df['server_events'].values

# Get number of features
n_features = input_X.shape[1]

In [13]:
model = Sequential()
model.add(Input(shape=(TIMESTEPS, N_FEATURES),
                name='input'))
model.add(
    LSTM(units=16,
         activation='tanh',
         recurrent_activation='sigmoid',
         return_sequences=True,
         name='lstm_layer_1'))
model.add(
    LSTM(units=8,
         activation='tanh',
         recurrent_activation='sigmoid',
         return_sequences=False,
         name='lstm_layer_2'))
model.add(Dense(units=1,
                activation='sigmoid',
                name='output'))

model.summary()

In [14]:
# Number of parameters = 4l(p + l + 1),
# l = layer size, p = number of features.
4*16*(n_features + 16 + 1)

2880

In [15]:
# First modify compilation to use binary metrics
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[
                  'accuracy',
                  tf.keras.metrics.Recall(),
                  tf.keras.metrics.BinaryAccuracy(),
                  tf.keras.metrics.F1Score(),
                  tf.keras.metrics.FalsePositives()
              ])

# Then modify the training data shapes
y_train = y_train.reshape(-1, 1)
y_valid = y_valid.reshape(-1, 1)

history = model.fit(x=X_train_scaled,
                    y=y_train,
                    batch_size=128,
                    epochs=100,
                    validation_data=(X_valid_scaled, y_valid),
                    verbose=0).history

## Prediction

In [16]:
# 1. First, define the exact 25 features we want to use
feature_columns = [
    'Download_Speed', 'Reach_Time', 'Time_to_First_Byte', 'HTTP_Probe_Duration',
    'Page_Load_Time', 'DNS_Query_Time', 'Status_ID', 'Failed_step_of_scenario_WEB_HEALTH_CHECK',
    'Interrupts_per_second', 'Load_average_15m_avg', 'Load_average_1m_avg',
    'Load_average_5m_avg', 'CPU_utilization', 'CPU_idle_time', 'CPU_iowait_time',
    'CPU_system_time', 'CPU_user_time', 'xvda_Disk_utilization',
    'Boot_Space_Used_in_percent', 'Available_memory_in_percent', 'Memory_utilization',
    'Space_Available', 'Boot_Space_Available', 'Available_memory', 'Total_memory'
]

# 2. Prepare the data with only these features
input_X = df[feature_columns].values
input_y_website = df['website_events'].values
input_y_server = df['server_events'].values

# 3. Create sequences
lookback = 5
X, y = temporalize(X=input_X, y=input_y_website, lookback=lookback)

# 4. Split and scale
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=DATA_SPLIT_PCT, random_state=SEED)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=DATA_SPLIT_PCT, random_state=SEED)

# 5. Scale the data
scaler = StandardScaler()
X_train_scaled = np.array([scaler.fit_transform(x) for x in X_train])
X_valid_scaled = np.array([scaler.transform(x) for x in X_valid])
X_test_scaled = np.array([scaler.transform(x) for x in X_test])

# 6. Define and train the model
TIMESTEPS = X_train.shape[1]
N_FEATURES = len(feature_columns)

model = Sequential()
model.add(Input(shape=(TIMESTEPS, N_FEATURES), name='input'))
model.add(LSTM(units=16,
               activation='relu',
               recurrent_activation='sigmoid',
               return_sequences=True,
               name='lstm_layer_1'))
model.add(LSTM(units=8,
               activation='relu',
               recurrent_activation='sigmoid',
               return_sequences=False,
               name='lstm_layer_2'))
model.add(Dense(units=1,
                activation='sigmoid',
                name='output'))

model.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=['accuracy'])

# 7. Train the model
history = model.fit(x=X_train_scaled,
                   y=y_train,
                   batch_size=128,
                   epochs=100,
                   validation_data=(X_valid_scaled, y_valid),
                   verbose=1)

# 8. Prediction function
def predict_events(model, scaler, input_metrics, timesteps=5):
    """
    Make predictions using the trained LSTM model
    """
    if isinstance(input_metrics, dict):
        input_df = pd.DataFrame([input_metrics])
        input_metrics = input_df[feature_columns].values[0]

    sequence = np.array([input_metrics] * timesteps)
    sequence = sequence.reshape(1, timesteps, len(feature_columns))
    
    scaled_sequence = np.zeros_like(sequence)
    for i in range(sequence.shape[0]):
        scaled_sequence[i] = scaler.transform(sequence[i])
    
    prediction = model.predict(scaled_sequence, verbose=0)
    probability = prediction[0][0]
    
    return {
        'event_predicted': 1 if probability >= 0.5 else 0,
        'probability': probability
    }


Epoch 1/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 3.0877e-05 - loss: -3.4560 - val_accuracy: 0.0424 - val_loss: 6276413.0000
Epoch 2/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.2127e-05 - loss: -5437.9429 - val_accuracy: 0.0000e+00 - val_loss: 1752808064.0000
Epoch 3/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 7.8710e-06 - loss: -436927.8125 - val_accuracy: 0.0000e+00 - val_loss: 57452802048.0000
Epoch 4/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 1.4468e-04 - loss: -8456506.0000 - val_accuracy: 0.0000e+00 - val_loss: 660907819008.0000
Epoch 5/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 4.8389e-05 - loss: -79892008.0000 - val_accuracy: 0.0000e+00 - val_loss: 4037895520256.0000
Epoch 6/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

In [17]:
# 9. Test prediction
test_metrics = {
    
    'Download_Speed': 1084.0767,
    'Reach_Time': 0.00346,
    'Time_to_First_Byte': 0.00342,
    'HTTP_Probe_Duration': 1.066,
    'Page_Load_Time': 0.003683,
    'DNS_Query_Time': 110,
    'Status_ID': 1,
    'Failed_step_of_scenario_WEB_HEALTH_CHECK': 1,
    'Interrupts_per_second': 308.4319097,
    'Load_average_15m_avg': 0.193848,
    'Load_average_1m_avg': 0.943848,
    'Load_average_5m_avg': 0.504883,
    'CPU_utilization': 1.3696452,
    'CPU_idle_time': 98.6303548,
    'CPU_iowait_time': 0.04182,
    'CPU_system_time': 0.2341426,
    'CPU_user_time': 0.9838802,
    'xvda_Disk_utilization': 1.33915964,
    'Boot_Space_Used_in_percent': 16.13612725,
    'Available_memory_in_percent': 54.451467,
    'Memory_utilization': 46.290275,
    'Space_Available': 9388851200,
    'Boot_Space_Available': 719982592,
    'Available_memory': 546762752,
  
    'Total_memory': 1003925504
}

prediction = predict_events(model, scaler, test_metrics)
print(f"Event Predicted: {prediction['event_predicted']}")
print(f"Probability: {prediction['probability']:.4f}")

Event Predicted: 1
Probability: 1.0000


In [33]:
def create_lstm_model(timesteps, n_features):
    """
    Create an LSTM model for time series prediction
    """
    model = Sequential([
        Input(shape=(timesteps, n_features)),
        LSTM(units=64, return_sequences=True),
        Dropout(0.2),
        LSTM(units=32, return_sequences=False),
        Dropout(0.2),
        Dense(units=16, activation='relu'),
        Dense(units=1, activation='sigmoid')
    ])
    
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy', 
                tf.keras.metrics.Recall(),
                tf.keras.metrics.Precision(),
                tf.keras.metrics.AUC()]
    )
    
    return model

In [34]:
def prepare_data(df, feature_columns, target_column, lookback=5, test_size=0.2):
    """
    Prepare and scale data for LSTM model
    """
    # Prepare features and target
    X = df[feature_columns].values
    y = df[target_column].values
    
    # Create sequences
    X_sequences = []
    y_sequences = []
    
    for i in range(len(X) - lookback):
        X_sequences.append(X[i:(i + lookback)])
        y_sequences.append(y[i + lookback])
    
    X_sequences = np.array(X_sequences)
    y_sequences = np.array(y_sequences)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X_sequences, y_sequences, test_size=test_size, random_state=42
    )
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = np.array([scaler.fit_transform(x) for x in X_train])
    X_test_scaled = np.array([scaler.transform(x) for x in X_test])
    
    return X_train_scaled, X_test_scaled, y_train, y_test, scaler

In [35]:
def predict_anomaly(model, scaler, input_metrics, feature_columns, lookback=5):
    """
    Make predictions using the trained LSTM model
    """
    # Prepare input data
    if isinstance(input_metrics, dict):
        input_df = pd.DataFrame([input_metrics])
        input_metrics = input_df[feature_columns].values[0]
    
    # Create sequence
    sequence = np.array([input_metrics] * lookback)
    sequence = sequence.reshape(1, lookback, len(feature_columns))
    
    # Scale sequence
    scaled_sequence = np.array([scaler.transform(seq) for seq in sequence])
    
    # Make prediction
    prediction = model.predict(scaled_sequence, verbose=0)
    probability = prediction[0][0]
    
    return {
        'anomaly_detected': bool(probability >= 0.5),
        'probability': float(probability)
    }

# Usage example:
if __name__ == "__main__":
    # Define features
    feature_columns = [
        'Download_Speed', 'Reach_Time', 'Time_to_First_Byte', 
        'HTTP_Probe_Duration', 'Page_Load_Time', 'DNS_Query_Time',
        'CPU_utilization', 'Memory_utilization', 'Load_average_1m_avg'
        # Add other relevant features
    ]
    
    # Prepare data
    X_train_scaled, X_test_scaled, y_train, y_test, scaler = prepare_data(
        df=df,
        feature_columns=feature_columns,
        target_column='website_events',
        lookback=5
    )
    
    # Create and train model
    model = create_lstm_model(
        timesteps=5,
        n_features=len(feature_columns)
    )
    
    # Train model
    history = model.fit(
        X_train_scaled,
        y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True
            )
        ]
    )
    
    # Example prediction
    test_metrics = {
        'Download_Speed': 1084.0767,
        'Reach_Time': 0.00346,
        'Time_to_First_Byte': 0.00342,
        'HTTP_Probe_Duration': 1.066,
        'Page_Load_Time': 0.003683,
        'DNS_Query_Time': 110,
        'CPU_utilization': 1.3696452,
        'Memory_utilization': 46.290275,
        'Load_average_1m_avg': 0.943848
    }
    
    prediction = predict_anomaly(model, scaler, test_metrics, feature_columns)
    print(f"Anomaly Detected: {prediction['anomaly_detected']}")
    print(f"Probability: {prediction['probability']:.4f}")

Epoch 1/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.0455 - auc: 0.5193 - loss: -18.5313 - precision: 0.1211 - recall_1: 0.9586 - val_accuracy: 0.0000e+00 - val_auc: 0.5552 - val_loss: -153.6347 - val_precision: 0.1279 - val_recall_1: 1.0000
Epoch 2/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0048 - auc: 0.5733 - loss: -193.7206 - precision: 0.1227 - recall_1: 0.9986 - val_accuracy: 0.0000e+00 - val_auc: 0.6478 - val_loss: -511.5312 - val_precision: 0.1279 - val_recall_1: 1.0000
Epoch 3/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.0097 - auc: 0.6178 - loss: -562.8975 - precision: 0.1236 - recall_1: 0.9965 - val_accuracy: 0.1142 - val_auc: 0.6446 - val_loss: -1118.4993 - val_precision: 0.1420 - val_recall_1: 0.9806
Epoch 4/100
[1m404/404[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.2009 - auc: 0.6559 - loss: -11

In [36]:
# 1. First define your input metrics for prediction
test_metrics = {
    'Download_Speed': 1084.0767,
    'Reach_Time': 0.00346,
    'Time_to_First_Byte': 0.00342,
    'HTTP_Probe_Duration': 1.066,
    'Page_Load_Time': 0.003683,
    'DNS_Query_Time': 110,
    'CPU_utilization': 1.3696452,
    'Memory_utilization': 46.290275,
    'Load_average_1m_avg': 0.943848,
    'Load_average_5m_avg': 0.504883,
    'Load_average_15m_avg': 0.193848,
    'CPU_idle_time': 98.6303548,
    'CPU_iowait_time': 0.04182,
    'CPU_system_time': 0.2341426,
    'CPU_user_time': 0.9838802,
    'Available_memory_in_percent': 54.451467
}

# 2. Function to make predictions
def make_prediction(model, scaler, input_data, feature_columns, timesteps=5):
    """
    Make a prediction for a single input
    """
    # Prepare input data
    input_df = pd.DataFrame([input_data])
    sequence = input_df[feature_columns].values
    
    # Create sequence of required timesteps
    sequence = np.repeat(sequence, timesteps, axis=0)
    sequence = sequence.reshape(1, timesteps, len(feature_columns))
    
    # Scale the sequence
    scaled_sequence = np.zeros_like(sequence)
    for i in range(sequence.shape[0]):
        scaled_sequence[i] = scaler.transform(sequence[i])
    
    # Make prediction
    prediction = model.predict(scaled_sequence, verbose=0)
    probability = float(prediction[0][0])
    
    return {
        'prediction': 1 if probability >= 0.5 else 0,
        'probability': probability,
        'interpretation': 'Anomaly Detected' if probability >= 0.5 else 'Normal'
    }

# 3. Make prediction
result = make_prediction(model, scaler, test_metrics, feature_columns)

# 4. Print results
print("\nPrediction Results:")
print("-" * 50)
print(f"Prediction: {result['prediction']}")
print(f"Probability: {result['probability']:.4f}")
print(f"Interpretation: {result['interpretation']}")

# 5. Optional: If you want to make multiple predictions
def batch_predict(model, scaler, input_data_list, feature_columns):
    """
    Make predictions for multiple inputs
    """
    results = []
    for data in input_data_list:
        result = make_prediction(model, scaler, data, feature_columns)
        results.append(result)
    return results

# Example of multiple predictions
test_cases = [
    test_metrics,  # First case
    {**test_metrics, 'CPU_utilization': 95.0},  # Second case with high CPU
    {**test_metrics, 'Memory_utilization': 90.0}  # Third case with high memory
]

print("\nBatch Predictions:")
print("-" * 50)
for i, result in enumerate(batch_predict(model, scaler, test_cases, feature_columns)):
    print(f"\nCase {i+1}:")
    print(f"Prediction: {result['prediction']}")
    print(f"Probability: {result['probability']:.4f}")
    print(f"Interpretation: {result['interpretation']}")


Prediction Results:
--------------------------------------------------
Prediction: 0
Probability: 0.0578
Interpretation: Normal

Batch Predictions:
--------------------------------------------------

Case 1:
Prediction: 0
Probability: 0.0578
Interpretation: Normal

Case 2:
Prediction: 1
Probability: 1.0000
Interpretation: Anomaly Detected

Case 3:
Prediction: 1
Probability: 1.0000
Interpretation: Anomaly Detected
