In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [21]:
# 1. Load event mappings
def load_event_mappings():
    """Return the lookup tables that translate numeric event codes into text.

    Returns:
        tuple[dict, dict]: ``(website_events, server_events)``. Each dict maps
        consecutive integer codes starting at 0 to an event description.
        Code 0 maps to the string ``'nan'`` in both tables.
    """
    # Descriptions are listed in code order: the position of a description
    # inside the tuple is its event code.
    website_descriptions = (
        'nan',
        '65.2.33.65 is not available',
        '3.110.32.172 is not available',
        '52.66.107.192 is not available',
        '13.126.209.90 is not available',
        '65.2.168.33 is not available',
        '13.127.178.245 is not available',
        '13.126.11.21 is not available',
        'Reach Time is High for 13.126.11.21 (More than 0.5 Seconds)',
        'Download Speed is Low for 13.126.11.21',
        'Download Speed is Low for 13.126.11.21 (Less than 1000 Kbps)',
        'Page Load Time is High for 13.126.11.21 (More than 0.1 Seconds)',
        'HTTP Probe Duration is High for 13.126.11.21 (More than 10 milliseconds)',
        'DNS Query Time is High for 13.126.11.21 (More than 40 milliseconds)',
        '13.233.233.130 is not available',
        'DNS Query Time is High for 13.233.233.130 (More than 40 milliseconds)',
        'Reach Time is High for 13.233.233.130 (More than 0.5 Seconds)',
        'Download Speed is Low for 13.233.233.130 (Less than 1000 Kbps)',
        'Page Load Time is High for 13.233.233.130 (More than 0.1 Seconds)',
        'HTTP Probe Duration is High for 13.233.233.130 (More than 10 milliseconds)',
        'DNS Query Time is High for 13.233.233.130 (More than 100 milliseconds)',
        'Download Speed is Low for 13.233.233.130 (Less than 500 Kbps)',
        'HTTP Probe Duration is High for 13.233.233.130 (More than 100 milliseconds)',
        'Page Load Time is High for 13.233.233.130 (More than 0.5 Seconds)',
    )

    server_descriptions = (
        'nan',
        'Number of installed packages has been changed',
        'High CPU utilization (over 90% for 5m)',
        'Load average is too high (per CPU load over 1.5 for 5m)',
        'Zabbix agent is not available (for 3m)',
        'application-server has been restarted (uptime < 10m)',
        '/etc/passwd has been changed',
    )

    # enumerate() numbers the entries 0, 1, 2, ... which yields the code keys.
    website_events = dict(enumerate(website_descriptions))
    server_events = dict(enumerate(server_descriptions))
    return website_events, server_events

In [22]:
# 2. Load and prepare data
def prepare_data(csv_path):
    """Read the metrics CSV and build the feature matrix and encoded targets.

    Args:
        csv_path: Location of the CSV file, handed to ``pd.read_csv``. The
            file must provide a ``clock`` column, the ``website_events`` and
            ``server_events`` code columns, and every feature column listed
            in ``feature_columns`` below.

    Returns:
        tuple: ``(X, y_website_encoded, y_server_encoded, le_website,
        le_server)``. ``X`` is the DataFrame restricted to the feature
        columns, the two ``y`` arrays are the label-encoded event names, and
        the two fitted ``LabelEncoder`` objects can turn encoded predictions
        back into event names.
    """
    # Metrics used as model inputs, in the order the models will see them.
    feature_columns = [
        'Download_Speed', 'Reach_Time', 'Time_to_First_Byte', 'HTTP_Probe_Duration',
        'Page_Load_Time', 'DNS_Query_Time', 'Status_ID', 'Failed_step_of_scenario_WEB_HEALTH_CHECK',
        'Interrupts_per_second', 'Load_average_15m_avg', 'Load_average_1m_avg',
        'Load_average_5m_avg', 'CPU_utilization', 'CPU_idle_time', 'CPU_iowait_time',
        'CPU_system_time', 'CPU_user_time', 'xvda_Disk_utilization',
        'Boot_Space_Used_in_percent', 'Available_memory_in_percent', 'Memory_utilization',
        'Space_Available', 'Boot_Space_Available', 'Available_memory', 'Total_memory'
    ]

    frame = pd.read_csv(csv_path)

    # Parse the timestamp column into datetimes.
    frame['clock'] = pd.to_datetime(frame['clock'])

    # Attach a human-readable name column next to each numeric event-code column.
    website_lookup, server_lookup = load_event_mappings()
    for code_column, name_column, lookup in (
        ('website_events', 'website_event_name', website_lookup),
        ('server_events', 'server_event_name', server_lookup),
    ):
        frame[name_column] = frame[code_column].map(lookup)

    X = frame[feature_columns]

    # One encoder per target, since website and server events are modelled separately.
    le_website = LabelEncoder()
    y_website_encoded = le_website.fit_transform(frame['website_event_name'])

    le_server = LabelEncoder()
    y_server_encoded = le_server.fit_transform(frame['server_event_name'])

    return X, y_website_encoded, y_server_encoded, le_website, le_server

In [23]:
# 3. Train models
def train_models(X, y_website, y_server):
    """Train and evaluate one random-forest classifier per event target.

    The rows are split 80/20 into train and test sets (``random_state=42``),
    the features are standardised with a ``StandardScaler`` fitted on the
    training rows only, and two ``RandomForestClassifier`` models with 100
    trees each are fitted: one on the website targets, one on the server
    targets. A classification report for each model on the held-out rows is
    printed.

    Args:
        X: Feature matrix with one row per sample.
        y_website: Encoded website-event labels aligned with the rows of ``X``.
        y_server: Encoded server-event labels aligned with the rows of ``X``.

    Returns:
        tuple: ``(website_model, server_model, scaler)`` - the two fitted
        classifiers and the fitted scaler, which must be applied to any input
        before calling the models.
    """
    # A single split call keeps features and both targets row-aligned.
    X_train, X_test, y_website_train, y_website_test, y_server_train, y_server_test = train_test_split(
        X, y_website, y_server, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training rows only so no test-set statistics leak in.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train website events model
    website_model = RandomForestClassifier(n_estimators=100, random_state=42)
    website_model.fit(X_train_scaled, y_website_train)

    # Train server events model
    server_model = RandomForestClassifier(n_estimators=100, random_state=42)
    server_model.fit(X_train_scaled, y_server_train)

    # Evaluate models.
    # zero_division=0: some classes have no predicted (or no true) samples in
    # the test split, so their precision/recall is undefined. The default
    # setting reports 0.0 for them as well but emits a warning per metric;
    # passing 0 explicitly keeps the same numbers without the warning noise.
    print("\nWebsite Events Model Performance:")
    y_website_pred = website_model.predict(X_test_scaled)
    print(classification_report(y_website_test, y_website_pred, zero_division=0))

    print("\nServer Events Model Performance:")
    y_server_pred = server_model.predict(X_test_scaled)
    print(classification_report(y_server_test, y_server_pred, zero_division=0))

    return website_model, server_model, scaler

In [24]:
# 4. Function to make predictions
def predict_events(website_model, server_model, scaler, le_website, le_server, input_metrics):
    """Predict the website and server event names for one sample of metrics.

    Args:
        website_model: Fitted classifier for website events (needs ``predict``).
        server_model: Fitted classifier for server events (needs ``predict``).
        scaler: Fitted scaler (needs ``transform``) that was used when
            training the models.
        le_website: Fitted label encoder for website events (needs
            ``inverse_transform``).
        le_server: Fitted label encoder for server events (needs
            ``inverse_transform``).
        input_metrics: One sample, given either as a flat sequence/array of
            feature values in training column order, or as a dict keyed by
            feature name. The dict form requires the scaler to expose
            ``feature_names_in_``.

    Returns:
        tuple: ``(website_event, server_event)`` - the decoded predictions.

    Raises:
        ValueError: If ``input_metrics`` is a dict and the scaler does not
            expose feature names, or the dict lacks one of those features.
    """
    # Present only when the scaler was fitted on data carrying column names.
    feature_names = getattr(scaler, 'feature_names_in_', None)

    if isinstance(input_metrics, dict):
        if feature_names is None:
            raise ValueError(
                "input_metrics was given as a dict, but the scaler has no "
                "feature_names_in_ to define the feature order"
            )
        missing = [name for name in feature_names if name not in input_metrics]
        if missing:
            raise ValueError(f"input_metrics is missing feature(s): {missing}")
        # Order the values exactly as the scaler saw the columns during fit.
        row = [input_metrics[name] for name in feature_names]
    else:
        row = input_metrics

    if feature_names is not None:
        # Give the sample the same column labels the scaler was fitted with;
        # passing an unlabeled list to such a scaler triggers a feature-name
        # mismatch warning on every call.
        sample = pd.DataFrame(np.asarray([row]), columns=list(feature_names))
    else:
        sample = [row]

    # Scale the input metrics
    input_scaled = scaler.transform(sample)

    # Make predictions
    website_pred = website_model.predict(input_scaled)
    server_pred = server_model.predict(input_scaled)

    # Convert predictions back to event names
    website_event = le_website.inverse_transform(website_pred)[0]
    server_event = le_server.inverse_transform(server_pred)[0]

    return website_event, server_event

# Main execution
if __name__ == "__main__":
    import os

    # Dataset location. The AIOPS_DATASET_CSV environment variable overrides
    # the path so the notebook can run on machines other than the one it was
    # written on; without it the original location is used.
    csv_path = os.environ.get(
        'AIOPS_DATASET_CSV',
        'C:/Users/suyog.kulkarni/OneDrive - Parkar Digital/Desktop/Unicorn/Imputed_New-Dataset_Aiops_3.csv',
    )

    # Load and prepare data
    X, y_website, y_server, le_website, le_server = prepare_data(csv_path)

    # Train the models
    print("Training the models...")
    website_model, server_model, scaler = train_models(X, y_website, y_server)

    # Example prediction
    print("\nModels are ready for predictions!")

    # One hand-written sample keyed by feature name.
    test_metrics = {
        'Download_Speed': 1084.0767,
        'Reach_Time': 0.00346,
        'Time_to_First_Byte': 0.00342,
        'HTTP_Probe_Duration': 1.066,
        'Page_Load_Time': 0.003683,
        'DNS_Query_Time': 110,
        'Status_ID': 1,
        'Failed_step_of_scenario_WEB_HEALTH_CHECK': 1,
        'Interrupts_per_second': 308.4319097,
        'Load_average_15m_avg': 0.193848,
        'Load_average_1m_avg': 0.943848,
        'Load_average_5m_avg': 0.504883,
        'CPU_utilization': 1.3696452,
        'CPU_idle_time': 98.6303548,
        'CPU_iowait_time': 0.04182,
        'CPU_system_time': 0.2341426,
        'CPU_user_time': 0.9838802,
        'xvda_Disk_utilization': 1.33915964,
        'Boot_Space_Used_in_percent': 16.13612725,
        'Available_memory_in_percent': 54.451467,
        'Memory_utilization': 46.290275,
        'Space_Available': 9388851200,
        'Boot_Space_Available': 719982592,
        'Available_memory': 546762752,
        'Total_memory': 1003925504
    }

    # Convert test_metrics to the correct format
    test_df = pd.DataFrame([test_metrics])
    test_metrics_array = test_df[X.columns].values[0]  # Ensure same column order as training data

    # Make predictions
    website_event, server_event = predict_events(
        website_model, server_model, scaler,
        le_website, le_server, test_metrics_array
    )

    print("\nPredictions for test metrics:")
    print(f"Predicted Website Event: {website_event}")
    print(f"Predicted Server Event: {server_event}")

Training the models...

Website Events Model Performance:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           2       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         1
           5       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         1
           8       0.25      0.01      0.01       142
           9       0.00      0.00      0.00        21
          10       0.00      0.00      0.00        69
          11       0.00      0.00      0.00         2
          12       0.00      0.00      0.00        31
          13       0.13      0.07      0.09        45
          14       0.00      0.00      0.00        37
          15       0.00      0.00      0.00         0
          16       0.00      0.00      0.00        18
          17       0.46      0.49      0.48        87
          18       0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
