In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [7]:
# 1. Load event mappings
def load_event_mappings():
    # Create mappings dictionary from your event_mappings.txt
    website_events = {
        0: 'nan', 2: '1', 3: '2', 4: '3', 5: '4', 6: '5', 7: '6', 
        8: '7', 9: '8', 10: '9', 11: '10', 12: '11', 13: '12', 
        14: '13', 15: '14', 16: '15', 17: '16', 18: '17', 19: '18', 
        20: '19', 21: '20', 22: '21', 23: '22', 24: '23'
    }
    
    server_events = {
        0: 'nan', 2: '1', 3: '2', 4: '3', 5: '4', 6: '5', 7: '6'
    }
    
    return website_events, server_events


In [8]:
# 2. Load and prepare data
def prepare_data(csv_path):
    # Read the CSV file
    df = pd.read_csv(csv_path)
    
    # Convert clock to datetime
    df['clock'] = pd.to_datetime(df['clock'])
    
    # Load event mappings
    website_events, server_events = load_event_mappings()
    
    # Map the events to their names
    df['website_event_name'] = df['website_events'].map(website_events)
    df['server_event_name'] = df['server_events'].map(server_events)
    
    # Select features for prediction
    feature_columns = [
        'Download_Speed', 'Reach_Time', 'Time_to_First_Byte', 'HTTP_Probe_Duration',
        'Page_Load_Time', 'DNS_Query_Time', 'Status_ID', 'Failed_step_of_scenario_WEB_HEALTH_CHECK',
        'Interrupts_per_second', 'Load_average_15m_avg', 'Load_average_1m_avg',
        'Load_average_5m_avg', 'CPU_utilization', 'CPU_idle_time', 'CPU_iowait_time',
        'CPU_system_time', 'CPU_user_time', 'xvda_Disk_utilization',
        'Boot_Space_Used_in_percent', 'Available_memory_in_percent', 'Memory_utilization',
        'Space_Available', 'Boot_Space_Available', 'Available_memory', 'Total_memory'
    ]
    
    # Prepare X (features)
    X = df[feature_columns]
    
    # Prepare y (target) - we'll create two models, one for website events and one for server events
    y_website = df['website_event_name']
    y_server = df['server_event_name']
    
    # Encode the target variables
    le_website = LabelEncoder()
    le_server = LabelEncoder()
    
    y_website_encoded = le_website.fit_transform(y_website)
    y_server_encoded = le_server.fit_transform(y_server)
    
    return X, y_website_encoded, y_server_encoded, le_website, le_server

In [9]:
# 3. Train models
def train_models(X, y_website, y_server):
    # Split the data
    X_train, X_test, y_website_train, y_website_test, y_server_train, y_server_test = train_test_split(
        X, y_website, y_server, test_size=0.2, random_state=42
    )
    
    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train website events model
    website_model = RandomForestClassifier(n_estimators=100, random_state=42)
    website_model.fit(X_train_scaled, y_website_train)
    
    # Train server events model
    server_model = RandomForestClassifier(n_estimators=100, random_state=42)
    server_model.fit(X_train_scaled, y_server_train)
    
    # Evaluate models
    print("\nWebsite Events Model Performance:")
    y_website_pred = website_model.predict(X_test_scaled)
    print(classification_report(y_website_test, y_website_pred))
    
    print("\nServer Events Model Performance:")
    y_server_pred = server_model.predict(X_test_scaled)
    print(classification_report(y_server_test, y_server_pred))
    
    return website_model, server_model, scaler

In [10]:
# 4. Function to make predictions
def predict_events(website_model, server_model, scaler, le_website, le_server, input_metrics):
    # Scale the input metrics
    input_scaled = scaler.transform([input_metrics])
    
    # Make predictions
    website_pred = website_model.predict(input_scaled)
    server_pred = server_model.predict(input_scaled)
    
    # Convert predictions back to event names
    website_event = le_website.inverse_transform(website_pred)[0]
    server_event = le_server.inverse_transform(server_pred)[0]
    
    return website_event, server_event

# Main execution
if __name__ == "__main__":
    # Load and prepare data
    X, y_website, y_server, le_website, le_server = prepare_data('Imputed_New-Dataset_Aiops_3.csv')
    
    # Train the models
    print("Training the models...")
    website_model, server_model, scaler = train_models(X, y_website, y_server)
    
    # Example prediction
    print("\nModels are ready for predictions!")
    
    # Example of how to use the models
    sample_metrics = X.iloc[0].values  # Using first row as example
    website_event, server_event = predict_events(
        website_model, server_model, scaler, 
        le_website, le_server, sample_metrics
    )
    print(f"\nPredicted Website Event: {website_event}")
    print(f"Predicted Server Event: {server_event}")

Training the models...

Website Events Model Performance:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         0
           3       0.25      0.01      0.01       142
           4       0.00      0.00      0.00         3
           5       0.00      0.00      0.00        69
           6       0.00      0.00      0.00        20
           7       0.13      0.07      0.09        45
           8       0.00      0.00      0.00        11
           9       0.00      0.00      0.00        18
          10       0.00      0.00      0.00        21
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00        37
          13       0.46      0.49      0.48        87
          14       0.00      0.00      0.00        15
          17       0.00      0.00      0.00         0
          18       0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Define event mappings
def get_event_mappings():
    server_events = {
        0: 'nan',
        1: 'Number of installed packages has been changed',
        2: 'High CPU utilization (over 90% for 5m)',
        3: 'Load average is too high (per CPU load over 1.5 for 5m)',
        4: 'Zabbix agent is not available (for 3m)',
        5: 'application-server has been restarted (uptime < 10m)',
        6: '/etc/passwd has been changed'
    }
    
    website_events = {
        0: 'nan',
        1: '65.2.33.65 is not available',
        2: '3.110.32.172 is not available',
        3: '52.66.107.192 is not available',
        4: '13.126.209.90 is not available',
        5: '65.2.168.33 is not available',
        6: '13.127.178.245 is not available',
        7: '13.126.11.21 is not available',
        8: 'Reach Time is High for 13.126.11.21 (More than 0.5 Seconds)',
        9: 'Download Speed is Low for 13.126.11.21',
        10: 'Download Speed is Low for 13.126.11.21 (Less than 1000 Kbps)',
        11: 'Page Load Time is High for 13.126.11.21 (More than 0.1 Seconds)',
        12: 'HTTP Probe Duration is High for 13.126.11.21 (More than 10 milliseconds)',
        13: 'DNS Query Time is High for 13.126.11.21 (More than 40 milliseconds)',
        14: '13.233.233.130 is not available',
        15: 'DNS Query Time is High for 13.233.233.130 (More than 40 milliseconds)',
        16: 'Reach Time is High for 13.233.233.130 (More than 0.5 Seconds)',
        17: 'Download Speed is Low for 13.233.233.130 (Less than 1000 Kbps)',
        18: 'Page Load Time is High for 13.233.233.130 (More than 0.1 Seconds)',
        19: 'HTTP Probe Duration is High for 13.233.233.130 (More than 10 milliseconds)',
        20: 'DNS Query Time is High for 13.233.233.130 (More than 100 milliseconds)',
        21: 'Download Speed is Low for 13.233.233.130 (Less than 500 Kbps)',
        22: 'HTTP Probe Duration is High for 13.233.233.130 (More than 100 milliseconds)',
        23: 'Page Load Time is High for 13.233.233.130 (More than 0.5 Seconds)'
    }
    
    return server_events, website_events

# Rule-based event detection
def detect_events(metrics):
    possible_events = []
    
    # Server Events Rules
    if metrics['CPU_utilization'] > 90:
        possible_events.append(('server', 2))
    
    if any(metrics[f'Load_average_{m}m_avg'] > 1.5 for m in [1, 5, 15]):
        possible_events.append(('server', 3))
    
    # Website Events Rules
    if metrics['Download_Speed'] < 1000:
        possible_events.append(('website', 10))  # For 13.126.11.21
        possible_events.append(('website', 17))  # For 13.233.233.130
    
    if metrics['Download_Speed'] < 500:
        possible_events.append(('website', 21))  # For 13.233.233.130
    
    if metrics['Reach_Time'] > 0.5:
        possible_events.append(('website', 8))   # For 13.126.11.21
        possible_events.append(('website', 16))  # For 13.233.233.130
    
    if metrics['Page_Load_Time'] > 0.1:
        possible_events.append(('website', 11))  # For 13.126.11.21
        possible_events.append(('website', 18))  # For 13.233.233.130
    
    if metrics['Page_Load_Time'] > 0.5:
        possible_events.append(('website', 23))  # For 13.233.233.130
    
    if metrics['HTTP_Probe_Duration'] > 0.01:  # 10 milliseconds
        possible_events.append(('website', 12))  # For 13.126.11.21
        possible_events.append(('website', 19))  # For 13.233.233.130
    
    if metrics['HTTP_Probe_Duration'] > 0.1:   # 100 milliseconds
        possible_events.append(('website', 22))  # For 13.233.233.130
    
    if metrics['DNS_Query_Time'] > 40:
        possible_events.append(('website', 13))  # For 13.126.11.21
        possible_events.append(('website', 15))  # For 13.233.233.130
    
    if metrics['DNS_Query_Time'] > 100:
        possible_events.append(('website', 20))  # For 13.233.233.130
    
    return possible_events

# Function to predict events for given metrics
def predict_events(metrics):
    server_events, website_events = get_event_mappings()
    possible_events = detect_events(metrics)
    
    print("\nPredicted Events Based on Rules:")
    if not possible_events:
        print("No events detected - all metrics are within normal ranges")
    else:
        for event_type, event_id in possible_events:
            if event_type == 'server':
                print(f"Server Event: {server_events[event_id]}")
            else:
                print(f"Website Event: {website_events[event_id]}")
    
    return possible_events

# Test with your provided metrics
test_metrics = {
    'Download_Speed': 1327.96562,
    'Reach_Time': 0.0080042,
    'Time_to_First_Byte': 0.0007837,
    'HTTP_Probe_Duration': 5489.077,
    'Page_Load_Time': 4.23,
    'DNS_Query_Time': 24,
    'Status_ID': 1,
    'Failed_step_of_scenario_WEB_HEALTH_CHECK': 0,
    'Interrupts_per_second': 313.5782956,
    'Load_average_15m_avg': 23.539062,
    'Load_average_1m_avg': 13.539062,
    'Load_average_5m_avg': 13.5390620,
    'CPU_utilization': 94.818577,
    'CPU_idle_time': 63.181423,
    'CPU_iowait_time': 10.016706,
    'CPU_system_time': 17.233918,
    'CPU_user_time': 73.517962,
    'xvda_Disk_utilization': 160.04166729,
    'Boot_Space_Used_in_percent': 46.13612725,
    'Available_memory_in_percent': 41.499557,
    'Memory_utilization': 58.500443,
    'Space_Available': 9337511936,
    'Boot_Space_Available': 719982592,
    'Available_memory': 416624640,
    'Total_memory': 1003925504
}

# Make predictions
print("Analyzing metrics...")
predictions = predict_events(test_metrics)

# Print detailed analysis
print("\nDetailed Metric Analysis:")
print(f"CPU Utilization: {test_metrics['CPU_utilization']}% (Threshold: 90%)")
print(f"Download Speed: {test_metrics['Download_Speed']} Kbps (Thresholds: 1000, 500 Kbps)")
print(f"Reach Time: {test_metrics['Reach_Time']} seconds (Threshold: 0.5s)")
print(f"Page Load Time: {test_metrics['Page_Load_Time']} seconds (Thresholds: 0.1s, 0.5s)")
print(f"DNS Query Time: {test_metrics['DNS_Query_Time']} ms (Thresholds: 40ms, 100ms)")

Analyzing metrics...

Predicted Events Based on Rules:
Server Event: High CPU utilization (over 90% for 5m)
Server Event: Load average is too high (per CPU load over 1.5 for 5m)
Website Event: Page Load Time is High for 13.126.11.21 (More than 0.1 Seconds)
Website Event: Page Load Time is High for 13.233.233.130 (More than 0.1 Seconds)
Website Event: Page Load Time is High for 13.233.233.130 (More than 0.5 Seconds)
Website Event: HTTP Probe Duration is High for 13.126.11.21 (More than 10 milliseconds)
Website Event: HTTP Probe Duration is High for 13.233.233.130 (More than 10 milliseconds)
Website Event: HTTP Probe Duration is High for 13.233.233.130 (More than 100 milliseconds)

Detailed Metric Analysis:
CPU Utilization: 94.818577% (Threshold: 90%)
Download Speed: 1327.96562 Kbps (Thresholds: 1000, 500 Kbps)
Reach Time: 0.0080042 seconds (Threshold: 0.5s)
Page Load Time: 4.23 seconds (Thresholds: 0.1s, 0.5s)
DNS Query Time: 24 ms (Thresholds: 40ms, 100ms)
