In [76]:
import pandas as pd
import numpy as np
from typing import Dict, List, Union, Tuple
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

In [77]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_recall_fscore_support

In [78]:
import pandas as pd
import numpy as np
import os
import joblib
from datetime import datetime
from typing import Dict, List, Union, Tuple

In [79]:

class AlertPredictor:
    def __init__(self, model_type='xgboost'):
        """
        Initializes the AlertPredictor with the specified model type ('xgboost' or 'randomforest').
        """
        self.model_type = model_type
        self.models = {}
        self.scalers = {}
        self.alert_types = ['LOW', 'MEDIUM', 'HIGH', ] # 'SIGMA']
        self.features = ['ChlPrs',
                        #  'hour',
                        #  'day_of_week',
                        #  'month',
                        #  'is_weekend',
                         'rolling_mean', 'rolling_std'] + [f'time_since_{at}' for at in self.alert_types]

    def load_and_preprocess_data(self, folder):
        """
        Loads and preprocesses data from CSV files in the specified folder.
        """
        dfs = []
        for i in range(9, 16):
            file_name = f"HTOL-{i:02d}_alerts.csv"
            df = pd.read_csv(os.path.join(folder, file_name))
            df['machine_id'] = f'HTOL-{i:02d}'
            dfs.append(df)

        combined_df = pd.concat(dfs, ignore_index=True)
        combined_df['Time'] = pd.to_datetime(combined_df['Time'])
        combined_df = combined_df.sort_values(['machine_id', 'Time'])

        return combined_df

    def engineer_features(self, df):
        """
        Engineers features from the preprocessed data.
        """
        df['hour'] = df['Time'].dt.hour
        df['day_of_week'] = df['Time'].dt.dayofweek
        df['month'] = df['Time'].dt.month
        df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

        # Calculate rolling statistics
        df['rolling_mean'] = df.groupby('machine_id')['ChlPrs'].rolling(window=24, min_periods=1).mean().reset_index(0, drop=True)
        df['rolling_std'] = df.groupby('machine_id')['ChlPrs'].rolling(window=24, min_periods=1).std().reset_index(0, drop=True)

        # Calculate time since last alert for each type
        for alert_type in self.alert_types:
            df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
                lambda x: x['Time'] - x[x['ALERT'] == alert_type]['Time'].shift(1)).reset_index(level=0, drop=True)
            df[f'time_since_{alert_type}'] = df[f'time_since_{alert_type}'].dt.total_seconds() / 3600  # Convert to hours

        return df

    def prepare_data_for_classification(self, df, target_alert_type, prediction_window):
        """
        Prepares the data for training the classification model.
        """
        df['target'] = df.groupby('machine_id').apply(
            lambda x: (x['ALERT'] == target_alert_type).rolling(window=prediction_window).max().shift(-prediction_window + 1)).reset_index(level=0,
                                                                                                                                           drop=True)

        X = df[self.features]
        y = df['target'].fillna(0)  # Fill NaN with 0 (no alert)

        return X, y

    def train_and_evaluate_classifier(self, X, y, test_size=0.2):
        """
        Trains and evaluates the classification model.
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        if self.model_type == 'xgboost':
            # XGBoost configuration for imbalanced classification
            model = XGBClassifier(
                n_estimators=100,
                learning_rate=0.1,
                max_depth=6,
                min_child_weight=1,
                gamma=0,
                subsample=0.8,
                colsample_bytree=0.8,
                scale_pos_weight=len(y_train[y_train == 0]) / len(y_train[y_train == 1]),  # Handle class imbalance
                random_state=42,
                eval_metric='logloss',
                early_stopping_rounds=10,
            )
            model.fit(X_train_scaled, y_train, eval_set=[(X_test_scaled, y_test)], verbose=0)
        elif self.model_type == 'randomforest':
            model = RandomForestClassifier(n_estimators=100, random_state=42)
            model.fit(X_train_scaled, y_train)
        else:
            raise ValueError("Invalid model_type. Choose 'xgboost' or 'randomforest'.")

        y_pred = model.predict(X_test_scaled)
        print(classification_report(y_test, y_pred))

        return model, scaler

    def train(self, folder, prediction_window=7):
        """
        Trains the models for each alert type.
        """
        df = self.load_and_preprocess_data(folder)
        df = self.engineer_features(df)

        for alert_type in self.alert_types:
            print(f"\nTraining model for {alert_type} alerts:")
            X, y = self.prepare_data_for_classification(df, alert_type, prediction_window)
            model, scaler = self.train_and_evaluate_classifier(X, y)
            self.models[alert_type] = model
            self.scalers[alert_type] = scaler

    def predict(self, new_data):
        """
        Makes predictions on new data.
        """
        predictions = {}
        for alert_type in self.alert_types:
            X_new = new_data[self.features]
            X_new_scaled = self.scalers[alert_type].transform(X_new)
            alert_probability = self.models[alert_type].predict_proba(X_new_scaled)[0, 1]
            predictions[alert_type] = alert_probability
        return predictions

    def visualize_alerts(self, df, target_alert_type, prediction_window, probability_threshold=0.7):
        """
        Visualizes actual alerts and high-risk periods.
        """
        X = df[self.features]
        X_scaled = self.scalers[target_alert_type].transform(X)

        df['alert_probability'] = self.models[target_alert_type].predict_proba(X_scaled)[:, 1]
        df['high_risk'] = df['alert_probability'] > probability_threshold

        plt.figure(figsize=(20, 15))
        machines = df['machine_id'].unique()
        n_machines = len(machines)

        for i, machine_id in enumerate(machines):
            machine_df = df[df['machine_id'] == machine_id]

            # Plot actual alerts
            alerts = machine_df[machine_df['ALERT'] == target_alert_type]
            plt.scatter(alerts['Time'], [i - 0.2] * len(alerts), marker='o', s=100,
                        label=f'Actual {target_alert_type} Alert' if i == 0 else "")

            # Plot high-risk periods
            high_risk_periods = machine_df[machine_df['high_risk']]
            plt.scatter(high_risk_periods['Time'], [i + 0.2] * len(high_risk_periods), marker='x', s=100,
                        label=f'High Risk Period ({target_alert_type})' if i == 0 else "")

            plt.text(df['Time'].min(), i, machine_id, va='center', ha='right', fontweight='bold')

        plt.yticks(range(n_machines), machines)
        plt.xlabel('Date')
        plt.ylabel('Machine ID')
        plt.title(f'Actual Alerts vs High Risk Periods for {target_alert_type} Alerts')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

In [80]:
class ProductionAlertPredictor:
    def __init__(self):
        """
        Initialize the production predictor that handles both XGBoost and Random Forest models.
        """
        self.models = {}
        self.scalers = {}
        self.alert_types = ['LOW', 'MEDIUM', 'HIGH']
        self.features = [
            'ChlPrs',
            # 'hour',
            # 'day_of_week',
            # 'month',
            # 'is_weekend',
            'rolling_mean',
            'rolling_std'
        ] + [f'time_since_{at}' for at in self.alert_types]

    def save_models(self, xgb_predictor: AlertPredictor, rf_predictor: AlertPredictor,
                   save_dir: str) -> None:
        """
        Save trained models and scalers to disk.

        Args:
            xgb_predictor: Trained XGBoost AlertPredictor instance
            rf_predictor: Trained Random Forest AlertPredictor instance
            save_dir: Directory to save the models
        """
        os.makedirs(save_dir, exist_ok=True)

        # Save metadata
        metadata = {
            'features': self.features,
            'alert_types': self.alert_types,
            'timestamp': datetime.now().isoformat(),
        }
        joblib.dump(metadata, os.path.join(save_dir, 'metadata.joblib'))

        # Save models and scalers
        for alert_type in self.alert_types:
            # Save XGBoost models and scalers
            joblib.dump(
                xgb_predictor.models[alert_type],
                os.path.join(save_dir, f'xgboost_{alert_type.lower()}_model.joblib')
            )
            joblib.dump(
                xgb_predictor.scalers[alert_type],
                os.path.join(save_dir, f'xgboost_{alert_type.lower()}_scaler.joblib')
            )

            # Save Random Forest models and scalers
            joblib.dump(
                rf_predictor.models[alert_type],
                os.path.join(save_dir, f'randomforest_{alert_type.lower()}_model.joblib')
            )
            joblib.dump(
                rf_predictor.scalers[alert_type],
                os.path.join(save_dir, f'randomforest_{alert_type.lower()}_scaler.joblib')
            )

    def load_models(self, load_dir: str) -> None:
        """
        Load saved models and scalers from disk.

        Args:
            load_dir: Directory containing the saved models
        """
        # Load metadata
        metadata = joblib.load(os.path.join(load_dir, 'metadata.joblib'))
        self.features = metadata['features']
        self.alert_types = metadata['alert_types']

        # Initialize nested dictionaries for models and scalers
        self.models = {'xgboost': {}, 'randomforest': {}}
        self.scalers = {'xgboost': {}, 'randomforest': {}}

        # Load models and scalers
        for alert_type in self.alert_types:
            # Load XGBoost
            self.models['xgboost'][alert_type] = joblib.load(
                os.path.join(load_dir, f'xgboost_{alert_type.lower()}_model.joblib')
            )
            self.scalers['xgboost'][alert_type] = joblib.load(
                os.path.join(load_dir, f'xgboost_{alert_type.lower()}_scaler.joblib')
            )

            # Load Random Forest
            self.models['randomforest'][alert_type] = joblib.load(
                os.path.join(load_dir, f'randomforest_{alert_type.lower()}_model.joblib')
            )
            self.scalers['randomforest'][alert_type] = joblib.load(
                os.path.join(load_dir, f'randomforest_{alert_type.lower()}_scaler.joblib')
            )

    def prepare_features(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Prepare features for prediction.

        Args:
            data: DataFrame containing at minimum 'Time', 'ChlPrs', and 'machine_id' columns

        Returns:
            DataFrame with engineered features
        """
        df = data.copy()

        # Time-based features
        df['Time'] = pd.to_datetime(df['Time'])
        df['hour'] = df['Time'].dt.hour
        df['day_of_week'] = df['Time'].dt.dayofweek
        df['month'] = df['Time'].dt.month
        df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

        # Rolling statistics
        df['rolling_mean'] = df.groupby('machine_id')['ChlPrs'].rolling(
            window=24, min_periods=1).mean().reset_index(0, drop=True)
        df['rolling_std'] = df.groupby('machine_id')['ChlPrs'].rolling(
            window=24, min_periods=1).std().reset_index(0, drop=True)

        # Time since last alert features
        for alert_type in self.alert_types:
            if 'ALERT' in df.columns:
                df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
                    lambda x: x['Time'] - x[x['ALERT'] == alert_type]['Time'].shift(1)
                ).reset_index(level=0, drop=True)
                df[f'time_since_{alert_type}'] = df[f'time_since_{alert_type}'].dt.total_seconds() / 3600
            else:
                # For new data without alert history, use a large value
                df[f'time_since_{alert_type}'] = 168  # One week in hours

        return df[self.features]

    def predict(self, data: pd.DataFrame, model_type: str = 'xgboost') -> Dict[str, Dict[str, float]]:
        """
        Make predictions using the loaded models.

        Args:
            data: DataFrame containing required features
            model_type: 'xgboost' or 'randomforest'

        Returns:
            Dictionary containing predictions for each machine and alert type
        """
        if model_type not in ['xgboost', 'randomforest']:
            raise ValueError("model_type must be 'xgboost' or 'randomforest'")

        # Prepare features
        X = self.prepare_features(data)

        # Make predictions for each machine and alert type
        predictions = {}
        for machine_id in data['machine_id'].unique():
            machine_data = X[data['machine_id'] == machine_id]
            machine_predictions = {}

            for alert_type in self.alert_types:
                # Scale the features
                X_scaled = self.scalers[model_type][alert_type].transform(machine_data)

                # Get prediction probabilities
                probs = self.models[model_type][alert_type].predict_proba(X_scaled)[:, 1]

                # Store the average probability for this alert type
                machine_predictions[alert_type] = float(probs.mean())

            predictions[machine_id] = machine_predictions

        return predictions

def save_trained_models(xgb_predictor: AlertPredictor, rf_predictor: AlertPredictor,
                       save_dir: str) -> None:
    """
    Convenience function to save trained models.
    """
    production_predictor = ProductionAlertPredictor()
    production_predictor.save_models(xgb_predictor, rf_predictor, save_dir)
    print(f"Models saved successfully to {save_dir}")

def load_production_predictor(load_dir: str) -> ProductionAlertPredictor:
    """
    Convenience function to load saved models.
    """
    production_predictor = ProductionAlertPredictor()
    production_predictor.load_models(load_dir)
    return production_predictor

In [81]:
class EnsembleAlertPredictor:
    def __init__(self, base_predictor: ProductionAlertPredictor,
                 lookback_window: int = 168,  # 7 days in hours
                 prediction_window: int = 168,  # 7 days in hours
                 agreement_threshold: float = 0.7):
        """
        Initialize the ensemble predictor that requires agreement between XGBoost and Random Forest.
        """
        self.predictor = base_predictor
        self.lookback_window = lookback_window
        self.prediction_window = prediction_window
        self.agreement_threshold = agreement_threshold

    def prepare_time_series_features(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Prepare time series features from historical data.
        """
        df = data.copy()

        # Ensure numeric type for ChlPrs
        df['ChlPrs'] = pd.to_numeric(df['ChlPrs'], errors='coerce')

        # Convert Time to datetime if it's not already
        df['Time'] = pd.to_datetime(df['Time'])

        # Sort the data
        df = df.sort_values(['machine_id', 'Time'])

        # Calculate rolling statistics for each machine
        df['rolling_mean'] = df.groupby('machine_id')['ChlPrs'].rolling(
            window=24, min_periods=1).mean().reset_index(0, drop=True)
        df['rolling_std'] = df.groupby('machine_id')['ChlPrs'].rolling(
            window=24, min_periods=1).std().reset_index(0, drop=True)

        # Handle time since last alert features
        if 'ALERT' in df.columns:
            for alert_type in self.predictor.alert_types:
                # Initialize the column with default value
                df[f'time_since_{alert_type}'] = 168.0  # One week in hours

                # Calculate time since last alert for each machine
                for machine in df['machine_id'].unique():
                    machine_data = df[df['machine_id'] == machine].copy()
                    alert_mask = machine_data['ALERT'] == alert_type

                    if alert_mask.any():
                        # Get alert times for this type
                        alert_times = machine_data[alert_mask]['Time']

                        # For each row, find the time difference to the most recent previous alert
                        for idx in machine_data.index:
                            current_time = machine_data.loc[idx, 'Time']
                            previous_alerts = alert_times[alert_times < current_time]

                            if not previous_alerts.empty:
                                last_alert_time = previous_alerts.max()
                                hours_since = (current_time - last_alert_time).total_seconds() / 3600
                                df.loc[idx, f'time_since_{alert_type}'] = hours_since
        else:
            # If no alert history, set default values
            for alert_type in self.predictor.alert_types:
                df[f'time_since_{alert_type}'] = 168.0  # One week in hours

        # Fill missing values
        numeric_columns = df.select_dtypes(include=[np.number]).columns
        df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')

        return df

    def predict_window(self, historical_data: pd.DataFrame) -> Dict[str, Dict[str, Dict[str, float]]]:
        """
        Make predictions using both models and require agreement.
        """
        # Prepare features
        features_df = self.prepare_time_series_features(historical_data)

        # Get predictions from both models
        xgb_predictions = self.predictor.predict(features_df, model_type='xgboost')
        rf_predictions = self.predictor.predict(features_df, model_type='randomforest')

        # Combine predictions where models agree
        ensemble_predictions = {}

        for machine_id in features_df['machine_id'].unique():
            machine_xgb = xgb_predictions[machine_id]
            machine_rf = rf_predictions[machine_id]

            ensemble_predictions[machine_id] = {
                'agreed_alerts': {},
                'probabilities': {
                    'xgboost': machine_xgb,
                    'randomforest': machine_rf
                }
            }

            # Check for agreement between models
            for alert_type in self.predictor.alert_types:
                xgb_prob = machine_xgb[alert_type]
                rf_prob = machine_rf[alert_type]

                # Models agree if both predict above threshold
                if (xgb_prob > self.agreement_threshold and
                    rf_prob > self.agreement_threshold):
                    ensemble_predictions[machine_id]['agreed_alerts'][alert_type] = {
                        'probability': (xgb_prob + rf_prob) / 2,
                        'confidence': min(xgb_prob, rf_prob) / max(xgb_prob, rf_prob)
                    }

        return ensemble_predictions

    def visualize_predictions(self, historical_data: pd.DataFrame,
                            predictions: Dict[str, Dict[str, Dict[str, float]]],
                            machine_id: str = None):
        """
        Visualize the predictions alongside historical data.
        """
        if machine_id is None:
            machine_id = list(predictions.keys())[0]

        machine_data = historical_data[historical_data['machine_id'] == machine_id].copy()
        machine_data['Time'] = pd.to_datetime(machine_data['Time'])

        # Create the visualization
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), height_ratios=[2, 1])

        # Plot historical pressure data
        ax1.plot(machine_data['Time'], machine_data['ChlPrs'],
                label='Pressure', color='blue', alpha=0.6)
        ax1.set_title(f'Historical Data and Predictions for {machine_id}')
        ax1.set_xlabel('Time')
        ax1.set_ylabel('Pressure')
        ax1.grid(True)

        # Add rolling mean and std to the plot
        features = self.prepare_time_series_features(machine_data)
        ax1.plot(features['Time'], features['rolling_mean'],
                label='Rolling Mean (24h)', color='green', alpha=0.5)
        ax1.fill_between(features['Time'],
                        features['rolling_mean'] - features['rolling_std'],
                        features['rolling_mean'] + features['rolling_std'],
                        color='green', alpha=0.2, label='±1 Std Dev')
        ax1.legend()

        # Plot model probabilities
        machine_preds = predictions[machine_id]
        last_time = machine_data['Time'].max()
        prediction_times = [last_time + timedelta(hours=i)
                          for i in range(self.prediction_window)]

        for alert_type in self.predictor.alert_types:
            xgb_prob = machine_preds['probabilities']['xgboost'][alert_type]
            rf_prob = machine_preds['probabilities']['randomforest'][alert_type]

            ax2.plot([prediction_times[0], prediction_times[-1]],
                    [xgb_prob, xgb_prob],
                    '--', label=f'XGB {alert_type}')
            ax2.plot([prediction_times[0], prediction_times[-1]],
                    [rf_prob, rf_prob],
                    ':', label=f'RF {alert_type}')

            # Highlight agreed alerts
            if alert_type in machine_preds['agreed_alerts']:
                alert_info = machine_preds['agreed_alerts'][alert_type]
                ax2.axhspan(self.agreement_threshold,
                           alert_info['probability'],
                           alpha=0.3,
                           color='red',
                           label=f'Agreed {alert_type} Alert')

        ax2.axhline(y=self.agreement_threshold, color='r', linestyle='-',
                    alpha=0.3, label='Agreement Threshold')
        ax2.set_ylim(0, 1)
        ax2.set_xlabel('Time')
        ax2.set_ylabel('Alert Probability')
        ax2.grid(True)
        ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

        plt.tight_layout()
        plt.show()

# Keep the simulate_production_monitoring function the same as before
def simulate_production_monitoring(predictor: EnsembleAlertPredictor,
                                data: pd.DataFrame,
                                start_date: str,
                                end_date: str,
                                step_hours: int = 24):
    """
    Simulate production monitoring by stepping through time windows.
    """
    data = data.copy()
    data['Time'] = pd.to_datetime(data['Time'])

    current_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    results = []

    while current_date <= end_date:
        # Get historical window
        historical_window = data[
            (data['Time'] >= current_date - timedelta(hours=predictor.lookback_window)) &
            (data['Time'] <= current_date)
        ].copy()

        if not historical_window.empty:
            # Make prediction
            predictions = predictor.predict_window(historical_window)
            print(predictions)

            # Store results
            for machine_id, machine_preds in predictions.items():
                if machine_preds['agreed_alerts']:
                    results.append({
                        'time': current_date,
                        'machine_id': machine_id,
                        'predictions': machine_preds
                    })

            # Visualize some predictions
            if len(results) > 0 and len(results) % 5 == 0:  # Visualize every 5th prediction
                predictor.visualize_predictions(historical_window, predictions)

        current_date += timedelta(hours=step_hours)

    return results

In [82]:
folder = "../../../outlier_tolerance=5_grouping_time_window=200_anomaly_threshold=6_start_date=2022-01-01_end_date=2026-01-01"
output_dir = "production_models_solo"

In [83]:
# Load the base predictor
predictor = load_production_predictor(output_dir)

# Create the ensemble predictor
ensemble_predictor = EnsembleAlertPredictor(
    base_predictor=predictor,
    lookback_window=168,  # 7 days of historical data
    prediction_window=168,  # 7 days prediction window
    agreement_threshold=0.3
)

In [84]:
# Load some historical data for testing
data_files = [f for f in os.listdir(folder) if f.endswith('_alerts.csv')]
dfs = []
for file in data_files:
    df = pd.read_csv(os.path.join(folder, file))
    machine_id = file.split('_')[0]
    df['machine_id'] = machine_id
    dfs.append(df)

historical_data = pd.concat(dfs, ignore_index=True)

# Simulate production monitoring
results = simulate_production_monitoring(
    predictor=ensemble_predictor,
    data=historical_data,
    start_date='2024-03-01',
    end_date='2024-04-01',
    step_hours=24
)

# Print results
for result in results:
    print(f"\nPredictions for {result['machine_id']} at {result['time']}:")
    for alert_type, alert_info in result['predictions']['agreed_alerts'].items():
        print(f"{alert_type} Alert: Probability = {alert_info['probability']:.3f}, "
              f"Confidence = {alert_info['confidence']:.3f}")

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.042073145508766174, 'MEDIUM': 7.084263779688627e-05, 'HIGH': 0.00043465394992381334}, 'randomforest': {'LOW': 1.2135922330097087e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.5195086002349854, 'MEDIUM': 3.527911030687392e-05, 'HIGH': 0.00043536117300391197}, 'randomforest': {'LOW': 0.009815369261477047, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.017293546348810196, 'MEDIUM': 4.509976861299947e-05, 'HIGH': 0.0005834285402670503}, 'randomforest': {'LOW': 6.715916722632639e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.025857599452137947, 'MEDIUM': 5.3079143981449306e-05, 'HIGH': 0.0007224732544273138}, 'randomforest': {'LOW': 1.6233766233766234e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboo

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.042073145508766174, 'MEDIUM': 7.084263779688627e-05, 'HIGH': 0.00043465394992381334}, 'randomforest': {'LOW': 1.2135922330097087e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.5195086002349854, 'MEDIUM': 3.527911030687392e-05, 'HIGH': 0.00043536117300391197}, 'randomforest': {'LOW': 0.009815369261477047, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.017293546348810196, 'MEDIUM': 4.509976861299947e-05, 'HIGH': 0.0005834285402670503}, 'randomforest': {'LOW': 6.715916722632639e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.025857599452137947, 'MEDIUM': 5.3079143981449306e-05, 'HIGH': 0.0007224732544273138}, 'randomforest': {'LOW': 1.6233766233766234e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboo

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.042073145508766174, 'MEDIUM': 7.084263779688627e-05, 'HIGH': 0.00043465394992381334}, 'randomforest': {'LOW': 1.2135922330097087e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.5195086002349854, 'MEDIUM': 3.527911030687392e-05, 'HIGH': 0.00043536117300391197}, 'randomforest': {'LOW': 0.009815369261477047, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.017293546348810196, 'MEDIUM': 4.509976861299947e-05, 'HIGH': 0.0005834285402670503}, 'randomforest': {'LOW': 6.715916722632639e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.025857599452137947, 'MEDIUM': 5.3079143981449306e-05, 'HIGH': 0.0007224732544273138}, 'randomforest': {'LOW': 1.6233766233766234e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboo

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.042073145508766174, 'MEDIUM': 7.084263779688627e-05, 'HIGH': 0.00043465394992381334}, 'randomforest': {'LOW': 1.2135922330097087e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.5195086002349854, 'MEDIUM': 3.527911030687392e-05, 'HIGH': 0.00043536117300391197}, 'randomforest': {'LOW': 0.009815369261477047, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.017293546348810196, 'MEDIUM': 4.509976861299947e-05, 'HIGH': 0.0005834285402670503}, 'randomforest': {'LOW': 6.715916722632639e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.025857599452137947, 'MEDIUM': 5.3079143981449306e-05, 'HIGH': 0.0007224732544273138}, 'randomforest': {'LOW': 1.6233766233766234e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboo

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.042073145508766174, 'MEDIUM': 7.084263779688627e-05, 'HIGH': 0.00043465394992381334}, 'randomforest': {'LOW': 1.2135922330097087e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.5195086002349854, 'MEDIUM': 3.527911030687392e-05, 'HIGH': 0.00043536117300391197}, 'randomforest': {'LOW': 0.009815369261477047, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.017293546348810196, 'MEDIUM': 4.509976861299947e-05, 'HIGH': 0.0005834285402670503}, 'randomforest': {'LOW': 6.715916722632639e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.025857599452137947, 'MEDIUM': 5.3079143981449306e-05, 'HIGH': 0.0007224732544273138}, 'randomforest': {'LOW': 1.6233766233766234e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboo

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.26263338327407837, 'MEDIUM': 0.00653473986312747, 'HIGH': 0.00048753590090200305}, 'randomforest': {'LOW': 0.003751520681265207, 'MEDIUM': 0.0013427615571776155, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.5051383376121521, 'MEDIUM': 3.1123909138841555e-05, 'HIGH': 0.00043579639168456197}, 'randomforest': {'LOW': 0.010468657599053815, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.01987769827246666, 'MEDIUM': 4.7812114644329995e-05, 'HIGH': 0.0005757565959356725}, 'randomforest': {'LOW': 3.920031360250882e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.025981992483139038, 'MEDIUM': 5.0435854063834995e-05, 'HIGH': 0.0006417742697522044}, 'randomforest': {'LOW': 9.72762645914397e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabiliti

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.26263338327407837, 'MEDIUM': 0.00653473986312747, 'HIGH': 0.00048753590090200305}, 'randomforest': {'LOW': 0.003751520681265207, 'MEDIUM': 0.0013427615571776155, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.5051383376121521, 'MEDIUM': 3.1123909138841555e-05, 'HIGH': 0.00043579639168456197}, 'randomforest': {'LOW': 0.010468657599053815, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.01987769827246666, 'MEDIUM': 4.7812114644329995e-05, 'HIGH': 0.0005757565959356725}, 'randomforest': {'LOW': 3.920031360250882e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.025981992483139038, 'MEDIUM': 5.0435854063834995e-05, 'HIGH': 0.0006417742697522044}, 'randomforest': {'LOW': 9.72762645914397e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabiliti

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3949875235557556, 'MEDIUM': 0.010428185574710369, 'HIGH': 0.0005193979013711214}, 'randomforest': {'LOW': 0.005957602339181287, 'MEDIUM': 0.0021515594541910332, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.49840766191482544, 'MEDIUM': 2.934288750111591e-05, 'HIGH': 0.00043597922194749117}, 'randomforest': {'LOW': 0.010525210084033612, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.021526681259274483, 'MEDIUM': 4.954994801664725e-05, 'HIGH': 0.0005706415977329016}, 'randomforest': {'LOW': 2.1390374331550802e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.026043027639389038, 'MEDIUM': 4.910735151497647e-05, 'HIGH': 0.0005980854039080441}, 'randomforest': {'LOW': 6.3398140321217245e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilit

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3949875235557556, 'MEDIUM': 0.010428185574710369, 'HIGH': 0.0005193979013711214}, 'randomforest': {'LOW': 0.005957602339181287, 'MEDIUM': 0.0021515594541910332, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.49840766191482544, 'MEDIUM': 2.934288750111591e-05, 'HIGH': 0.00043597922194749117}, 'randomforest': {'LOW': 0.010525210084033612, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.021526681259274483, 'MEDIUM': 4.954994801664725e-05, 'HIGH': 0.0005706415977329016}, 'randomforest': {'LOW': 2.1390374331550802e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.026043027639389038, 'MEDIUM': 4.910735151497647e-05, 'HIGH': 0.0005980854039080441}, 'randomforest': {'LOW': 6.3398140321217245e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilit

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3949875235557556, 'MEDIUM': 0.010428185574710369, 'HIGH': 0.0005193979013711214}, 'randomforest': {'LOW': 0.005957602339181287, 'MEDIUM': 0.0021515594541910332, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.49840766191482544, 'MEDIUM': 2.934288750111591e-05, 'HIGH': 0.00043597922194749117}, 'randomforest': {'LOW': 0.010525210084033612, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.021526681259274483, 'MEDIUM': 4.954994801664725e-05, 'HIGH': 0.0005706415977329016}, 'randomforest': {'LOW': 2.1390374331550802e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.026043027639389038, 'MEDIUM': 4.910735151497647e-05, 'HIGH': 0.0005980854039080441}, 'randomforest': {'LOW': 6.3398140321217245e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilit

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3949875235557556, 'MEDIUM': 0.010428185574710369, 'HIGH': 0.0005193979013711214}, 'randomforest': {'LOW': 0.005957602339181287, 'MEDIUM': 0.0021515594541910332, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.49840766191482544, 'MEDIUM': 2.934288750111591e-05, 'HIGH': 0.00043597922194749117}, 'randomforest': {'LOW': 0.010525210084033612, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.021526681259274483, 'MEDIUM': 4.954994801664725e-05, 'HIGH': 0.0005706415977329016}, 'randomforest': {'LOW': 2.1390374331550802e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.026043027639389038, 'MEDIUM': 4.910735151497647e-05, 'HIGH': 0.0005980854039080441}, 'randomforest': {'LOW': 6.3398140321217245e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilit

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3949875235557556, 'MEDIUM': 0.010428185574710369, 'HIGH': 0.0005193979013711214}, 'randomforest': {'LOW': 0.005957602339181287, 'MEDIUM': 0.0021515594541910332, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.49840766191482544, 'MEDIUM': 2.934288750111591e-05, 'HIGH': 0.00043597922194749117}, 'randomforest': {'LOW': 0.010525210084033612, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.021526681259274483, 'MEDIUM': 4.954994801664725e-05, 'HIGH': 0.0005706415977329016}, 'randomforest': {'LOW': 2.1390374331550802e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.026043027639389038, 'MEDIUM': 4.910735151497647e-05, 'HIGH': 0.0005980854039080441}, 'randomforest': {'LOW': 6.3398140321217245e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilit

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.05697609484195709, 'MEDIUM': 5.0119928346248344e-05, 'HIGH': 0.0004320989828556776}, 'randomforest': {'LOW': 2.390057361376673e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3643382787704468, 'MEDIUM': 4.2268122342647985e-05, 'HIGH': 0.0004360289894975722}, 'randomforest': {'LOW': 0.0024820659971305594, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0919475257396698, 'MEDIUM': 3.232579183531925e-05, 'HIGH': 0.0005001386743970215}, 'randomforest': {'LOW': 0.00021363173957273654, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0231529138982296, 'MEDIUM': 4.6016357373446226e-05, 'HIGH': 0.0005767319817095995}, 'randomforest': {'LOW': 4.9333991119881596e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboost':

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.05697609484195709, 'MEDIUM': 5.0119928346248344e-05, 'HIGH': 0.0004320989828556776}, 'randomforest': {'LOW': 2.390057361376673e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3643382787704468, 'MEDIUM': 4.2268122342647985e-05, 'HIGH': 0.0004360289894975722}, 'randomforest': {'LOW': 0.0024820659971305594, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0919475257396698, 'MEDIUM': 3.232579183531925e-05, 'HIGH': 0.0005001386743970215}, 'randomforest': {'LOW': 0.00021363173957273654, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0231529138982296, 'MEDIUM': 4.6016357373446226e-05, 'HIGH': 0.0005767319817095995}, 'randomforest': {'LOW': 4.9333991119881596e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboost':

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.05697609484195709, 'MEDIUM': 5.0119928346248344e-05, 'HIGH': 0.0004320989828556776}, 'randomforest': {'LOW': 2.390057361376673e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3643382787704468, 'MEDIUM': 4.2268122342647985e-05, 'HIGH': 0.0004360289894975722}, 'randomforest': {'LOW': 0.0024820659971305594, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0919475257396698, 'MEDIUM': 3.232579183531925e-05, 'HIGH': 0.0005001386743970215}, 'randomforest': {'LOW': 0.00021363173957273654, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0231529138982296, 'MEDIUM': 4.6016357373446226e-05, 'HIGH': 0.0005767319817095995}, 'randomforest': {'LOW': 4.9333991119881596e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboost':

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.05697609484195709, 'MEDIUM': 5.0119928346248344e-05, 'HIGH': 0.0004320989828556776}, 'randomforest': {'LOW': 2.390057361376673e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3643382787704468, 'MEDIUM': 4.2268122342647985e-05, 'HIGH': 0.0004360289894975722}, 'randomforest': {'LOW': 0.0024820659971305594, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0919475257396698, 'MEDIUM': 3.232579183531925e-05, 'HIGH': 0.0005001386743970215}, 'randomforest': {'LOW': 0.00021363173957273654, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0231529138982296, 'MEDIUM': 4.6016357373446226e-05, 'HIGH': 0.0005767319817095995}, 'randomforest': {'LOW': 4.9333991119881596e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboost':

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.05697609484195709, 'MEDIUM': 5.0119928346248344e-05, 'HIGH': 0.0004320989828556776}, 'randomforest': {'LOW': 2.390057361376673e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3643382787704468, 'MEDIUM': 4.2268122342647985e-05, 'HIGH': 0.0004360289894975722}, 'randomforest': {'LOW': 0.0024820659971305594, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0919475257396698, 'MEDIUM': 3.232579183531925e-05, 'HIGH': 0.0005001386743970215}, 'randomforest': {'LOW': 0.00021363173957273654, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0231529138982296, 'MEDIUM': 4.6016357373446226e-05, 'HIGH': 0.0005767319817095995}, 'randomforest': {'LOW': 4.9333991119881596e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboost':

  df[numeric_columns] = df[numeric_columns].fillna(method='ffill').fillna(method='bfill')
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(
  df[f'time_since_{alert_type}'] = df.groupby('machine_id').apply(


{'HTOL-09': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.05697609484195709, 'MEDIUM': 5.0119928346248344e-05, 'HIGH': 0.0004320989828556776}, 'randomforest': {'LOW': 2.390057361376673e-05, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-10': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.3643382787704468, 'MEDIUM': 4.2268122342647985e-05, 'HIGH': 0.0004360289894975722}, 'randomforest': {'LOW': 0.0024820659971305594, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-11': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0919475257396698, 'MEDIUM': 3.232579183531925e-05, 'HIGH': 0.0005001386743970215}, 'randomforest': {'LOW': 0.00021363173957273654, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-12': {'agreed_alerts': {}, 'probabilities': {'xgboost': {'LOW': 0.0231529138982296, 'MEDIUM': 4.6016357373446226e-05, 'HIGH': 0.0005767319817095995}, 'randomforest': {'LOW': 4.9333991119881596e-06, 'MEDIUM': 0.0, 'HIGH': 0.0}}}, 'HTOL-13': {'agreed_alerts': {}, 'probabilities': {'xgboost':

In [85]:
print(results)

[]
