In [1]:
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ----------------- STORM PREDICTION -----------------
# Stage 1 of the cascade: score every holdout row with the storm classifier,
# report its metrics, and keep the rows it flags as storms for the next stage.

# Holdout split used for the evaluation in this notebook
data = pd.read_csv('/kaggle/input/mlpr-data-split/holdout.csv')

# Ground-truth label column, followed by the classifier's input columns.
# The features are handed over positionally (see `.values` below), so this
# order presumably has to match the order used when the scaler was fitted.
target_col = 'is_storm_lagged'
storm_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt'
]

# Persisted classifier and its companion scaler
storm_model = joblib.load('/kaggle/input/storm_rf_model-1/scikitlearn/default/1/storm_rf_model (1).pkl')
scaler = joblib.load('/kaggle/input/storm_rf_model-1/scikitlearn/default/1/storm_scaler.pkl')

# Bare ndarray (no column names, which avoids the feature-name warning)
# -> scale -> predict
X = data[storm_features].values
X_scaled = scaler.transform(X)
storm_preds = storm_model.predict(X_scaled)

# Store the prediction next to the ground truth on the same frame
data['predicted_storm'] = storm_preds

# Binary metrics: true label vs. stored prediction
y_true, y_pred = data[target_col], data['predicted_storm']
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=0)
recall = recall_score(y_true, y_pred, zero_division=0)
f1 = f1_score(y_true, y_pred, zero_division=0)
conf_matrix = confusion_matrix(y_true, y_pred)

# Emit the report; the text is identical to printing each line separately
report_lines = [
    "Storm Prediction Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
]
print("\n".join(report_lines))
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_report(y_true, y_pred, zero_division=0), sep="\n")

# Only rows the classifier labelled as storm (1) continue to the severity stage
storm_data = data.loc[data['predicted_storm'] == 1].copy()



Storm Prediction Metrics:
Accuracy : 0.9066
Precision: 0.9103
Recall   : 0.9073
F1 Score : 0.9088

Confusion Matrix:
[[18566  1927]
 [ 1999 19563]]

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.91      0.90     20493
           1       0.91      0.91      0.91     21562

    accuracy                           0.91     42055
   macro avg       0.91      0.91      0.91     42055
weighted avg       0.91      0.91      0.91     42055



In [3]:
# ----------------- SEVERITY PREDICTION -----------------
# Stage 2 of the cascade: classify the severity of every row that stage 1
# flagged as a storm, report multi-class metrics, and split by predicted class.

# Ground-truth column and model inputs (same columns as the storm stage)
target_col = 'severity_class'
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt'
]

# Persisted severity classifier and its companion scaler
severity_model = joblib.load('/kaggle/input/severity_rf_model-1/scikitlearn/default/1/severity_rf_model (1).pkl')
severity_scaler = joblib.load('/kaggle/input/severity_rf_model-1/scikitlearn/default/1/severity_scaler.pkl')

# Bare ndarray -> scale -> predict, then store the prediction on the frame
X = storm_data[severity_features].values
X_scaled = severity_scaler.transform(X)
severity_preds = severity_model.predict(X_scaled)
storm_data['predicted_severity'] = severity_preds

# Multi-class metrics; precision/recall/F1 are support-weighted averages
y_true, y_pred = storm_data[target_col], storm_data['predicted_severity']
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, **weighted_kwargs)
recall = recall_score(y_true, y_pred, **weighted_kwargs)
f1 = f1_score(y_true, y_pred, **weighted_kwargs)
conf_matrix = confusion_matrix(y_true, y_pred)

# Emit the report; the text is identical to printing each line separately
report_lines = [
    "Severity Prediction Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
]
print("\n".join(report_lines))
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_report(y_true, y_pred, zero_division=0), sep="\n")

# One independent copy per predicted class: 0 -> low, 1 -> medium, 2 -> high.
# Rows predicted as any other label are not carried into these three frames.
low_severity_data, med_severity_data, high_severity_data = (
    storm_data[storm_data['predicted_severity'] == severity_level].copy()
    for severity_level in (0, 1, 2)
)



Severity Prediction Metrics:
Accuracy : 0.8306
Precision: 0.8313
Recall   : 0.8306
F1 Score : 0.8284

Confusion Matrix:
[[5963  288  306   31]
 [ 354 5162  737   63]
 [ 308  660 5586   92]
 [ 300  179  322 1139]]

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.91      0.88      6588
           1       0.82      0.82      0.82      6316
           2       0.80      0.84      0.82      6646
          10       0.86      0.59      0.70      1940

    accuracy                           0.83     21490
   macro avg       0.84      0.79      0.81     21490
weighted avg       0.83      0.83      0.83     21490



In [4]:
import numpy as np

# Stage 3 preparation: attach the binary outage label to each severity frame
# and pad each frame with a share of the non-storm rows as negative examples.

# Sentinel severity written onto appended non-storm rows. It is distinct from
# the 0/1/2 codes that were used to build the three severity frames.
NON_STORM_SEVERITY = 10

# Keep the cell safe to re-run: drop rows appended by an earlier execution so
# they are neither relabelled from customers_out below nor appended twice.
# On a first run no row carries the sentinel, so this is just a copy.
low_severity_data = low_severity_data[low_severity_data['predicted_severity'] != NON_STORM_SEVERITY].copy()
med_severity_data = med_severity_data[med_severity_data['predicted_severity'] != NON_STORM_SEVERITY].copy()
high_severity_data = high_severity_data[high_severity_data['predicted_severity'] != NON_STORM_SEVERITY].copy()

# Derive is_outage target (assuming customers_out > 0 indicates an outage)
for df in [low_severity_data, med_severity_data, high_severity_data]:
    df['is_outage'] = (df['customers_out'] > 0).astype(int)

# Extract non-storm rows from `data` (the holdout frame with predictions).
# NOTE(review): rows are selected by the ground-truth label, whereas the
# severity frames were selected by `predicted_storm`. A true non-storm row that
# was predicted as a storm can therefore appear in a severity frame and again
# here, possibly with a different is_outage value - confirm this is intended.
non_storm_data = data[data['is_storm_lagged'] == 0].copy()
non_storm_data['is_outage'] = 0  # No outage for non-storm events
non_storm_data['predicted_severity'] = NON_STORM_SEVERITY  # Placeholder for non-storm rows

# Shuffle reproducibly, then cut into three near-equal parts. The split is done
# on row positions rather than on the DataFrame itself: np.array_split on a
# DataFrame goes through the deprecated DataFrame.swapaxes and emits a
# FutureWarning, while positional slicing yields the same chunks without it.
non_storm_shuffled = non_storm_data.sample(frac=1, random_state=42)
non_storm_split = [
    non_storm_shuffled.iloc[positions]
    for positions in np.array_split(np.arange(len(non_storm_shuffled)), 3)
]
non_storm_low, non_storm_medium, non_storm_high = non_storm_split

# Augment DataFrames with non-storm rows (storm rows first, fresh 0..n-1 index)
low_severity_data = pd.concat([low_severity_data, non_storm_low], ignore_index=True)
med_severity_data = pd.concat([med_severity_data, non_storm_medium], ignore_index=True)
high_severity_data = pd.concat([high_severity_data, non_storm_high], ignore_index=True)

  return bound(*args, **kwds)


In [5]:
# Label column for the outage stage
target_col = 'is_outage'

# Inputs to the outage classifiers, assembled group by group; the resulting
# list keeps the original column order.
outage_features = (
    ['tmin', 'tmax', 'tavg', 'ppt']                                     # weather readings
    + ['has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree']  # has_* indicator columns
    + ['DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'duration_hours']             # damage and duration
    + ['desc_word_count', 'desc_char_count']                            # description length
)

# LOW

In [6]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ----------------- MODEL ARTIFACTS -----------------
# Outage classifier and scaler for the low-severity branch of the cascade
outage_model = joblib.load('/kaggle/input/low_severity_rf_model-1/scikitlearn/default/1/low_severity_rf_model (1).pkl')
scaler = joblib.load('/kaggle/input/low_severity_rf_model-1/scikitlearn/default/1/low_severity_scaler.pkl')

# ----------------- INPUTS AND LABELS -----------------
target_col = 'is_outage'
outage_features = [
    'tmin', 'tmax', 'tavg', 'ppt',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'duration_hours',
    'desc_word_count', 'desc_char_count'
]
# The feature frame is passed to the scaler as a DataFrame (column names kept)
X = low_severity_data[outage_features]
y_true_outage = low_severity_data[target_col]

# ----------------- OUTAGE PREDICTION -----------------
X_scaled = scaler.transform(X)
outage_pred = outage_model.predict(X_scaled)
y_pred_outage = outage_pred

# ----------------- METRICS -----------------
# Precision/recall/F1 use binary averaging (scored on the positive label)
binary_kwargs = {'average': 'binary', 'zero_division': 0}
accuracy = accuracy_score(y_true_outage, y_pred_outage)
precision = precision_score(y_true_outage, y_pred_outage, **binary_kwargs)
recall = recall_score(y_true_outage, y_pred_outage, **binary_kwargs)
f1 = f1_score(y_true_outage, y_pred_outage, **binary_kwargs)
conf_matrix = confusion_matrix(y_true_outage, y_pred_outage)

# ----------------- REPORT -----------------
# Same text as printing each line with its own print() call
report_lines = [
    "Outage Prediction Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
]
print("\n".join(report_lines))
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_report(y_true_outage, y_pred_outage, zero_division=0), sep="\n")

Outage Prediction Metrics:
Accuracy : 0.9645
Precision: 0.9545
Recall   : 0.9758
F1 Score : 0.9650

Confusion Matrix:
[[6540  321]
 [ 167 6728]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.96      6861
           1       0.95      0.98      0.97      6895

    accuracy                           0.96     13756
   macro avg       0.96      0.96      0.96     13756
weighted avg       0.96      0.96      0.96     13756



# MEDIUM

In [7]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ----------------- MODEL ARTIFACTS -----------------
# Outage classifier and scaler for the medium-severity branch of the cascade
outage_model = joblib.load('/kaggle/input/medium_severity_rf_model-1/scikitlearn/default/1/medium_severity_rf_model (1).pkl')
scaler = joblib.load('/kaggle/input/medium_severity_rf_model-1/scikitlearn/default/1/medium_severity_scaler.pkl')

# ----------------- INPUTS AND LABELS -----------------
target_col = 'is_outage'
outage_features = [
    'tmin', 'tmax', 'tavg', 'ppt',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'duration_hours',
    'desc_word_count', 'desc_char_count'
]
# The feature frame is passed to the scaler as a DataFrame (column names kept)
X = med_severity_data[outage_features]
y_true_outage = med_severity_data[target_col]

# ----------------- OUTAGE PREDICTION -----------------
X_scaled = scaler.transform(X)
outage_pred = outage_model.predict(X_scaled)
y_pred_outage = outage_pred

# ----------------- METRICS -----------------
# Precision/recall/F1 use binary averaging (scored on the positive label)
binary_kwargs = {'average': 'binary', 'zero_division': 0}
accuracy = accuracy_score(y_true_outage, y_pred_outage)
precision = precision_score(y_true_outage, y_pred_outage, **binary_kwargs)
recall = recall_score(y_true_outage, y_pred_outage, **binary_kwargs)
f1 = f1_score(y_true_outage, y_pred_outage, **binary_kwargs)
conf_matrix = confusion_matrix(y_true_outage, y_pred_outage)

# ----------------- REPORT -----------------
# Same text as printing each line with its own print() call
report_lines = [
    "Outage Prediction Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
]
print("\n".join(report_lines))
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_report(y_true_outage, y_pred_outage, zero_division=0), sep="\n")

Outage Prediction Metrics:
Accuracy : 0.9486
Precision: 0.9237
Recall   : 0.9729
F1 Score : 0.9477

Confusion Matrix:
[[6334  505]
 [ 170 6111]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.93      0.95      6839
           1       0.92      0.97      0.95      6281

    accuracy                           0.95     13120
   macro avg       0.95      0.95      0.95     13120
weighted avg       0.95      0.95      0.95     13120



# HIGH

In [8]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ----------------- MODEL ARTIFACTS -----------------
# Outage classifier and scaler for the high-severity branch of the cascade
outage_model = joblib.load('/kaggle/input/high_severity_rf_model-1/scikitlearn/default/1/high_severity_rf_model (1).pkl')
scaler = joblib.load('/kaggle/input/high_severity_rf_model-1/scikitlearn/default/1/high_severity_scaler.pkl')

# ----------------- INPUTS AND LABELS -----------------
target_col = 'is_outage'
outage_features = [
    'tmin', 'tmax', 'tavg', 'ppt',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'duration_hours',
    'desc_word_count', 'desc_char_count'
]
# The feature frame is passed to the scaler as a DataFrame (column names kept)
X = high_severity_data[outage_features]
y_true_outage = high_severity_data[target_col]

# ----------------- OUTAGE PREDICTION -----------------
X_scaled = scaler.transform(X)
outage_pred = outage_model.predict(X_scaled)
y_pred_outage = outage_pred

# ----------------- METRICS -----------------
# Precision/recall/F1 use binary averaging (scored on the positive label)
binary_kwargs = {'average': 'binary', 'zero_division': 0}
accuracy = accuracy_score(y_true_outage, y_pred_outage)
precision = precision_score(y_true_outage, y_pred_outage, **binary_kwargs)
recall = recall_score(y_true_outage, y_pred_outage, **binary_kwargs)
f1 = f1_score(y_true_outage, y_pred_outage, **binary_kwargs)
conf_matrix = confusion_matrix(y_true_outage, y_pred_outage)

# ----------------- REPORT -----------------
# Same text as printing each line with its own print() call
report_lines = [
    "Outage Prediction Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
]
print("\n".join(report_lines))
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_report(y_true_outage, y_pred_outage, zero_division=0), sep="\n")

Outage Prediction Metrics:
Accuracy : 0.9483
Precision: 0.9265
Recall   : 0.9743
F1 Score : 0.9498

Confusion Matrix:
[[6322  535]
 [ 178 6747]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.92      0.95      6857
           1       0.93      0.97      0.95      6925

    accuracy                           0.95     13782
   macro avg       0.95      0.95      0.95     13782
weighted avg       0.95      0.95      0.95     13782

