# 1) Storm Prediction

In [1]:
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ----------------- INPUT DATA -----------------
# Evaluation frame; every later stage in this section is derived from it.
data = pd.read_csv('/kaggle/input/mlpr-data-split/holdout.csv')

# Columns handed to the storm classifier. The order of this list is the
# column order of the matrix passed to the scaler and the model.
storm_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS',
    'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt',
]
target_col = 'is_storm_lagged'

# ----------------- FITTED ARTEFACTS -----------------
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artefacts that come from a trusted source.
storm_model = joblib.load('/kaggle/input/storm_rf_model-1/scikitlearn/default/1/storm_rf_model (1).pkl')
scaler = joblib.load('/kaggle/input/storm_rf_model-1/scikitlearn/default/1/storm_scaler.pkl')

# ----------------- PREDICT -----------------
# A bare ndarray (no column labels) is passed on purpose: it avoids the
# feature-name warning when the scaler transforms the data.
X = data[storm_features].to_numpy()
X_scaled = scaler.transform(X)
storm_preds = storm_model.predict(X_scaled)

# Store the predictions alongside the ground truth; later cells filter on
# this column.
data['predicted_storm'] = storm_preds

# ----------------- EVALUATE -----------------
y_true, y_pred = data[target_col], data['predicted_storm']
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    score(y_true, y_pred, zero_division=0)
    for score in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_true, y_pred)

print(
    "Storm Prediction Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
    sep="\n",
)
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_report(y_true, y_pred, zero_division=0), sep="\n")

# ----------------- HAND-OFF TO SEVERITY STAGE -----------------
# Only rows the classifier flagged as storms continue down the pipeline.
is_predicted_storm = data['predicted_storm'] == 1
storm_data = data.loc[is_predicted_storm].copy()



Storm Prediction Metrics:
Accuracy : 0.9066
Precision: 0.9103
Recall   : 0.9073
F1 Score : 0.9088

Confusion Matrix:
[[18566  1927]
 [ 1999 19563]]

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.91      0.90     20493
           1       0.91      0.91      0.91     21562

    accuracy                           0.91     42055
   macro avg       0.91      0.91      0.91     42055
weighted avg       0.91      0.91      0.91     42055



# 2) Severity Prediction

In [2]:
# Keep only rows whose severity label is one of the expected classes; rows
# with any other value (including a missing label) are dropped here.
valid_classes = [0, 1, 2]

# .copy() matches the other filtered frames in this notebook and makes the
# result an independent frame, so the prediction column added in the next
# cell is written to this frame itself.
storm_data = storm_data[storm_data['severity_class'].isin(valid_classes)].copy()

In [3]:
# ----------------- SEVERITY PREDICTION -----------------

# Columns handed to the severity classifier. The order of this list is the
# column order of the matrix passed to the scaler and the model.
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS',
    'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt',
]
target_col = 'severity_class'

# Fitted artefacts for this stage.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artefacts that come from a trusted source.
severity_model = joblib.load('/kaggle/input/severity_xg_model/scikitlearn/default/1/severity_xg_model.pkl')
severity_scaler = joblib.load('/kaggle/input/severity_xg_model/scikitlearn/default/1/severity_scaler (1).pkl')

# Unlabelled ndarray -> scaled matrix -> predicted class per storm row.
X = storm_data[severity_features].to_numpy()
X_scaled = severity_scaler.transform(X)
severity_preds = severity_model.predict(X_scaled)
storm_data['predicted_severity'] = severity_preds

# Score the predictions; the summary scores are support-weighted averages
# over the classes (average='weighted').
y_true, y_pred = storm_data[target_col], storm_data['predicted_severity']
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    score(y_true, y_pred, average='weighted', zero_division=0)
    for score in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_true, y_pred)

print(
    "Severity Prediction Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
    sep="\n",
)
print("\nConfusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", classification_report(y_true, y_pred, zero_division=0), sep="\n")

# One independent frame per predicted severity class (0, 1, 2), consumed by
# the outage stage.
low_severity_data, med_severity_data, high_severity_data = (
    storm_data[storm_data['predicted_severity'] == level].copy()
    for level in (0, 1, 2)
)



Severity Prediction Metrics:
Accuracy : 0.8592
Precision: 0.8591
Recall   : 0.8592
F1 Score : 0.8591

Confusion Matrix:
[[5961  304  323]
 [ 346 5208  762]
 [ 314  704 5628]]

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.90      0.90      6588
           1       0.84      0.82      0.83      6316
           2       0.84      0.85      0.84      6646

    accuracy                           0.86     19550
   macro avg       0.86      0.86      0.86     19550
weighted avg       0.86      0.86      0.86     19550



# 3) Outage Prediction

In [4]:
import numpy as np

# Tunables for this cell.
NON_STORM_SEVERITY_PLACEHOLDER = 10  # marker value stored in predicted_severity for non-storm rows
NON_STORM_SHUFFLE_SEED = 42          # fixes the shuffle so the split is reproducible
N_SEVERITY_FRAMES = 3                # low / medium / high

# NOTE: this cell rebinds low/med/high_severity_data to the augmented frames,
# so running it a second time appends the non-storm rows again. Re-run the
# severity cell first if this cell has to be repeated.

# Derive is_outage target (assuming customers_out > 0 indicates an outage).
# The three frames are independent copies, so adding the column in place is safe.
for df in [low_severity_data, med_severity_data, high_severity_data]:
    df['is_outage'] = (df['customers_out'] > 0).astype(int)

# Extract non-storm rows from the hold-out frame.
# NOTE(review): rows are selected by the true label `is_storm_lagged`, while
# the severity frames were built from rows with `predicted_storm == 1`. A row
# that is a true non-storm but was predicted as a storm can therefore appear
# in both groups (once with is_outage derived from customers_out, once with
# is_outage forced to 0). Confirm this is the intended evaluation set.
non_storm_data = data[data['is_storm_lagged'] == 0].copy()
non_storm_data['is_outage'] = 0  # No outage for non-storm events
non_storm_data['predicted_severity'] = NON_STORM_SEVERITY_PLACEHOLDER

# Shuffle, then cut the rows into three near-equal consecutive slices.
# Row *positions* are split instead of the DataFrame itself: passing a
# DataFrame to np.array_split goes through the deprecated DataFrame.swapaxes
# and emits a FutureWarning. The resulting partition is the same.
shuffled_non_storm = non_storm_data.sample(frac=1, random_state=NON_STORM_SHUFFLE_SEED)
split_positions = np.array_split(np.arange(len(shuffled_non_storm)), N_SEVERITY_FRAMES)
non_storm_split = [shuffled_non_storm.iloc[positions] for positions in split_positions]
non_storm_low, non_storm_medium, non_storm_high = non_storm_split

# Augment each severity frame with its share of non-storm rows.
low_severity_data = pd.concat([low_severity_data, non_storm_low], ignore_index=True)
med_severity_data = pd.concat([med_severity_data, non_storm_medium], ignore_index=True)
high_severity_data = pd.concat([high_severity_data, non_storm_high], ignore_index=True)

  return bound(*args, **kwds)


In [5]:
# Feature columns for the outage models, one per line. The order of this
# list is the column order of any frame selected with it.
outage_features = [
    'tmin',
    'tmax',
    'tavg',
    'ppt',
    'has_tornado',
    'has_hail',
    'has_flood',
    'has_wind',
    'has_tree',
    'DAMAGE_PROPERTY',
    'DAMAGE_CROPS',
    'duration_hours',
    'desc_word_count',
    'desc_char_count',
]

# Name of the binary label column the outage models are scored against.
target_col = 'is_outage'

### a. Low

In [6]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ----------------- LOAD MODEL AND SCALER -----------------
# Artefacts for the low-severity outage model.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artefacts that come from a trusted source.
outage_model = joblib.load('/kaggle/input/low_severity_lgm_model/scikitlearn/default/1/low_severity_lgm_model.pkl')
scaler = joblib.load('/kaggle/input/low_severity_lgm_model/scikitlearn/default/1/low_severity_scaler (1).pkl')

# ----------------- PREPARE FEATURES -----------------
# `outage_features` and `target_col` come from the shared outage setup cell
# earlier in this section; they are not re-declared here so the three
# per-severity evaluations cannot drift apart.
X = low_severity_data[outage_features]

# ----------------- SCALE FEATURES -----------------
X_scaled = scaler.transform(X)

# ----------------- OUTAGE PREDICTION -----------------
outage_pred = outage_model.predict(X_scaled)

# ----------------- TRUE AND PREDICTED LABELS -----------------
y_true_outage = low_severity_data[target_col]
y_pred_outage = outage_pred

# ----------------- METRIC CALCULATIONS -----------------
# average='binary' scores the positive class (label 1) only.
accuracy = accuracy_score(y_true_outage, y_pred_outage)
precision = precision_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
recall = recall_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
f1 = f1_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
conf_matrix = confusion_matrix(y_true_outage, y_pred_outage)

# ----------------- OUTPUT -----------------
print("Outage Prediction Metrics:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_true_outage, y_pred_outage, zero_division=0))

Outage Prediction Metrics:
Accuracy : 0.9645
Precision: 0.9433
Recall   : 0.9870
F1 Score : 0.9646

Confusion Matrix:
[[6470  391]
 [  86 6505]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.94      0.96      6861
           1       0.94      0.99      0.96      6591

    accuracy                           0.96     13452
   macro avg       0.97      0.96      0.96     13452
weighted avg       0.97      0.96      0.96     13452



### b. Medium

In [7]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# ----------------- LOAD MODEL AND SCALER -----------------
# Artefacts for the medium-severity outage model.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artefacts that come from a trusted source.
outage_model = joblib.load('/kaggle/input/medium_severity_rf_model-1/scikitlearn/default/1/medium_severity_rf_model (1).pkl')
scaler = joblib.load('/kaggle/input/medium_severity_rf_model-1/scikitlearn/default/1/medium_severity_scaler.pkl')

# ----------------- PREPARE FEATURES -----------------
# `outage_features` and `target_col` come from the shared outage setup cell
# earlier in this section; they are not re-declared here so the three
# per-severity evaluations cannot drift apart.
X = med_severity_data[outage_features]

# ----------------- SCALE FEATURES -----------------
X_scaled = scaler.transform(X)

# ----------------- OUTAGE PREDICTION -----------------
outage_pred = outage_model.predict(X_scaled)

# ----------------- TRUE AND PREDICTED LABELS -----------------
y_true_outage = med_severity_data[target_col]
y_pred_outage = outage_pred

# ----------------- METRIC CALCULATIONS -----------------
# average='binary' scores the positive class (label 1) only.
accuracy = accuracy_score(y_true_outage, y_pred_outage)
precision = precision_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
recall = recall_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
f1 = f1_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
conf_matrix = confusion_matrix(y_true_outage, y_pred_outage)

# ----------------- OUTPUT -----------------
print("Outage Prediction Metrics:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_true_outage, y_pred_outage, zero_division=0))

Outage Prediction Metrics:
Accuracy : 0.9471
Precision: 0.9225
Recall   : 0.9704
F1 Score : 0.9458

Confusion Matrix:
[[6334  506]
 [ 184 6023]]

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.93      0.95      6840
           1       0.92      0.97      0.95      6207

    accuracy                           0.95     13047
   macro avg       0.95      0.95      0.95     13047
weighted avg       0.95      0.95      0.95     13047



### c. High

In [8]:
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import numpy as np  # Add numpy for thresholding

# ----------------- LOAD MODEL AND SCALER -----------------
# Artefacts for the high-severity outage model.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artefacts that come from a trusted source.
outage_model = joblib.load('/kaggle/input/high_severity_fnn_model/tensorflow2/default/1/high_severity_fnn_model.pkl')
scaler = joblib.load('/kaggle/input/high_severity_fnn_model/tensorflow2/default/1/high_severity_scaler (1).pkl')

# ----------------- PREPARE FEATURES -----------------
# `outage_features` and `target_col` come from the shared outage setup cell
# earlier in this section; they are not re-declared here so the three
# per-severity evaluations cannot drift apart.
X = high_severity_data[outage_features]

# ----------------- SCALE FEATURES -----------------
X_scaled = scaler.transform(X)

# ----------------- OUTAGE PREDICTION -----------------
# Unlike the low/medium models, this model's predict() output is treated as
# a continuous score and is thresholded below.
outage_pred = outage_model.predict(X_scaled)

# ----------------- THRESHOLD PREDICTIONS -----------------
# Scores at or above the threshold become class 1, everything else class 0.
OUTAGE_SCORE_THRESHOLD = 0.5
# .ravel() makes the labels explicitly 1-D so they line up with the 1-D true
# labels. Presumably the network has a single output unit, in which case
# predict() returns shape (n, 1) - TODO confirm. On an already 1-D array
# ravel() changes nothing.
y_pred_outage = (outage_pred >= OUTAGE_SCORE_THRESHOLD).astype(int).ravel()

# ----------------- TRUE AND PREDICTED LABELS -----------------
y_true_outage = high_severity_data[target_col]

# ----------------- METRIC CALCULATIONS -----------------
# average='binary' scores the positive class (label 1) only.
accuracy = accuracy_score(y_true_outage, y_pred_outage)
precision = precision_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
recall = recall_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
f1 = f1_score(y_true_outage, y_pred_outage, average='binary', zero_division=0)
conf_matrix = confusion_matrix(y_true_outage, y_pred_outage)

# ----------------- OUTPUT -----------------
print("Outage Prediction Metrics:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_true_outage, y_pred_outage, zero_division=0))

2025-05-02 10:58:45.241795: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746183525.478206      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746183525.543069      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-02 10:58:58.754476: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Outage Prediction Metrics:
Accuracy : 0.8754
Precision: 0.8248
Recall   : 0.9492
F1 Score : 0.8826

Confusion Matrix:
[[5508 1348]
 [ 340 6348]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.80      0.87      6856
           1       0.82      0.95      0.88      6688

    accuracy                           0.88     13544
   macro avg       0.88      0.88      0.87     13544
weighted avg       0.88      0.88      0.87     13544

