In [5]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Train and evaluate the storm-severity XGBoost classifier, then persist the
# fitted model and its feature scaler next to the notebook.
#
# Steps: load the pre-split CSVs -> keep storm rows only -> standardise the
# features (statistics learned on the training split only) -> fit the
# classifier -> report test metrics -> dump model and scaler with joblib.

# Input locations (Kaggle dataset mount); change these to run elsewhere.
TRAIN_CSV = '/kaggle/input/mlpr-data-split/train.csv'
TEST_CSV = '/kaggle/input/mlpr-data-split/test.csv'

# Load training and test data
train_data = pd.read_csv(TRAIN_CSV)
test_data = pd.read_csv(TEST_CSV)

# Filter for storm events (rows whose 'is_storm_lagged' flag equals 1)
train_data = train_data[train_data['is_storm_lagged'] == 1]
test_data = test_data[test_data['is_storm_lagged'] == 1]

# Define features and target
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt'
]
target_col = 'severity_class'

# Prepare training features and target
X_train = train_data[severity_features]
y_train = train_data[target_col]

# Scale training features; the scaler is fitted on the training split only
# and reused unchanged for the test split below.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Initialize and train model
# NOTE(review): the hyperparameter values look like the result of an automated
# search — confirm and record where they came from.
xgb_model = XGBClassifier(
    n_estimators=403,
    max_depth=10,
    learning_rate=0.09065400280278058,
    subsample=0.933968095670629,
    colsample_bytree=0.5647574078202744,
    gamma=0.00017586655077512627,
    min_child_weight=2,
    # NOTE(review): use_label_encoder is deprecated in recent XGBoost
    # releases — confirm the installed version still expects it.
    use_label_encoder=False,
    # 'mlogloss' is the multi-class log-loss. The recorded run reports three
    # severity classes, so the binary 'logloss' metric did not match the task.
    # No eval_set is passed to fit(), so this only matters once evaluation or
    # early stopping is added.
    eval_metric='mlogloss',
    random_state=42
)
xgb_model.fit(X_train_scaled, y_train)

# Prepare test features and target (transform only — no refitting on test)
X_test = test_data[severity_features]
y_test = test_data[target_col]
X_test_scaled = scaler.transform(X_test)

# Evaluate model on test data
y_pred = xgb_model.predict(X_test_scaled)

# Class-support-weighted averages; zero_division=0 scores an undefined
# per-class metric as 0 instead of emitting a warning.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted_kwargs)
recall = recall_score(y_test, y_pred, **weighted_kwargs)
f1 = f1_score(y_test, y_pred, **weighted_kwargs)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Severity Prediction Test Metrics:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Save the model and scaler; both are needed at inference time because the
# model was trained on scaled features.
joblib.dump(xgb_model, 'severity_xg_model.pkl')
joblib.dump(scaler, 'severity_scaler.pkl')

Severity Prediction Test Metrics:
Accuracy : 0.8502
Precision: 0.8500
Recall   : 0.8502
F1 Score : 0.8500

Confusion Matrix:
[[2897  164  174]
 [ 217 2434  380]
 [ 158  318 2676]]

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.90      0.89      3235
           1       0.83      0.80      0.82      3031
           2       0.83      0.85      0.84      3152

    accuracy                           0.85      9418
   macro avg       0.85      0.85      0.85      9418
weighted avg       0.85      0.85      0.85      9418



['severity_scaler.pkl']