In [1]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Model inputs: 24 columns, listed in the exact order they are handed to the
# scaler and the classifier. Keep the order stable -- the scaled matrix is
# positional, so reordering changes which column each tree split refers to.
storm_features = [
    'DEATHS_INDIRECT',
    'INJURIES_DIRECT',
    'INJURIES_INDIRECT',
    'DEATHS_DIRECT',
    'DAMAGE_PROPERTY',
    'DAMAGE_CROPS',
    'customers_out',
    'duration_hours',
    'desc_word_count',
    'desc_char_count',
    'has_tornado',
    'has_hail',
    'has_flood',
    'has_wind',
    'has_tree',
    'has_power',
    'has_damage',
    'has_outage',
    'has_broken',
    'has_blown',
    'tmin',
    'tmax',
    'tavg',
    'ppt',
]

# Name of the label column the classifier is trained to predict.
target_col = 'is_storm_lagged'

# Read the pre-split train and test CSVs (train first, then test).
train_data, test_data = map(
    pd.read_csv,
    (
        '/kaggle/input/mlpr-data-split/train.csv',
        '/kaggle/input/mlpr-data-split/test.csv',
    ),
)

# Training design matrix and label vector, selected by column name.
y_train = train_data[target_col]
X_train = train_data[storm_features]

# Fit the standardiser on the training features only, then apply it to them.
# The same fitted `scaler` object is reused later for the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Random forest configuration; random_state is fixed so repeated runs build
# the same forest from the same data.
rf_params = dict(
    n_estimators=201,
    max_depth=38,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
)

# Build the classifier and train it on the scaled training matrix
# (`fit` returns the estimator itself, so the fitted model is bound directly).
rf_model = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Test split: same feature columns and label column as training. The features
# pass through the already-fitted scaler (transform only, no re-fitting).
y_test = test_data[target_col]
X_test = test_data[storm_features]
X_test_scaled = scaler.transform(X_test)

# Hard class predictions for every test row.
y_pred = rf_model.predict(X_test_scaled)

# Summary metrics. The three ratio-based scores share zero_division=0 so an
# empty denominator is reported as 0.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_test, y_pred)

# Emit the full report in one call; sep="\n" puts each item on its own line,
# matching a sequence of separate print statements.
print(
    "Storm Prediction Test Metrics:",
    f"Accuracy : {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall   : {recall:.4f}",
    f"F1 Score : {f1:.4f}",
    "\nConfusion Matrix:",
    conf_matrix,
    "\nClassification Report:",
    classification_report(y_test, y_pred, zero_division=0),
    sep="\n",
)

# Persist the fitted classifier and the fitted scaler to the working directory.
# The scaler is written last, so its dump call stays the final expression here.
joblib.dump(value=rf_model, filename='storm_rf_model.pkl')
joblib.dump(value=scaler, filename='storm_scaler.pkl')

Storm Prediction Test Metrics:
Accuracy : 0.9067
Precision: 0.9067
Recall   : 0.9078
F1 Score : 0.9073

Confusion Matrix:
[[9469  987]
 [ 975 9597]]

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.91      0.91     10456
           1       0.91      0.91      0.91     10572

    accuracy                           0.91     21028
   macro avg       0.91      0.91      0.91     21028
weighted avg       0.91      0.91      0.91     21028



['storm_scaler.pkl']