In [8]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load training and test data
train_data = pd.read_csv('/kaggle/input/mlpr-data-split/train.csv')
test_data = pd.read_csv('/kaggle/input/mlpr-data-split/test.csv')

# Filter for storm events
train_data = train_data[train_data['is_storm_lagged'] == 1]
test_data = test_data[test_data['is_storm_lagged'] == 1]

severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'duration_hours',
    'desc_word_count', 'has_tornado', 'has_hail', 'has_flood', 'has_wind',
    'has_tree', 'has_broken', 'has_blown', 'tmin', 'tmax', 'tavg', 'ppt',
    'MAGNITUDE_IMPUTED', 'CZ_FIPS'
]
target_col = 'severity_class'

# Prepare training features and target
X_train = train_data[severity_features]
y_train = train_data[target_col]

# Scale training features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Initialize and train model
rf_model = RandomForestClassifier(
    n_estimators=201,
    max_depth=38,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42
)
rf_model.fit(X_train_scaled, y_train)

# Prepare test features and target
X_test = test_data[severity_features]
y_test = test_data[target_col]
X_test_scaled = scaler.transform(X_test)

# Evaluate model on test data
y_pred = rf_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Severity Prediction Test Metrics:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Save the model and scaler
joblib.dump(rf_model, 'severity_rf_model.pkl')
joblib.dump(scaler, 'severity_scaler.pkl')

Severity Prediction Test Metrics:
Accuracy : 0.9998
Precision: 0.9998
Recall   : 0.9998
F1 Score : 0.9998

Confusion Matrix:
[[3235    0    0    0]
 [   1 3030    0    0]
 [   0    0 3152    0]
 [   1    0    0 1153]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3235
           1       1.00      1.00      1.00      3031
           2       1.00      1.00      1.00      3152
          10       1.00      1.00      1.00      1154

    accuracy                           1.00     10572
   macro avg       1.00      1.00      1.00     10572
weighted avg       1.00      1.00      1.00     10572



['severity_scaler.pkl']

--------------------------------------

In [None]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load training and test data
train_data = pd.read_csv('/kaggle/input/mlpr-data-split/train.csv')
test_data = pd.read_csv('/kaggle/input/mlpr-data-split/test.csv')

# Filter for storm events
train_data = train_data[train_data['is_storm_lagged'] == 1]
test_data = test_data[test_data['is_storm_lagged'] == 1]

# Define valid severity classes
valid_classes = [0, 1, 2, 3]

# Filter out invalid severity_class values
train_data = train_data[train_data['severity_class'].isin(valid_classes)]
test_data = test_data[test_data['severity_class'].isin(valid_classes)]

# Define features and target
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'duration_hours',
    'desc_word_count', 'has_tornado', 'has_hail', 'has_flood', 'has_wind',
    'has_tree', 'has_broken', 'has_blown', 'tmin', 'tmax', 'tavg', 'ppt',
    'MAGNITUDE_IMPUTED', 'CZ_FIPS'
]
target_col = 'severity_class'

# Prepare training features and target
X_train = train_data[severity_features]
y_train = train_data[target_col]

# Scale training features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Initialize and train model
xgb_model = XGBClassifier(
    n_estimators=403,
    max_depth=10,
    learning_rate=0.09065400280278058,
    subsample=0.933968095670629,
    colsample_bytree=0.5647574078202744,
    gamma=0.00017586655077512627,
    min_child_weight=2,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=42
)
xgb_model.fit(X_train_scaled, y_train)

# Prepare test features and target
X_test = test_data[severity_features]
y_test = test_data[target_col]
X_test_scaled = scaler.transform(X_test)

# Evaluate model on test data
y_pred = xgb_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Severity Prediction Test Metrics:")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Optionally save the model and scaler
# joblib.dump(xgb_model, 'severity_xg_model.pkl')
# joblib.dump(scaler, 'severity_scaler.pkl')

In [3]:
import pandas as pd
import joblib
import lightgbm as lgb
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load training and test data
train_data = pd.read_csv('/kaggle/input/mlpr-data-split/train.csv')
test_data = pd.read_csv('/kaggle/input/mlpr-data-split/test.csv')

# Filter for storm events
train_data = train_data[train_data['is_storm_lagged'] == 1]
test_data = test_data[test_data['is_storm_lagged'] == 1]

# Define features and target
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt'
]
target_col = 'severity_class'

# Prepare training features and target
X_train = train_data[severity_features]
y_train = train_data[target_col]

# Scale training features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Prepare test features and target
X_test = test_data[severity_features]
y_test = test_data[target_col]
X_test_scaled = scaler.transform(X_test)

# Define objective function for Optuna
def objective(trial):
    param = {
        'objective': 'multiclass',
        'num_class': len(y_train.unique()),
        'metric': 'multi_logloss',
        'boosting_type': 'gbdt',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-3, 10, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
        'random_state': 42,
        'verbose': -1
    }

    # Train LightGBM model
    model = lgb.LGBMClassifier(**param)
    model.fit(X_train_scaled, y_train)

    # Predict and calculate accuracy
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # Adjust n_trials as needed

# Get best parameters
best_params = study.best_params
print("Best parameters:", best_params)

# Train final model with best parameters
final_params = {
    'objective': 'multiclass',
    'num_class': len(y_train.unique()),
    'metric': 'multi_logloss',
    'boosting_type': 'gbdt',
    'verbose': -1,
    'random_state': 42,
    **best_params
}
lgb_model = lgb.LGBMClassifier(**final_params)
lgb_model.fit(X_train_scaled, y_train)

# Evaluate model on test data
y_pred = lgb_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Severity Prediction Test Metrics (LightGBM):")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Save the model and scaler
# joblib.dump(lgb_model, 'severity_lgb_model.pkl')
# joblib.dump(scaler, 'severity_scaler.pkl')

[I 2025-05-13 18:06:03,188] A new study created in memory with name: no-name-d9795be3-ddca-412b-a599-0021637c9d60
[I 2025-05-13 18:06:23,305] Trial 0 finished with value: 0.6571131290200529 and parameters: {'n_estimators': 844, 'max_depth': 14, 'learning_rate': 0.016419063074686612, 'subsample': 0.8414746347199225, 'colsample_bytree': 0.9287190128546008, 'min_child_weight': 0.0019491484745080127, 'reg_alpha': 0.16012974705799382, 'reg_lambda': 0.003258151609977881}. Best is trial 0 with value: 0.6571131290200529.
[I 2025-05-13 18:06:38,096] Trial 1 finished with value: 0.6128452516080212 and parameters: {'n_estimators': 595, 'max_depth': 9, 'learning_rate': 0.013713452744354855, 'subsample': 0.9408418339243743, 'colsample_bytree': 0.9095841493007747, 'min_child_weight': 0.0011637895424480662, 'reg_alpha': 0.011690821662610389, 'reg_lambda': 0.08411409054769989}. Best is trial 0 with value: 0.6571131290200529.
[I 2025-05-13 18:06:48,500] Trial 2 finished with value: 0.7948354143019296 a

Best parameters: {'n_estimators': 943, 'max_depth': 14, 'learning_rate': 0.299798636793099, 'subsample': 0.6815843772645557, 'colsample_bytree': 0.967548003596196, 'min_child_weight': 1.2892385914567714, 'reg_alpha': 0.009927790289736168, 'reg_lambda': 1.7396121269192825e-06}
Severity Prediction Test Metrics (LightGBM):
Accuracy : 0.8580
Precision: 0.8582
Recall   : 0.8580
F1 Score : 0.8581

Confusion Matrix:
[[2896  156  155   28]
 [ 167 2478  336   50]
 [ 120  320 2665   47]
 [  19   42   61 1032]]

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.90      0.90      3235
           1       0.83      0.82      0.82      3031
           2       0.83      0.85      0.84      3152
          10       0.89      0.89      0.89      1154

    accuracy                           0.86     10572
   macro avg       0.86      0.86      0.86     10572
weighted avg       0.86      0.86      0.86     10572



In [None]:
import pandas as pd
import joblib
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load training and test data
train_data = pd.read_csv('/kaggle/input/mlpr-data-split/train.csv')
test_data = pd.read_csv('/kaggle/input/mlpr-data-split/test.csv')

# Filter for storm events
train_data = train_data[train_data['is_storm_lagged'] == 1]
test_data = test_data[test_data['is_storm_lagged'] == 1]

# Define features and target
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt'
]
target_col = 'severity_class'

# Prepare training features and target
X_train = train_data[severity_features]
y_train = train_data[target_col]

# Scale training features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Prepare test features and target
X_test = test_data[severity_features]
y_test = test_data[target_col]
X_test_scaled = scaler.transform(X_test)

# Use best parameters found via Optuna
final_params = {
    'objective': 'multiclass',
    'num_class': len(y_train.unique()),
    'metric': 'multi_logloss',
    'boosting_type': 'gbdt',
    'verbose': -1,
    'random_state': 42,
    'n_estimators': 943,
    'max_depth': 14,
    'learning_rate': 0.299798636793099,
    'subsample': 0.6815843772645557,
    'colsample_bytree': 0.967548003596196,
    'min_child_weight': 1.2892385914567714,
    'reg_alpha': 0.009927790289736168,
    'reg_lambda': 1.7396121269192825e-06
}

# Train final LightGBM model
lgb_model = lgb.LGBMClassifier(**final_params)
lgb_model.fit(X_train_scaled, y_train)

# Evaluate model on test data
y_pred = lgb_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Severity Prediction Test Metrics (LightGBM):")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Save the model and scaler
# joblib.dump(lgb_model, 'severity_lgb_model.pkl')
# joblib.dump(scaler, 'severity_scaler.pkl')


In [5]:
import pandas as pd
import joblib
from catboost import CatBoostClassifier
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load training and test data
train_data = pd.read_csv('/kaggle/input/mlpr-data-split/train.csv')
test_data = pd.read_csv('/kaggle/input/mlpr-data-split/test.csv')

# Filter for storm events
train_data = train_data[train_data['is_storm_lagged'] == 1]
test_data = test_data[test_data['is_storm_lagged'] == 1]

# Define features and target
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'customers_out', 'duration_hours',
    'desc_word_count', 'desc_char_count',
    'has_tornado', 'has_hail', 'has_flood', 'has_wind', 'has_tree',
    'has_power', 'has_damage', 'has_outage', 'has_broken', 'has_blown',
    'tmin', 'tmax', 'tavg', 'ppt'
]
target_col = 'severity_class'

# Prepare training features and target
X_train = train_data[severity_features]
y_train = train_data[target_col]

# Scale training features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Prepare test features and target
X_test = test_data[severity_features]
y_test = test_data[target_col]
X_test_scaled = scaler.transform(X_test)

# Define objective function for Optuna
def objective(trial):
    param = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'depth': trial.suggest_int('depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
        'random_strength': trial.suggest_float('random_strength', 1e-8, 10.0, log=True),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'loss_function': 'MultiClass',
        'random_seed': 42,
        'verbose': 0
    }

    # Train CatBoost model
    model = CatBoostClassifier(**param)
    model.fit(X_train_scaled, y_train)

    # Predict and calculate accuracy
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)  # Adjust n_trials as needed

# Get best parameters
best_params = study.best_params
print("Best parameters:", best_params)

# Train final model with best parameters
final_params = {
    'loss_function': 'MultiClass',
    'random_seed': 42,
    'verbose': 0,
    **best_params
}
cat_model = CatBoostClassifier(**final_params)
cat_model.fit(X_train_scaled, y_train)

# Evaluate model on test data
y_pred = cat_model.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Severity Prediction Test Metrics (CatBoost):")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Save the model and scaler
# joblib.dump(cat_model, 'severity_cat_model.pkl')
# joblib.dump(scaler, 'severity_scaler.pkl')

[I 2025-05-13 18:20:28,917] A new study created in memory with name: no-name-a4f9baf2-b36c-47a6-a3b0-04efee4e4550
[I 2025-05-13 18:20:39,372] Trial 0 finished with value: 0.6591940976163451 and parameters: {'iterations': 441, 'depth': 5, 'learning_rate': 0.27037668043847507, 'l2_leaf_reg': 1.683330791847867e-08, 'bagging_temperature': 0.8543672325677113, 'random_strength': 2.2045047894594504e-08, 'border_count': 113}. Best is trial 0 with value: 0.6591940976163451.
[I 2025-05-13 18:20:47,859] Trial 1 finished with value: 0.5240257283390087 and parameters: {'iterations': 471, 'depth': 3, 'learning_rate': 0.051302601906070194, 'l2_leaf_reg': 1.1283195002585259e-08, 'bagging_temperature': 0.03940533661246959, 'random_strength': 0.0008401245739724709, 'border_count': 176}. Best is trial 0 with value: 0.6591940976163451.
[I 2025-05-13 18:20:51,250] Trial 2 finished with value: 0.511066969353008 and parameters: {'iterations': 100, 'depth': 6, 'learning_rate': 0.06203828859992205, 'l2_leaf_re

Best parameters: {'iterations': 771, 'depth': 10, 'learning_rate': 0.1358607927072531, 'l2_leaf_reg': 3.698596513420882e-08, 'bagging_temperature': 0.08933994426599372, 'random_strength': 0.10544889935311012, 'border_count': 178}
Severity Prediction Test Metrics (CatBoost):
Accuracy : 0.8459
Precision: 0.8459
Recall   : 0.8459
F1 Score : 0.8458

Confusion Matrix:
[[2866  175  165   29]
 [ 200 2416  363   52]
 [ 148  312 2638   54]
 [  25   45   61 1023]]

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.89      0.89      3235
           1       0.82      0.80      0.81      3031
           2       0.82      0.84      0.83      3152
          10       0.88      0.89      0.88      1154

    accuracy                           0.85     10572
   macro avg       0.85      0.85      0.85     10572
weighted avg       0.85      0.85      0.85     10572



In [6]:
import pandas as pd
import joblib
from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load training and test data
train_data = pd.read_csv('/kaggle/input/mlpr-data-split/train.csv')
test_data = pd.read_csv('/kaggle/input/mlpr-data-split/test.csv')

# Filter for storm events
train_data = train_data[train_data['is_storm_lagged'] == 1]
test_data = test_data[test_data['is_storm_lagged'] == 1]

# Define features and target
severity_features = [
    'DEATHS_INDIRECT', 'INJURIES_DIRECT', 'INJURIES_INDIRECT', 'DEATHS_DIRECT',
    'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'duration_hours',
    'desc_word_count', 'has_tornado', 'has_hail', 'has_flood', 'has_wind',
    'has_tree', 'has_broken', 'has_blown', 'tmin', 'tmax', 'tavg', 'ppt',
    'MAGNITUDE_IMPUTED', 'CZ_FIPS'
]
target_col = 'severity_class'

# Prepare training features and target
X_train = train_data[severity_features]
y_train = train_data[target_col]

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Prepare test features and target
X_test = test_data[severity_features]
y_test = test_data[target_col]
X_test_scaled = scaler.transform(X_test)

# Best parameters from Optuna
best_params = {
    'iterations': 771,
    'depth': 10,
    'learning_rate': 0.1358607927072531,
    'l2_leaf_reg': 3.698596513420882e-08,
    'bagging_temperature': 0.08933994426599372,
    'random_strength': 0.10544889935311012,
    'border_count': 178,
    'loss_function': 'MultiClass',
    'random_seed': 42,
    'verbose': 0
}
cat_model = CatBoostClassifier(**best_params)
cat_model.fit(X_train_scaled, y_train)

# Predict on test data
y_pred = cat_model.predict(X_test_scaled)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Severity Prediction Test Metrics (CatBoost):")
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Save model and scaler (uncomment if needed)
# joblib.dump(cat_model, 'severity_cat_model.pkl')
# joblib.dump(scaler, 'severity_scaler.pkl')

Severity Prediction Test Metrics (CatBoost):
Accuracy : 0.9996
Precision: 0.9996
Recall   : 0.9996
F1 Score : 0.9996

Confusion Matrix:
[[3233    2    0    0]
 [   2 3029    0    0]
 [   0    0 3152    0]
 [   0    0    0 1154]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3235
           1       1.00      1.00      1.00      3031
           2       1.00      1.00      1.00      3152
          10       1.00      1.00      1.00      1154

    accuracy                           1.00     10572
   macro avg       1.00      1.00      1.00     10572
weighted avg       1.00      1.00      1.00     10572

