In [8]:
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

In [9]:
# Read the raw table from disk.
df = pd.read_csv('data2.csv')

# Target: the 'severity_class' column shifted down by one
# (presumably so that 1-based labels become 0-based -- confirm against the data).
y = df['severity_class'] - 1

# Features: every remaining column.
X = df.drop(columns=['severity_class'])

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Distinct training labels plus, for every training row, the position of its
# label inside `train_classes`.
train_classes, class_index = np.unique(y_train, return_inverse=True)

# 'balanced' class weights; the returned array has one entry per element of
# `classes`, in the same order as `train_classes`.
class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=train_classes,
                                                  y=y_train)

# Per-sample weights. Index by the label's position in `train_classes`, not by
# the label value itself: the two only coincide when the training labels are
# exactly 0..k-1 with no class missing, and otherwise the value-as-index lookup
# silently picks the wrong weight or raises IndexError.
weights = class_weights[class_index]

# Training matrix carries the per-row weights; the held-out matrix is unweighted.
dtrain = xgb.DMatrix(X_train, label=y_train, weight=weights)
dtest = xgb.DMatrix(X_test, label=y_test)

In [11]:
# Booster configuration handed to xgb.train.
params = dict(
    # Multi-class objective together with the number of classes in the dataset.
    objective='multi:softmax',
    num_class=9,
    # Boosting settings.
    learning_rate=0.1,
    max_depth=6,
    booster='dart',
    # Row and column subsampling ratios.
    subsample=0.8,
    colsample_bytree=0.8,
    # Metric reported for each evaluation set.
    eval_metric='mlogloss',
)

In [12]:
# xgb.train bases early stopping on the LAST entry of `evals`. The held-out set
# therefore has to come last; with the training set last, stopping would track
# the training loss, which keeps decreasing and so rarely triggers.
# NOTE(review): dtest is built from the test split; consider a separate
# validation split for early stopping if this booster's test scores are reported.
evallist = [(dtrain, 'train'), (dtest, 'eval')]

# Up to 100 boosting rounds, stopping once 'eval' mlogloss has not improved
# for 10 consecutive rounds.
model = xgb.train(params, dtrain, num_boost_round=100, evals=evallist, early_stopping_rounds=10)

[0]	eval-mlogloss:2.19098	train-mlogloss:2.17069
[1]	eval-mlogloss:2.18629	train-mlogloss:2.14560
[2]	eval-mlogloss:2.18190	train-mlogloss:2.11772
[3]	eval-mlogloss:2.17621	train-mlogloss:2.09408
[4]	eval-mlogloss:2.17033	train-mlogloss:2.06630
[5]	eval-mlogloss:2.16617	train-mlogloss:2.03820
[6]	eval-mlogloss:2.16181	train-mlogloss:2.01777
[7]	eval-mlogloss:2.15751	train-mlogloss:1.99328
[8]	eval-mlogloss:2.15188	train-mlogloss:1.97049
[9]	eval-mlogloss:2.14593	train-mlogloss:1.94861
[10]	eval-mlogloss:2.14037	train-mlogloss:1.92916
[11]	eval-mlogloss:2.13775	train-mlogloss:1.91443
[12]	eval-mlogloss:2.13426	train-mlogloss:1.89688
[13]	eval-mlogloss:2.12982	train-mlogloss:1.87781
[14]	eval-mlogloss:2.12548	train-mlogloss:1.85890
[15]	eval-mlogloss:2.12163	train-mlogloss:1.83777
[16]	eval-mlogloss:2.11783	train-mlogloss:1.81933
[17]	eval-mlogloss:2.11411	train-mlogloss:1.80207
[18]	eval-mlogloss:2.11035	train-mlogloss:1.78484
[19]	eval-mlogloss:2.10707	train-mlogloss:1.77083
[20]	eval-

In [13]:
# Settings for the scikit-learn style wrapper; these repeat the values in the
# `params` dict used with xgb.train.
# NOTE(review): the wrapper may derive the class count from y on its own --
# confirm whether num_class needs to be passed here.
classifier_kwargs = {
    'objective': 'multi:softmax',
    'booster': 'dart',
    'learning_rate': 0.1,
    'num_class': 9,               # number of target classes
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'eval_metric': 'mlogloss',    # multi-class log loss
}

# Rebinds `model`: from here on it refers to this classifier, not the Booster
# returned by xgb.train.
model = XGBClassifier(**classifier_kwargs)

# Fit on the training split using the per-sample class-balancing weights.
model.fit(X_train, y_train, sample_weight=weights)

In [14]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Overall accuracy and balanced accuracy.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
balanced_acc = balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

# Precision, recall and F1, each computed with average='weighted'.
precision = precision_score(y_true=y_test, y_pred=y_pred, average='weighted')
recall = recall_score(y_true=y_test, y_pred=y_pred, average='weighted')
f1 = f1_score(y_true=y_test, y_pred=y_pred, average='weighted')

# One line per metric, rounded to two decimals.
metrics_report = "\n".join([
    f"Accuracy: {accuracy:.2f}",
    f"Balanced Accuracy: {balanced_acc:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
])
print(metrics_report)

Accuracy: 0.22
Balanced Accuracy: 0.13
Precision: 0.38
Recall: 0.22
F1 Score: 0.26
