In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

In [5]:
columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']
data = pd.read_csv("car.data.csv", names=columns)
data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [6]:
df_encoded = pd.get_dummies(data, columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])
X = df_encoded.drop('class', axis=1)
y = df_encoded['class']

encoder = LabelEncoder()
y_trans = encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_trans, test_size=0.2, random_state=32)

In [7]:
xgboost_model = XGBClassifier(objective='multi:softmax')
xgboost_model.fit(X_train, y_train, verbose = 5)
y_pred_xgb = xgboost_model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred_xgb))

Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99        77
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00       245
           3       1.00      1.00      1.00        12

    accuracy                           1.00       346
   macro avg       1.00      1.00      1.00       346
weighted avg       1.00      1.00      1.00       346



In [8]:
catboost_model = CatBoostClassifier(iterations = 150 , depth = 5, learning_rate = 0.3 , loss_function='MultiClass')
catboost_model.fit(X_train, y_train, verbose = 15)
y_pred_cat = catboost_model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred_cat))

0:	learn: 0.9716852	total: 158ms	remaining: 23.6s
15:	learn: 0.2321337	total: 207ms	remaining: 1.73s
30:	learn: 0.1205501	total: 251ms	remaining: 962ms
45:	learn: 0.0782049	total: 307ms	remaining: 695ms
60:	learn: 0.0604741	total: 353ms	remaining: 516ms
75:	learn: 0.0467108	total: 411ms	remaining: 400ms
90:	learn: 0.0382439	total: 463ms	remaining: 300ms
105:	learn: 0.0322731	total: 509ms	remaining: 211ms
120:	learn: 0.0276432	total: 559ms	remaining: 134ms
135:	learn: 0.0243981	total: 611ms	remaining: 62.9ms
149:	learn: 0.0214798	total: 650ms	remaining: 0us
Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99        77
           1       1.00      1.00      1.00        12
           2       1.00      0.99      1.00       245
           3       1.00      1.00      1.00        12

    accuracy                           0.99       346
   macro avg       0.99      1.00      1.00       346
weighted avg       0.99      0.99 

In [9]:
accuracy_xgb = (y_test == y_pred_xgb).mean()
accuracy_cat = (y_test == y_pred_cat).mean()

print(f"Overall accuracy")
print(f"XGBoost: {accuracy_xgb}")
print(f"CatBoost: {accuracy_cat}")

Overall accuracy
XGBoost: 0.9971098265895953
CatBoost: 0.5505195629656855
