In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
from sklearn.preprocessing import LabelEncoder

In [27]:
# Load the dataset
df = pd.read_csv("./Car_Evalutaion.csv")

In [40]:
df.head()

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,3,3,0,0,2,2,2
1,3,3,0,0,2,0,2
2,3,3,0,0,1,1,2
3,3,3,0,0,1,2,2
4,3,3,0,0,1,0,2


In [41]:
df.isnull().sum()

vhigh      0
vhigh.1    0
2          0
2.1        0
small      0
low        0
unacc      0
dtype: int64

In [28]:
# Encode categorical variables if necessary
le = LabelEncoder()
for col in df.columns:
    df[col] = le.fit_transform(df[col])

In [29]:
X = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]   # Target variable

In [30]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
# Train Random Forest with 10 trees
rf = RandomForestClassifier(n_estimators=10, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [32]:
# Calculate accuracy
accuracy_old = accuracy_score(y_test, y_pred)

In [33]:
# Confusion matrix for initial model
cm_old = confusion_matrix(y_test, y_pred)
print("Confusion Matrix - Initial Model:")
print(cm_old)
print("\nClassification Report - Initial Model:")
print(classification_report(y_test, y_pred))

Confusion Matrix - Initial Model:
[[ 74   1   2   0]
 [  6   8   0   1]
 [  3   0 234   0]
 [  3   0   0  14]]

Classification Report - Initial Model:
              precision    recall  f1-score   support

           0       0.86      0.96      0.91        77
           1       0.89      0.53      0.67        15
           2       0.99      0.99      0.99       237
           3       0.93      0.82      0.88        17

    accuracy                           0.95       346
   macro avg       0.92      0.83      0.86       346
weighted avg       0.96      0.95      0.95       346



In [34]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 108 candidates, totalling 540 fits


In [35]:
# Train the best model
best_params = grid_search.best_params_
best_rf = RandomForestClassifier(**best_params, random_state=42)
best_rf.fit(X_train, y_train)
y_pred_best = best_rf.predict(X_test)

In [36]:
# Calculate accuracy after tuning
accuracy_new = accuracy_score(y_test, y_pred_best)

In [37]:
# Confusion matrix for best model
cm_new = confusion_matrix(y_test, y_pred_best)
print("\nConfusion Matrix - Tuned Model:")
print(cm_new)
print("\nClassification Report - Tuned Model:")
print(classification_report(y_test, y_pred_best))


Confusion Matrix - Tuned Model:
[[ 73   1   3   0]
 [  1  10   0   4]
 [  1   0 236   0]
 [  2   0   0  15]]

Classification Report - Tuned Model:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95        77
           1       0.91      0.67      0.77        15
           2       0.99      1.00      0.99       237
           3       0.79      0.88      0.83        17

    accuracy                           0.97       346
   macro avg       0.91      0.87      0.89       346
weighted avg       0.97      0.97      0.96       346



In [38]:
# ROC-AUC Score
roc_auc_old = roc_auc_score(y_test, rf.predict_proba(X_test), multi_class="ovr")
roc_auc_new = roc_auc_score(y_test, best_rf.predict_proba(X_test), multi_class="ovr")
print(f"\nROC-AUC Score (Initial Model): {roc_auc_old:.4f}")
print(f"ROC-AUC Score (Tuned Model): {roc_auc_new:.4f}")


ROC-AUC Score (Initial Model): 0.9931
ROC-AUC Score (Tuned Model): 0.9955
