In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the car-evaluation table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='car_evaluation.csv')

In [3]:
# Turn the 'buying' labels into integer codes (low=1, med=2, high=3, vhigh=4).
replacement_map = {
    'vhigh': 4,
    'high': 3,
    'med': 2,
    'low': 1,
}
buying_labels = data['buying']
data['buying'] = buying_labels.replace(to_replace=replacement_map)

In [4]:
# Turn the 'maint' labels into integer codes (low=1, med=2, high=3, vhigh=4).
replacement_map = {
    'vhigh': 4,
    'high': 3,
    'med': 2,
    'low': 1,
}
maint_labels = data['maint']
data['maint'] = maint_labels.replace(to_replace=replacement_map)

In [5]:
# Turn the 'lug_boot' labels into integer codes (small=1, med=2, big=3).
replacement_map = {
    'small': 1,
    'med': 2,
    'big': 3,
}
lug_boot_labels = data['lug_boot']
data['lug_boot'] = lug_boot_labels.replace(to_replace=replacement_map)

In [6]:
# Drop every row in which any cell, rendered as text, contains 'more'.
# A plain substring test for 'more' also catches '5more', so no regex
# alternation is needed. The check runs once per column through the
# vectorised .str accessor instead of once per row in Python.
# NOTE(review): this discards the affected rows entirely rather than encoding
# those values numerically -- confirm that losing them is intended.
cell_text = data.astype(str)
has_more = cell_text.apply(lambda col: col.str.contains('more', regex=False)).any(axis=1)
data = data[~has_more]

In [7]:
# Target is the 'safety' column; the features are every remaining column.
# NOTE(review): the accuracy recorded in this notebook (~0.02) is far below
# chance for three classes -- confirm 'safety' is the intended target.
y = data['safety']
X = data.drop(columns='safety')

In [8]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [9]:
# Build a 100-tree random forest with a pinned seed, then train it on the
# training split. The bare fit() call stays last so the cell still displays
# the fitted estimator.
random_forest_classifier = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
random_forest_classifier.fit(X=X_train, y=y_train)

RandomForestClassifier(random_state=42)

In [10]:
# Predict a label for every row of the held-out split.
y_pred = random_forest_classifier.predict(X=X_test)

In [11]:
# Score the held-out predictions against the true labels.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Print overall accuracy first, then the per-class breakdown.
print(f'Accuracy: {accuracy}')
print('Classification Report:', classification_rep, sep='\n')

Accuracy: 0.023121387283236993
Classification Report:
              precision    recall  f1-score   support

        high       0.02      0.02      0.02        54
         low       0.03      0.03      0.03        58
         med       0.02      0.02      0.02        61

    accuracy                           0.02       173
   macro avg       0.02      0.02      0.02       173
weighted avg       0.02      0.02      0.02       173



In [14]:
# One hand-built sample to classify, with columns in the same order as the
# training features. The encoded columns use the same integer codes applied
# during preprocessing (buying/maint: low=1 ... vhigh=4; lug_boot: small=1 ...
# big=3). Every value is a plain int so the frame has a uniform numeric dtype
# instead of a mix of digit strings and ints.
new_data = pd.DataFrame({
    'buying': [4],    # 'vhigh'
    'maint': [1],     # 'low'
    'doors': [2],
    'persons': [2],
    'lug_boot': [1],  # 'small'
})

In [15]:
# Classify the hand-built sample with the forest trained earlier.
predictions = random_forest_classifier.predict(X=new_data)

# Show the predicted label(s) under a heading.
print('Predicted Safety Category for New Data:', predictions, sep='\n')

Predicted Safety Category for New Data:
['low']


In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read the dataset
data = pd.read_csv('car_evaluation.csv')

# Data Preprocessing
# 'buying' and 'maint' use the same label scale, so both are encoded from a
# single map (low=1, med=2, high=3, vhigh=4).
replacement_map = {'vhigh': 4, 'high': 3, 'med': 2, 'low': 1}
for column in ('buying', 'maint'):
    data[column] = data[column].replace(replacement_map)

# 'lug_boot' has its own scale (small=1, med=2, big=3).
replacement_map = {'small': 1, 'med': 2, 'big': 3}
data['lug_boot'] = data['lug_boot'].replace(replacement_map)

# Drop every row in which any cell, rendered as text, contains 'more'.
# A plain substring test for 'more' also catches '5more'. The check runs once
# per column through the vectorised .str accessor instead of once per row.
# NOTE(review): this discards the affected rows entirely rather than encoding
# those values numerically -- confirm that losing them is intended.
cell_text = data.astype(str)
has_more = cell_text.apply(lambda col: col.str.contains('more', regex=False)).any(axis=1)
data = data[~has_more]

# Separate the target column ('safety') from the feature columns.
y = data['safety']
X = data.drop(columns='safety')

# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Hyperparameter tuning using GridSearchCV
# Exhaustive grid: 7 forest sizes x 3 depth limits x 3 split minima x 3 leaf
# minima, each scored by accuracy with 5-fold cross-validation.
param_grid = {
    'n_estimators': list(range(100, 701, 100)),
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

rf_classifier = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    rf_classifier,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,  # use every available core
)
grid_search.fit(X_train, y_train)

# Keep the winning estimator and its settings for the evaluation below.
best_rf_classifier = grid_search.best_estimator_
best_params = grid_search.best_params_

# The grid search above is created without a `refit` argument, so GridSearchCV
# uses its default (refit=True) and has already refit the winning
# configuration on the full training split. best_rf_classifier is therefore
# ready to use; fitting it again would only repeat that work.

# Make predictions on the test data
y_pred = best_rf_classifier.predict(X_test)

# Score the tuned model's held-out predictions against the true labels.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Print the chosen settings, the overall accuracy, then the per-class table.
print(f'Best Hyperparameters: {best_params}')
print(f'Accuracy: {accuracy}')
print('Classification Report:', classification_rep, sep='\n')

Best Hyperparameters: {'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 300}
Accuracy: 0.03468208092485549
Classification Report:
              precision    recall  f1-score   support

        high       0.00      0.00      0.00        54
         low       0.09      0.10      0.10        58
         med       0.00      0.00      0.00        61

    accuracy                           0.03       173
   macro avg       0.03      0.03      0.03       173
weighted avg       0.03      0.03      0.03       173

