In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

df = pd.read_csv("C:\\Users\\Dharani Ravi\\Desktop\\ML projects\\carprice\\car_prices.csv")  

print("Dataset Sample:\n", df.head())
X = df.drop(columns=['Model'])  
y = df['Model']
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y) 
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

encoder = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), categorical_cols)], remainder='passthrough')

X = encoder.fit_transform(X).toarray()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

param_grid = {
    'max_depth': [3, 5, 10],  
    'min_samples_split': [2, 5, 10]
}
dt = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(dt, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
print("\nBest Hyperparameters:", best_params)

best_model = DecisionTreeClassifier(**best_params, random_state=42)
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("\nTuned Model Accuracy:", accuracy)

untuned_model = DecisionTreeClassifier(random_state=42)
untuned_model.fit(X_train, y_train)
y_pred_untuned = untuned_model.predict(X_test)

untuned_accuracy = accuracy_score(y_test, y_pred_untuned)
print("Untuned Model Accuracy:", untuned_accuracy)
print("\n🔹 Model Comparison:")
print(f"Untuned Accuracy: {untuned_accuracy:.4f}")
print(f"Tuned Accuracy: {accuracy:.4f}")

if accuracy > untuned_accuracy:
    print("\nTuning improved the model, reducing overfitting and increasing generalization.")
else:
    print("\nTuning did not improve performance significantly. Try different hyperparameters.")


Dataset Sample:
         Make    Model  Year  Mileage  EngineSize Transmission FuelType  Price
0   Infiniti      Q50  2017    10000         2.5       Manual   Petrol  29000
1    Hyundai   Sonata  2017    50000         5.0    Automatic   Hybrid  57000
2    Hyundai  Elantra  2023    70000         1.4       Manual   Petrol  15000
3  Chevrolet   Impala  2020    95000         1.4       Manual   Hybrid  49000
4       Ford    Focus  2017    30000         3.6    Automatic   Diesel  33000

Best Hyperparameters: {'max_depth': 10, 'min_samples_split': 2}

Tuned Model Accuracy: 0.365
Untuned Model Accuracy: 0.71

🔹 Model Comparison:
Untuned Accuracy: 0.7100
Tuned Accuracy: 0.3650

Tuning did not improve performance significantly. Try different hyperparameters.
