In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [14]:
## Load the cleaned car listings and discard the 'Unnamed: 0' column in one chained step
## (presumably a row index that was written into the CSV -- confirm against the file).
data = pd.read_csv('Clean Car.csv').drop(columns=['Unnamed: 0'])

## Preview the first two rows as a quick sanity check of the remaining columns.
data.head(2)

Unnamed: 0,name,company,year,Price,kms_driven,fuel_type
0,Hyundai Santro Xing,Hyundai,2007,80000,45000,Petrol
1,Mahindra Jeep CL550,Mahindra,2006,425000,40,Diesel


In [15]:
## Column roles used by the rest of the notebook; edit these to change the feature set.
target = 'Price'  ## column the model learns to predict
numeric_features = ['year', 'kms_driven']
categorical_features = ['name', 'company', 'fuel_type']

In [17]:
## y: the target column as a Series.
y = data[target]
## x: feature frame with the numeric columns first, followed by the categorical ones.
x = data[[*numeric_features, *categorical_features]]

In [19]:
## Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [21]:
## Column-wise preprocessing applied before the model:
##   * 'num' -- numeric columns are passed through unchanged (decision trees do not need scaling).
##   * 'cat' -- categorical columns are one-hot encoded; handle_unknown='ignore' keeps
##              transform from raising on categories that were not seen during fit.
preprocessing = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

In [22]:
## Decision tree regressor: depth capped at 5, a node needs at least 10 samples
## to be split, and the seed is fixed so repeated runs build the same tree.
regressor = DecisionTreeRegressor(max_depth=5, min_samples_split=10, random_state=42)


In [23]:
## Chain preprocessing and the regressor so both are fitted and applied together.
pipeline = Pipeline(steps=[('preprocessing', preprocessing), ('regressor', regressor)])

In [24]:
## Fit on the training split, then predict prices for the held-out rows.
## Pipeline.fit returns the fitted pipeline itself, so the two calls can be chained.
y_pred = pipeline.fit(x_train, y_train).predict(x_test)

In [26]:
## Coefficient of determination (R^2) of the predictions on the held-out test split.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
r2

0.2757327249125423