In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error, r2_score

In [14]:
# Read the used-car listings into a DataFrame (relative path, resolved
# against the kernel's current working directory)
df = pd.read_csv(filepath_or_buffer='used_cars.csv')

In [15]:
# Encode categorical columns
# Integer-code every text (object-dtype) feature so the estimators receive
# numeric input. The regression target is skipped on purpose: if 'Price' were
# ever parsed as text, label-encoding it would silently replace the prices
# with category codes instead of failing loudly.
categorical_cols = df.select_dtypes(include=['object']).columns.drop('Price', errors='ignore')

# Keep each fitted encoder so the codes can be mapped back to the original
# labels (inverse_transform) or applied to new rows later.
# NOTE(review): integer codes impose an arbitrary order on nominal categories,
# which a linear model reads as magnitude -- consider one-hot encoding.
label_encoders = {}
for col in categorical_cols:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    label_encoders[col] = encoder

In [16]:

# Separate the regression target ('Price') from the predictor columns
y = df['Price']
X = df.drop(columns=['Price'])

In [17]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Standardize the features: the scaler's statistics are learned from the
# training split only, then the same transformation is applied to both
# splits so no information from the test rows influences the scaling
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [19]:
# Models
# Candidate regressors, keyed by the label printed in the report below.
# The tree-based estimators draw random numbers while fitting, so they are
# seeded (same seed as the train/test split) to make the reported scores
# repeatable after a kernel restart.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42)
}

# Train and evaluate
# Fit each model on the training split and score it on the held-out split.
# NOTE(review): R2 values very close to 1.0 on held-out data often mean a
# feature leaks the target -- confirm the feature set before trusting them.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name}:")
    print(f"  RMSE: {root_mean_squared_error(y_test, y_pred)}")
    print(f"  R2 Score: {r2_score(y_test, y_pred)}")
    print()


Linear Regression:
  RMSE: 5860.086881150066
  R2 Score: 0.9995566824705092

Decision Tree:
  RMSE: 3501.008175203686
  R2 Score: 0.9998417684638421

Random Forest:
  RMSE: 2457.7222450393183
  R2 Score: 0.9999220219455689

