# Car Price Prediction with Machine Learning

# In [None]:

# Car Price Prediction with Machine Learning

## 1. Import Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

## 2. Load Dataset
# Read the raw CSV into a DataFrame. The path is relative, so it is resolved
# against the current working directory.
df = pd.read_csv('../data/car data.csv')
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; print it so the preview of the first rows is actually shown.
print(df.head())

## 3. Feature Engineering
# Replace the `Year` column with an age measured against the fixed reference
# year 2025, then remove `Car_Name`, which is not used as a feature.
# NOTE(review): code that feeds new rows to the saved model presumably has to
# compute Car_Age against the same reference year — confirm.
df['Car_Age'] = 2025 - df.pop('Year')
del df['Car_Name']

## 4. Define X and y
# Separate the prediction target (`Selling_Price`) from the feature columns.
y = df.loc[:, 'Selling_Price']
X = df.drop(columns=['Selling_Price'])

## 5. Identify categorical and numerical columns
# Columns that will be one-hot encoded.
categorical_cols = ['Fuel_Type', 'Selling_type', 'Transmission']
# Every remaining feature column is treated as numerical. The boolean mask
# keeps the columns in their original order.
numerical_cols = X.columns[~X.columns.isin(categorical_cols)].tolist()

## 6. Preprocessing
# Numerical columns are forwarded unchanged; categorical columns are one-hot
# encoded with the first level of each feature dropped.
# handle_unknown='ignore' keeps transform/predict from raising when a row
# carries a category that was absent from the data the encoder was fitted on;
# that feature's one-hot columns are then all zeros for the row.
# Combining it with drop='first' requires scikit-learn >= 1.0.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols),
    ])

## 7. Create and train the model
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Chain preprocessing and a 100-tree random forest into one estimator and fit
# it on the training split. `Pipeline.fit` returns the pipeline itself, so
# `model` is the fitted pipeline.
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
).fit(X_train, y_train)

## 8. Evaluate the model
# Predict on the held-out rows and report R² and root-mean-squared error.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("R² Score:", r2)
print("RMSE:", rmse)

## 9. Save the model
# Persist the fitted pipeline (preprocessing + regressor) as a single file.
# The path is relative to the current working directory.
joblib.dump(value=model, filename='../app/model.pkl')
