**CAR PRICE PREDICTION WITH MACHINE LEARNING**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error


In [2]:
# Location of the raw car listings CSV -- replace with your dataset path.
csv_path = "/content/car_data.csv"
data = pd.read_csv(csv_path)

# Show the first rows to confirm the file parsed as expected.
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [3]:
# Build the modelling frame in one pass:
#   1. derive Car_Age as the number of years between the model year and 2025,
#   2. drop Car_Name and the now-redundant Year column,
#   3. one-hot encode the remaining categorical columns, dropping the first
#      level of each (drop_first=True).
data = pd.get_dummies(
    data
    .assign(Car_Age=lambda frame: 2025 - frame['Year'])
    .drop(columns=['Car_Name', 'Year']),
    drop_first=True,
)

data.head()


Unnamed: 0,Selling_Price,Present_Price,Driven_kms,Owner,Car_Age,Fuel_Type_Diesel,Fuel_Type_Petrol,Selling_type_Individual,Transmission_Manual
0,3.35,5.59,27000,0,11,False,True,False,True
1,4.75,9.54,43000,0,12,True,False,False,True
2,7.25,9.85,6900,0,8,False,True,False,True
3,2.85,4.15,5200,0,14,False,True,False,True
4,4.6,6.87,42450,0,11,True,False,False,True


In [4]:
# Separate the target (Selling_Price) from the predictor columns.
y = data['Selling_Price']
X = data.drop(columns='Selling_Price')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Linear-regression baseline: fit on the training split, then predict
# selling prices for the held-out rows.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)


In [6]:
# Random forest model. Training a forest is randomized, so random_state is
# pinned (same seed as the train/test split) to make the reported scores and
# the sample prediction reproducible under Restart & Run All.
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# Predictions for the held-out rows, used for evaluation below.
y_pred_rf = rf_model.predict(X_test)


In [7]:
# Score both models on the held-out set: R² for each, plus the mean absolute
# error of the random forest.
lr_r2 = r2_score(y_test, y_pred_lr)
rf_r2 = r2_score(y_test, y_pred_rf)
rf_mae = mean_absolute_error(y_test, y_pred_rf)

print("Linear Regression R² Score:", lr_r2)
print("Random Forest R² Score:", rf_r2)

print("MAE (Random Forest):", rf_mae)


Linear Regression R² Score: 0.8488707839193155
Random Forest R² Score: 0.9592537231287817
MAE (Random Forest): 0.633129508196722


In [9]:
# Feature values for a single example car, keyed by the encoded column names
# produced during preprocessing.
sample_features = {
    'Present_Price': 9.85,
    'Driven_kms': 45000,
    'Owner': 0,
    'Car_Age': 5,
    'Fuel_Type_Diesel': 0,
    'Fuel_Type_Petrol': 1,
    'Selling_type_Individual': 0,
    'Transmission_Manual': 1,
}
sample_car = pd.DataFrame([sample_features])

# Put the columns in the same order as the training data; a column missing
# from the sample raises a KeyError here instead of being silently misaligned.
sample_car = sample_car.loc[:, X.columns]

# Predict with the random forest and print the resulting array.
predicted_price = rf_model.predict(sample_car)
print("Predicted Car Price:", predicted_price)

Predicted Car Price: [7.852]


In [10]:
import pickle

# Persist the fitted random forest to disk so it can be reused without
# retraining.
with open('car_price_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)

# To restore the model later, uncomment the lines below. Only unpickle files
# you trust: loading a pickle can execute arbitrary code.
# with open('car_price_model.pkl', 'rb') as f:
#     model = pickle.load(f)
