# Car Price Prediction using ML

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the listings and keep only the columns used in this notebook:
# Year, Price, Used/New, and Mileage.
carData = pd.read_csv('/kaggle/input/carsforsale/cars_raw.csv')
carData = carData[['Year', 'Price', 'Used/New', 'Mileage']]

# Drop listings marked 'Not Priced'. Rows whose Price is missing are dropped
# too (na=True). The explicit .copy() makes the filtered frame independent of
# the frame it was sliced from, so the column assignments below do not raise
# pandas' SettingWithCopyWarning.
has_price = ~carData['Price'].str.contains('Not Priced', na=True)
carData = carData[has_price].copy()

# Convert the Price column into integer values by stripping ',' and '$'.
# regex=False matters: '$' would otherwise be read as an end-of-string anchor.
carData['Price'] = (
    carData['Price']
    .str.replace(',', '', regex=False)
    .str.replace('$', '', regex=False)
    .astype(int)
)

# Encode Used to 1, Certified to 2, and New to 3.
# 'Certified' is applied as a regex pattern, while 'Used' and 'New' are
# matched as whole values.
carData['Used/New'] = carData['Used/New'].replace('Used', 1)
carData['Used/New'] = carData['Used/New'].replace('Certified', 2, regex = True)
carData['Used/New'] = carData['Used/New'].replace('New', 3)

print(carData)

In [3]:
# Pairwise scatter/distribution plots of every column in carData,
# coloured by the encoded Used/New status, to look for patterns.
sns.pairplot(carData, hue='Used/New')

In [8]:
# Splitting the dataset into training and testing.
# X holds every column except the target; y is the Price column.
X = carData.drop(columns='Price')
y = carData['Price']

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. A fixed random_state makes the split
# (and therefore every score computed from it) reproducible across reruns.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [68]:
# Use a regression algorithm (Decision Tree) in order to predict
# the price of the car using Year, Mileage, and Used/New status

from sklearn import tree
from sklearn.metrics import r2_score

# Seed the tree so that refitting on the same training data gives the same
# model.
regressor = tree.DecisionTreeRegressor(random_state=42)
regressor.fit(X_train, y_train)

y_pred = regressor.predict(X_test)

# R^2 score shows how well our model fits the data.
# r2_score takes (y_true, y_pred) in that order; the metric is not symmetric,
# so swapping the arguments reports a different (incorrect) value.
print(r2_score(y_test, y_pred))

In [78]:
# Ask the user for the three features the model was trained on.
year = int(input('What is the year of your car?'))
used = int(input('Type "1" if your car is used, "2" if your car is certified pre-owned, or "3" if your car is new:'))
mileage = int(input('What is the mileage of your car?'))

# Only 1, 2 and 3 are produced by the Used/New encoding; reject anything else
# instead of silently predicting from a code the model never saw.
if used not in (1, 2, 3):
    raise ValueError('Used/New must be 1 (used), 2 (certified pre-owned), or 3 (new).')

# Build a single-row frame with the columns in the same order as the training
# features (Year, Used/New, Mileage).
testData = [[year, used, mileage]]
testDataFrame = pd.DataFrame(testData, columns = ['Year', 'Used/New', 'Mileage'])

# Use a dedicated name so the test-set predictions stored in y_pred by the
# evaluation cell are not overwritten.
predicted_price = regressor.predict(testDataFrame)
print('Predicted price:', predicted_price[0])