In [9]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load the car listings CSV and take a first look at its columns and values.
abc = 'car data.csv'
car_data = pd.read_csv(abc)
# NOTE: the two overview expressions below sit in the middle of a cell, so Jupyter
# does not display their results; run them in their own cell to actually see them.
car_data.columns
car_data['Car_Name'].value_counts()
# Per the author's reading of the value counts, "city" is the most frequent Car_Name,
# so the model further down is limited to "city" listings. It predicts their selling
# price from kms driven, year, transmission and fuel type (not from kms driven alone).
city_data = car_data.loc[car_data['Car_Name'] == 'city']

# Features and target for the "city" listings: the selling price is explained by
# kilometers driven, model year, transmission type and fuel type.
X = city_data.loc[:, ['Kms_Driven', 'Year', 'Transmission', 'Fuel_Type']]  # features
y = city_data.loc[:, 'Selling_Price']  # target (selling prices)

# One-hot encode the two categorical columns; the remaining (numeric) columns are
# passed through unchanged.
categorical_features = ['Transmission', 'Fuel_Type']
one_hot = OneHotEncoder()
transformer = ColumnTransformer(
    transformers=[("one_hot", one_hot, categorical_features)],
    remainder="passthrough",
)

# The transformer is fitted on every "city" row here, i.e. before any train/test split.
X_transformed = transformer.fit_transform(X)

# Split the dataset into a training set and a test set (20% held out); the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_transformed,
    y,
    test_size=0.2,
    random_state=1,
)

# Build the model: a linear regression fitted on the training rows.
model = LinearRegression().fit(X_train, y_train)

# Predict selling prices for the held-out rows.
y_pred = model.predict(X_test)

# Evaluate the fit with R² (coefficient of determination) on the test set.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R²-Wert: {r2}")  # the recorded run printed about 0.78


# Now let's add a handy feature: a vendor can enter the year, the kilometers driven, the fuel type and the transmission type, and use the model's prediction as a starting point for the price to offer the seller
def predict_price(model, transformer):
    """Interactively collect one car's details and return the predicted selling price.

    Prompts on standard input for the model year, the kilometers driven, the
    fuel type and the transmission type. The answers are placed in a one-row
    DataFrame with the columns ``Kms_Driven``, ``Year``, ``Transmission`` and
    ``Fuel_Type``, passed through ``transformer.transform`` and then through
    ``model.predict``.

    Args:
        model: Fitted estimator; only its ``predict`` method is called.
        transformer: Fitted transformer; only its ``transform`` method is
            called, with the one-row DataFrame described above.

    Returns:
        The first element of the array returned by ``model.predict``.

    Raises:
        ValueError: If the year or the kilometers cannot be parsed as integers.
            Errors raised by ``transformer.transform`` or ``model.predict``
            are propagated unchanged.
    """
    # User input. int() already tolerates surrounding whitespace; the two text
    # answers are stripped explicitly so that e.g. "Diesel " is not handed to
    # the encoder as a different, unseen category.
    year = int(input("Please enter the car model's year: "))
    kms_driven = int(input("Please enter the kilometers driven: "))
    fuel_type = input("Please enter the fuel type (Petrol/Diesel/CNG): ").strip()
    transmission = input("Please enter the transmission type (Manual/Automatic): ").strip()

    # One-row DataFrame with the same column names and order as the training features.
    input_features = pd.DataFrame({
        'Kms_Driven': [kms_driven],
        'Year': [year],
        'Transmission': [transmission],
        'Fuel_Type': [fuel_type],
    })

    # Transform the input with the same transformer that was used during training.
    input_transformed = transformer.transform(input_features)

    # Make a prediction; predict returns one value per row, so take the only one.
    predicted_price = model.predict(input_transformed)

    return predicted_price[0]

# Ask for one car's details and report the model's price suggestion.
# NOTE(review): the "k $" label assumes Selling_Price is in thousands of dollars — confirm the dataset's unit.
predicted_selling_price = predict_price(model=model, transformer=transformer)
print(f"Predicted selling price: {predicted_selling_price}k $")



R²-Wert: 0.7780802194403487


Please enter the car model's year:  2016
Please enter the kilometers driven:  50000
Please enter the fuel type (Petrol/Diesel/CNG):  Diesel
Please enter the transmission type (Manual/Automatic):  Manual


Predicted selling price: 8.681447603449215k $


In [10]:
# Preview the first five rows of the full (unfiltered) listings table.
car_data.head(5)

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0
