In [3]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Load the car listings from the CSV file and take a first look at the
# available columns and at how often each car name occurs.
abc = 'car data.csv'
car_data = pd.read_csv(abc)
car_data.columns
# Author's observation from these counts: 'city' is the most frequent Car_Name,
# so the rest of the analysis predicts the selling price of "city" cars.
car_data['Car_Name'].value_counts()
city_data = car_data[car_data['Car_Name'].eq('city')]

# Pull the columns of interest out of the "city" subset as individual Series
# so the selling price can be plotted against the year or the kilometers driven.
year, kms, selling_price = (
    city_data['Year'],
    city_data['Kms_Driven'],
    city_data['Selling_Price'],
)

# plt.plot(year, selling_price, 'o')
# Author's note on that plot: the selling price seems to rise with the year,
# so both the kilometers driven and the year are used as features.

# Feature matrix (kilometers driven, year) and target vector (selling price).
X = city_data[['Kms_Driven', 'Year']]
y = city_data['Selling_Price']

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Build the linear regression and fit it on the training rows
# (fit() returns the estimator itself, so the two steps can be chained).
model = LinearRegression().fit(X_train, y_train)

# Predict the selling prices of the held-out rows.
y_pred = model.predict(X_test)

# Judge the fit with the R² score of the predictions on the test set.
r2 = r2_score(y_test, y_pred)
# print(f"R² score: {r2}")  # author's note from their run: 79.2 %


# Now let's add a helper so that a vendor can enter a car's model year and the kilometers driven and use the model's prediction as a starting point for the price they would offer the seller
def predict_price(model, year=None, kms_driven=None):
    """Predict a car's selling price from its model year and kilometers driven.

    Both values may be passed directly, which makes the call reproducible and
    lets the notebook run unattended. Any value left as ``None`` is requested
    interactively through ``input()`` using the original prompts, so the
    one-argument call ``predict_price(model)`` still works as before.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``. It is called with a one-row
        DataFrame whose columns are ``['Kms_Driven', 'Year']``.
    year : int or str, optional
        Model year of the car; converted with ``int()``. Prompted for when
        omitted.
    kms_driven : int or str, optional
        Kilometers driven; converted with ``int()``. Prompted for when
        omitted.

    Returns
    -------
    The first element of ``model.predict(...)``, i.e. the prediction for the
    single input row.

    Raises
    ------
    ValueError
        If a value cannot be converted to ``int`` or if ``kms_driven`` is
        negative.
    """
    # Ask only for what the caller did not supply. Each value is converted
    # right away so a bad year is reported before the second prompt appears.
    if year is None:
        year = input("Please enter the car model's year: ")
    year = int(year)

    if kms_driven is None:
        kms_driven = input("Please enter the kilometers driven: ")
    kms_driven = int(kms_driven)

    # A negative mileage is never meaningful; fail loudly instead of letting
    # the model extrapolate from it.
    if kms_driven < 0:
        raise ValueError(f"kms_driven must be non-negative, got {kms_driven}")

    # Column names and their order must match the features the model was
    # fitted on (X = city_data[['Kms_Driven', 'Year']]).
    input_features = pd.DataFrame(
        [[kms_driven, year]], columns=['Kms_Driven', 'Year']
    )

    predicted_price = model.predict(input_features)

    return predicted_price[0]

# Run the prediction helper with the fitted model and report its estimate.
predicted_selling_price = predict_price(model=model)
print(f"Predicted selling price: {predicted_selling_price}k $")



Please enter the car model's year:  2016
Please enter the kilometers driven:  30000


Predicted selling price: 9.061376909972068k $


In [59]:
# Show the column names of the loaded car dataset; as the cell's only
# expression, the resulting Index is displayed as the cell output.
car_data.columns

Index(['Car_Name', 'Year', 'Selling_Price', 'Present_Price', 'Kms_Driven',
       'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner'],
      dtype='object')