In [34]:
import pandas as pd
import numpy as np

In [35]:
df  = pd.read_csv("car_dataset.csv")
df.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [36]:
df.Owner.unique()

array([0, 1, 3])

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Kms_Driven     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Seller_Type    301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


In [38]:
df.isnull().sum()

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64

## Pre-process Data

Burada geçen adımlar yayınlanırken de uygulanması gerekiyor. !

## Feature Engineering

In [39]:
df.Year.max()

2018

In [40]:
df.Fuel_Type.unique()

array(['Petrol', 'Diesel', 'CNG'], dtype=object)

In [41]:
df = df.drop("Car_Name", axis=1)
df["Year"] = df["Year"].max()-df["Year"]
df = pd.get_dummies(df,columns=["Fuel_Type",	"Seller_Type",	"Transmission"],drop_first=True) #One-hot encoding yapılmış.
df

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Owner,Fuel_Type_Diesel,Fuel_Type_Petrol,Seller_Type_Individual,Transmission_Manual
0,4,3.35,5.59,27000,0,False,True,False,True
1,5,4.75,9.54,43000,0,True,False,False,True
2,1,7.25,9.85,6900,0,False,True,False,True
3,7,2.85,4.15,5200,0,False,True,False,True
4,4,4.60,6.87,42450,0,True,False,False,True
...,...,...,...,...,...,...,...,...,...
296,2,9.50,11.60,33988,0,True,False,False,True
297,3,4.00,5.90,60000,0,False,True,False,True
298,9,3.35,11.00,87934,0,False,True,False,True
299,1,11.50,12.50,9000,0,True,False,False,True


In [42]:
X = df.drop("Selling_Price",axis=1)
y = df["Selling_Price"]

In [43]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler().fit(X)
#Min-max scaler ile normalizasyon işlemi yapılmıştır.

In [44]:
X=scaler.transform(X)

In [47]:
from xgboost import XGBRegressor
xgb_model = XGBRegressor()
xgb_model.fit(X,y)
xgb_model.score(X,y)

0.9999874602106124

In [48]:
y_pred= xgb_model.predict(X)

In [49]:
df["y_pred"] = y_pred

In [50]:
from sklearn.metrics import r2_score, mean_absolute_error

In [51]:
"r2",r2_score(y,y_pred), "rmse", mean_absolute_error(y,y_pred)**.5

('r2', 0.9999874602106124, 'rmse', 0.11074364851358144)

In [52]:
from sklearn.linear_model import LinearRegression
lr_model = LinearRegression()
lr_model.fit(X,y)
lr_model.score(X,y)

0.8825741581640659

The best model is XGBoost.

### Save the model and scaler

In [24]:
list(df.drop(["Selling_Price","y_pred"],axis=1).columns)
#Feature engineering ile elde dilen değerlerin bir listeye kaydedilmesi gerekiyor.
#Modelin öğretildiği özelliklerden sormak için


['Year',
 'Present_Price',
 'Kms_Driven',
 'Owner',
 'Fuel_Type_Diesel',
 'Fuel_Type_Petrol',
 'Seller_Type_Individual',
 'Transmission_Manual']

In [53]:
#inputs
model = xgb_model
scaler = scaler
selected_features = list(df.drop(["Selling_Price","y_pred"],axis=1).columns)

import joblib

# save the model
joblib.dump(model,open("xgb_model.joblib","wb")) #ilk olarak model kaydedildi

# save our scaler
joblib.dump(scaler,open("scaler.joblib","wb")) #İkinci olarak scaler kaydedildi.Model katmanında

# save column names (selected features)
joblib.dump(selected_features, open("features_list.joblib","wb"))#3.olarak da featuresler kaydedildi.

<!-- Joblib ile de yapılabilir Pickle ile de yapılabilir. -->

In [None]:
# #Save the model using pickle
# import pickle
# # save the model to disk
# pickle.dump(model, open(model_file_path, 'wb'))

# #Load the model
# model = pickle.load(open(model_file_path, 'rb'))

# #Saving a Keras model
# # Calling `save('my_model')` creates a SavedModel folder `my_model`.
# model.save("my_model")

## Real-time Prediction

### Load feature names

In [54]:
columns = joblib.load("features_list.joblib")
columns

['Year',
 'Present_Price',
 'Kms_Driven',
 'Owner',
 'Fuel_Type_Diesel',
 'Fuel_Type_Petrol',
 'Seller_Type_Individual',
 'Transmission_Manual']

### Input new data

In [59]:
sample_one = [{
"Year":2014,
"Selling_Price":3.35,
"Present_Price":5.59,
"Kms_Driven":27000,
"Fuel_Type":"Petrol",
"Seller_Type":"Dealer",
"Transmission":"Manual",
"Owner":0
    }]

In [60]:
df_s = pd.DataFrame(sample_one)
df_s

Unnamed: 0,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0


In [61]:
df_s["Year"] = 2023-df_s["Year"]
df_s = pd.get_dummies(df_s).reindex(columns=columns, fill_value=0)
df_s

Unnamed: 0,Year,Present_Price,Kms_Driven,Owner,Fuel_Type_Diesel,Fuel_Type_Petrol,Seller_Type_Individual,Transmission_Manual
0,9,5.59,27000,0,0,True,0,True


### Load Model and its data scaler

In [62]:
scaler = joblib.load(open("scaler.joblib","rb"))
model = joblib.load(open("xgb_model.joblib","rb"))
df_s = scaler.transform(df_s)


### Predict

In [63]:
pred_price = round(model.predict(df_s)[0] * 10_000)
print(f"Your car's price: ${pred_price}")

Your car's price: $26922


In [64]:
!pip install streamlit

Defaulting to user installation because normal site-packages is not writeable
Collecting streamlit
  Downloading streamlit-1.31.1-py2.py3-none-any.whl.metadata (8.1 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Downloading altair-5.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Downloading blinker-1.7.0-py3-none-any.whl.metadata (1.9 kB)
Collecting click<9,>=7.0 (from streamlit)
  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting pyarrow>=7.0 (from streamlit)
  Downloading pyarrow-15.0.0-cp39-cp39-macosx_11_0_arm64.whl.metadata (3.0 kB)
Collecting rich<14,>=10.14.0 (from streamlit)
  Downloading rich-13.7.0-py3-none-any.whl.metadata (18 kB)
Collecting tenacity<9,>=8.1.0 (from streamlit)
  Downloading tenacity-8.2.3-py3-none-any.whl.metadata (1.0 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting tzlocal<6,>=1.1 (from streamlit)
  Downloading tzlocal-5.