In [445]:
import pandas as pd

# Read the car listings CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="Car Data.csv")
data.head(n=5)

Unnamed: 0,Make,Model,Version,Price,Make_Year,CC,Assembly,Mileage,Registered City,Transmission
0,Mitsubishi,Minica,Black Minica,1065000,2019,660,Imported,68000,Lahore,Automatic
1,Audi,A6,1.8 TFSI Business Class Edition,9300000,2015,1800,Local,70000,Lahore,Automatic
2,Toyota,Aqua,G,2375000,2014,1500,Imported,99900,Islamabad,Automatic
3,Honda,City,1.3 i-VTEC,2600000,2017,1300,Local,55000,Islamabad,Manual
4,Toyota,Corolla,GLi Automatic 1.3 VVTi,2500000,2017,1300,Local,80000,Lahore,Automatic


In [446]:
# Separate the target from the predictors: y is the 'Price' column,
# X is every remaining column of the frame.
y = data['Price']
X = data.drop('Price', axis=1)

In [447]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. random_state pins the shuffle so the
# same rows land in train/test on every Restart & Run All; without it the split
# (and therefore the reported r2 score) changes on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [448]:
#Importing Linear Regression.
from sklearn.linear_model import LinearRegression

# Importing r2_score to measure how well the regression model fits the data.
from sklearn.metrics import r2_score

# Using One Hot Encoder for encoding categorical features as a one-hot numeric array.
from sklearn.preprocessing import OneHotEncoder

#For transforming columns we will use column transformer.
from sklearn.compose import make_column_transformer

#A pipeline chains preprocessing and the model: raw data goes in one end and predictions come out the other.
from sklearn.pipeline import make_pipeline

In [449]:
# Categorical columns whose values must be one-hot encoded.
categorical_columns = ['Make', 'Model', 'Version', 'Assembly', 'Registered City', 'Transmission']

# Fit a standalone encoder on the whole feature frame X (not only the training
# split) so that it learns every category present in each of these columns.
ohe = OneHotEncoder()
ohe.fit(X[categorical_columns])

OneHotEncoder()

In [450]:
# Build the column transformer: one-hot encode the categorical columns using the
# categories already learned by `ohe`, and pass every other column through as is.
categorical_encoder = OneHotEncoder(categories=ohe.categories_)
column_trans = make_column_transformer(
    (categorical_encoder, ['Make', 'Model', 'Version', 'Assembly', 'Registered City', 'Transmission']),
    remainder='passthrough',
)

In [451]:
# Create an unfitted LinearRegression estimator with its default settings.
lr = LinearRegression()

In [452]:
# Chain the column transformer and the linear regression into a single pipeline.
pipe = make_pipeline(column_trans, lr)
# The regressor receives the one-hot encoded columns plus the passed-through remaining columns.

In [453]:
# Fit the whole pipeline (column transformer, then regression) on the training split.
pipe.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(categories=[array(['Audi', 'BMW', 'Cadillac', 'Changan', 'Chery', 'Chevrolet',
       'Chrysler', 'DFSK', 'Daehan', 'Daewoo', 'Daihatsu', 'Datsun',
       'FAW', 'Fiat', 'Ford', 'Golden', 'Hino', 'Honda', 'Hummer',
       'Hyundai', 'Isuzu', 'JAC', 'JMC', 'JW', 'Jaguar',...
       'Tandiliyawala', 'Tando Allah Yar', 'Tando Muhammad Khan',
       'Taxila', 'Thatta', 'Toba Tek Singh', 'Umer Kot', 'Un-Registered',
       'Vehari', 'Wah cantt', 'Warburton', 'Wazirabad', 'Yazman mandi',
       'Zafarwal'], dtype=object),
                                                                            array(['Automatic', 'Manual'], dtype=object)]),
                                                  ['Make', 'Model', 'Version',
                                              

In [454]:
# Predict prices for the held-out test rows and display the predictions.
y_pred = pipe.predict(X_test)
y_pred

array([ 981586.48593491, 3745959.79155192, 2232525.86681619, ...,
       2446126.51242256,  114330.56865865,  428798.67711052])

In [455]:
# Compute the r2 score of the test-set predictions against the true prices.
r2_score(y_true=y_test, y_pred=y_pred)

0.9081363287597034

In [456]:
#Import the pickle library. We will use it to serialize the fitted pipeline to a file.
import pickle

In [457]:
# Serialize the fitted pipeline to disk. The context manager guarantees the file
# handle is flushed and closed, even if pickling raises part-way through.
# NOTE: only unpickle this file from a trusted location; pickle.load can execute
# arbitrary code.
with open('LinearRegression.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)

In [458]:
# Describe a single car as a one-row frame (one key per feature column, in the
# same column order as before) and ask the fitted pipeline for its price.
single_car = pd.DataFrame({
    'Make': ['Honda'],
    'Model': ['City'],
    'Version': ['1.3 i-VTEC'],
    'Make_Year': [2014],
    'CC': [1300],
    'Assembly': ['Local'],
    'Mileage': [10000],
    'Registered City': ['Islamabad'],
    'Transmission': ['Manual'],
})
car_price = pipe.predict(single_car)

In [460]:
# Report the first (only) prediction, truncated to an integer.
predicted_price = int(car_price[0])
print("Car Price is: ", predicted_price, "PKR")

Car Price is:  1936276 PKR
