In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model
import math
import pickle #dump or save your model
from joblib import dump,load 

In [2]:
# Load the car listings from a CSV file. The path is relative, so it is
# resolved against the kernel's current working directory.
filename = "carprices.csv"
dataframe = pd.read_csv(filename)
# Last expression of the cell: show only the column labels, not the rows.
dataframe.columns

Index(['Car name', 'Fuel type', 'Number of doors', 'Car body', 'Driver wheel',
       'Car length', 'Car width', 'Number of cylinders', 'Horse power',
       'Price'],
      dtype='object')

In [3]:
# Convert the spelled-out counts ('four', 'six', ...) in 'Number of cylinders'
# and 'Number of doors' into integers so they can be used as numeric features.
#
# Series.map turns every value that is not a key of the mapping into NaN, so
# mapping an already-converted (numeric) column a second time would wipe it
# out. The dtype guard makes the cell safe to re-run.
word_to_number = {
    'Number of cylinders': {
        'four': 4,
        'five': 5,
        'six': 6,
        'eight': 8,
    },
    'Number of doors': {
        'two': 2,
        'four': 4,
    },
}
for column, mapping in word_to_number.items():
    if not pd.api.types.is_numeric_dtype(dataframe[column]):
        # NOTE(review): words missing from the mapping still become NaN, as
        # before -- confirm the CSV only contains the words listed above.
        dataframe[column] = dataframe[column].map(mapping)
dataframe

Unnamed: 0,Car name,Fuel type,Number of doors,Car body,Driver wheel,Car length,Car width,Number of cylinders,Horse power,Price
0,audi fox,gas,2,sedan,fwd,177.3,66.3,5,110,15250
1,bmw x1,gas,2,sedan,rwd,176.8,64.8,6,121,20970
2,mazda glc deluxe,diesel,4,sedan,fwd,177.8,66.5,4,120,10795
3,buick century luxus (sw),diesel,4,wagon,rwd,190.9,70.3,5,123,28248
4,buick skylark,gas,2,convertible,rwd,180.3,70.5,8,155,35056
5,mitsubishi g4,gas,2,hatchback,fwd,157.3,63.8,4,116,9959
6,peugeot 504,gas,4,sedan,rwd,186.7,68.4,4,120,11900
7,peugeot 304,diesel,4,sedan,rwd,186.7,68.4,4,152,13200
8,porsche macan,gas,2,hatchback,rwd,168.9,68.3,4,143,22018
9,porsche cayenne,gas,2,hatchback,rwd,66.5,55.2,8,90,9295


In [4]:
# List each distinct 'Car body' value once; drop_duplicates keeps the first
# occurrence, so the index shows the row where each value first appears.
dataframe['Car body'].drop_duplicates()

0          sedan
3          wagon
4    convertible
5      hatchback
Name: Car body, dtype: object

In [5]:
# One-hot encode 'Car body': build one indicator column per body style and
# append those columns to the right of the original frame.
body_style_dummies = pd.get_dummies(dataframe['Car body'])
merged_dataframe = pd.concat([dataframe, body_style_dummies], axis=1)

In [6]:
# 'Car body' is now represented by its indicator columns, so remove the
# original text column and display the result.
merged_dataframe = merged_dataframe.drop(columns=['Car body'])
merged_dataframe

Unnamed: 0,Car name,Fuel type,Number of doors,Driver wheel,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon
0,audi fox,gas,2,fwd,177.3,66.3,5,110,15250,False,False,True,False
1,bmw x1,gas,2,rwd,176.8,64.8,6,121,20970,False,False,True,False
2,mazda glc deluxe,diesel,4,fwd,177.8,66.5,4,120,10795,False,False,True,False
3,buick century luxus (sw),diesel,4,rwd,190.9,70.3,5,123,28248,False,False,False,True
4,buick skylark,gas,2,rwd,180.3,70.5,8,155,35056,True,False,False,False
5,mitsubishi g4,gas,2,fwd,157.3,63.8,4,116,9959,False,True,False,False
6,peugeot 504,gas,4,rwd,186.7,68.4,4,120,11900,False,False,True,False
7,peugeot 304,diesel,4,rwd,186.7,68.4,4,152,13200,False,False,True,False
8,porsche macan,gas,2,rwd,168.9,68.3,4,143,22018,False,True,False,False
9,porsche cayenne,gas,2,rwd,66.5,55.2,8,90,9295,False,True,False,False


In [7]:
# List each distinct 'Driver wheel' value once; drop_duplicates keeps the
# first occurrence, so the index shows the row where each value first appears.
merged_dataframe['Driver wheel'].drop_duplicates()

0     fwd
1     rwd
14    4wd
Name: Driver wheel, dtype: object

In [8]:
# One-hot encode 'Driver wheel': replace the text column with one indicator
# column per drive type, appended to the right of the remaining columns.
#
# The guard makes the cell safe to re-run. Without it, a second run would
# append the indicator columns again and only then fail on the drop, leaving
# merged_dataframe with duplicated columns.
if 'Driver wheel' in merged_dataframe.columns:
    merged_dataframe = pd.concat(
        [
            merged_dataframe.drop(columns=['Driver wheel']),
            # Encode from the frame being transformed, not from the earlier
            # `dataframe`, so this cell depends on a single input.
            pd.get_dummies(merged_dataframe['Driver wheel']),
        ],
        axis='columns',
    )
merged_dataframe

Unnamed: 0,Car name,Fuel type,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,4wd,fwd,rwd
0,audi fox,gas,2,177.3,66.3,5,110,15250,False,False,True,False,False,True,False
1,bmw x1,gas,2,176.8,64.8,6,121,20970,False,False,True,False,False,False,True
2,mazda glc deluxe,diesel,4,177.8,66.5,4,120,10795,False,False,True,False,False,True,False
3,buick century luxus (sw),diesel,4,190.9,70.3,5,123,28248,False,False,False,True,False,False,True
4,buick skylark,gas,2,180.3,70.5,8,155,35056,True,False,False,False,False,False,True
5,mitsubishi g4,gas,2,157.3,63.8,4,116,9959,False,True,False,False,False,True,False
6,peugeot 504,gas,4,186.7,68.4,4,120,11900,False,False,True,False,False,False,True
7,peugeot 304,diesel,4,186.7,68.4,4,152,13200,False,False,True,False,False,False,True
8,porsche macan,gas,2,168.9,68.3,4,143,22018,False,True,False,False,False,False,True
9,porsche cayenne,gas,2,66.5,55.2,8,90,9295,False,True,False,False,False,False,True


In [9]:
# One-hot encode 'Fuel type': replace the text column with one indicator
# column per fuel, appended to the right of the remaining columns.
#
# The guard makes the cell safe to re-run. Without it, a second run would
# append the indicator columns again and only then fail on the drop, leaving
# merged_dataframe with duplicated columns.
if 'Fuel type' in merged_dataframe.columns:
    merged_dataframe = pd.concat(
        [
            merged_dataframe.drop(columns=['Fuel type']),
            # Encode from the frame being transformed, not from the earlier
            # `dataframe`, so this cell depends on a single input.
            pd.get_dummies(merged_dataframe['Fuel type']),
        ],
        axis='columns',
    )
merged_dataframe

Unnamed: 0,Car name,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,4wd,fwd,rwd,diesel,gas
0,audi fox,2,177.3,66.3,5,110,15250,False,False,True,False,False,True,False,False,True
1,bmw x1,2,176.8,64.8,6,121,20970,False,False,True,False,False,False,True,False,True
2,mazda glc deluxe,4,177.8,66.5,4,120,10795,False,False,True,False,False,True,False,True,False
3,buick century luxus (sw),4,190.9,70.3,5,123,28248,False,False,False,True,False,False,True,True,False
4,buick skylark,2,180.3,70.5,8,155,35056,True,False,False,False,False,False,True,False,True
5,mitsubishi g4,2,157.3,63.8,4,116,9959,False,True,False,False,False,True,False,False,True
6,peugeot 504,4,186.7,68.4,4,120,11900,False,False,True,False,False,False,True,False,True
7,peugeot 304,4,186.7,68.4,4,152,13200,False,False,True,False,False,False,True,True,False
8,porsche macan,2,168.9,68.3,4,143,22018,False,True,False,False,False,False,True,False,True
9,porsche cayenne,2,66.5,55.2,8,90,9295,False,True,False,False,False,False,True,False,True


In [10]:
# One-hot encode 'Car name': replace the text column with one indicator
# column per distinct name, appended to the right of the remaining columns.
#
# NOTE(review): the displayed rows each carry a different car name, so these
# indicators may let the model memorise individual rows instead of learning
# from the car's attributes -- confirm this column should be a feature.
#
# The guard makes the cell safe to re-run. Without it, a second run would
# append the indicator columns again and only then fail on the drop, leaving
# merged_dataframe with duplicated columns.
if 'Car name' in merged_dataframe.columns:
    merged_dataframe = pd.concat(
        [
            merged_dataframe.drop(columns=['Car name']),
            # Encode from the frame being transformed, not from the earlier
            # `dataframe`, so this cell depends on a single input.
            pd.get_dummies(merged_dataframe['Car name']),
        ],
        axis='columns',
    )
merged_dataframe

Unnamed: 0,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,...,porsche macan,renault 5 gtl,subaru,subaru trezia,toyota corolla 1200,toyota corona mark ii,toyota mark ii,volkswagen 411 (sw),volkswagen super beetle,volvo 145e (sw)
0,2,177.3,66.3,5,110,15250,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,2,176.8,64.8,6,121,20970,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2,4,177.8,66.5,4,120,10795,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
3,4,190.9,70.3,5,123,28248,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,2,180.3,70.5,8,155,35056,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,2,157.3,63.8,4,116,9959,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
6,4,186.7,68.4,4,120,11900,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
7,4,186.7,68.4,4,152,13200,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
8,2,168.9,68.3,4,143,22018,False,True,False,False,...,True,False,False,False,False,False,False,False,False,False
9,2,66.5,55.2,8,90,9295,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False


In [11]:
# Display the fully encoded frame (all text columns replaced by indicators).
merged_dataframe

Unnamed: 0,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,...,porsche macan,renault 5 gtl,subaru,subaru trezia,toyota corolla 1200,toyota corona mark ii,toyota mark ii,volkswagen 411 (sw),volkswagen super beetle,volvo 145e (sw)
0,2,177.3,66.3,5,110,15250,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,2,176.8,64.8,6,121,20970,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2,4,177.8,66.5,4,120,10795,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
3,4,190.9,70.3,5,123,28248,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,2,180.3,70.5,8,155,35056,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,2,157.3,63.8,4,116,9959,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
6,4,186.7,68.4,4,120,11900,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
7,4,186.7,68.4,4,152,13200,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
8,2,168.9,68.3,4,143,22018,False,True,False,False,...,True,False,False,False,False,False,False,False,False,False
9,2,66.5,55.2,8,90,9295,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False


In [12]:
# Inspect the dtype of every column to confirm that no text (object) columns
# remain before the frame is handed to the regression.
merged_dataframe.dtypes

Number of doors               int64
Car length                  float64
Car width                   float64
Number of cylinders           int64
Horse power                   int64
Price                         int64
convertible                    bool
hatchback                      bool
sedan                          bool
wagon                          bool
4wd                            bool
fwd                            bool
rwd                            bool
diesel                         bool
gas                            bool
audi fox                       bool
bmw x1                         bool
buick century luxus (sw)       bool
buick skylark                  bool
mazda glc deluxe               bool
mitsubishi g4                  bool
peugeot 304                    bool
peugeot 504                    bool
porsche cayenne                bool
porsche macan                  bool
renault 5 gtl                  bool
subaru                         bool
subaru trezia               

In [13]:
# Create an unfitted linear-regression estimator.
# NOTE(review): this import sits mid-notebook; the first cell already imports
# `linear_model` from sklearn, which exposes the same class.
from sklearn.linear_model import LinearRegression
linear_regression = LinearRegression()
# Last expression of the cell: displays the estimator's repr.
linear_regression

In [14]:
# y - label: the column the model should learn to output.
y = merged_dataframe['Price']
# X - features: every remaining column of the encoded frame.
X = merged_dataframe.drop(columns=['Price'])


In [15]:
# Train the model: fit the linear regression on all feature columns of X
# (regression with multiple input variables) against the prices in y.
linear_regression.fit(X, y)

In [16]:
# Predict the price of one hypothetical car.
#
# The values are positional and must line up with X.columns: the five numeric
# features first (Number of doors, Car length, Car width, Number of cylinders,
# Horse power), followed by the one-hot indicator columns in the order they
# were appended to the frame.
sample_values = [[2, 164.5, 62, 5, 60, 1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]]

# The model was fitted on a DataFrame, so label the row with the training
# column names. This fails immediately if the number of values does not match
# the number of features, and it avoids scikit-learn's "X does not have valid
# feature names" warning that an unlabeled list triggers.
sample = pd.DataFrame(sample_values, columns=X.columns)
linear_regression.predict(sample)



array([9849.73505852])

In [17]:
# score() is called with the same X and y that were passed to fit(), so this
# value describes the fit on the training rows only.
# NOTE(review): no held-out data is scored anywhere in the visible cells --
# confirm whether a train/test split is intended.
linear_regression.score(X, y)

1.0

In [18]:
# Rebind `dataframe` to the encoded frame. This is a plain assignment, not a
# copy: both names now refer to the same object, and the frame as loaded from
# the CSV is no longer reachable through `dataframe`.
dataframe = merged_dataframe
dataframe

Unnamed: 0,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,...,porsche macan,renault 5 gtl,subaru,subaru trezia,toyota corolla 1200,toyota corona mark ii,toyota mark ii,volkswagen 411 (sw),volkswagen super beetle,volvo 145e (sw)
0,2,177.3,66.3,5,110,15250,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
1,2,176.8,64.8,6,121,20970,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
2,4,177.8,66.5,4,120,10795,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
3,4,190.9,70.3,5,123,28248,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4,2,180.3,70.5,8,155,35056,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,2,157.3,63.8,4,116,9959,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
6,4,186.7,68.4,4,120,11900,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
7,4,186.7,68.4,4,152,13200,False,False,True,False,...,False,False,False,False,False,False,False,False,False,False
8,2,168.9,68.3,4,143,22018,False,True,False,False,...,True,False,False,False,False,False,False,False,False,False
9,2,66.5,55.2,8,90,9295,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
