### Importing Libraries

In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Importing Dataset

In [2]:
dataset = pd.read_csv('ToyotaCorolla.csv')


In [6]:
dataset.columns

Index(['Id', 'Model', 'Price', 'Age_08_04', 'Mfg_Month', 'Mfg_Year', 'KM',
       'Fuel_Type', 'HP', 'Met_Color', 'Color', 'Automatic', 'cc', 'Doors',
       'Cylinders', 'Gears', 'Quarterly_Tax', 'Weight', 'Mfr_Guarantee',
       'BOVAG_Guarantee', 'Guarantee_Period', 'ABS', 'Airbag_1', 'Airbag_2',
       'Airco', 'Automatic_airco', 'Boardcomputer', 'CD_Player',
       'Central_Lock', 'Powered_Windows', 'Power_Steering', 'Radio',
       'Mistlamps', 'Sport_Model', 'Backseat_Divider', 'Metallic_Rim',
       'Radio_cassette', 'Tow_Bar'],
      dtype='object')

### Concatenate different columns (Data Preprocessing)

In [11]:
df=pd.concat([dataset.iloc[:,2:4],dataset.iloc[:,6:7],dataset.iloc[:,8:9],dataset.iloc[:,12:14],dataset.iloc[:,15:18]],axis=1)

In [12]:
df.head()

Unnamed: 0,Price,Age_08_04,KM,HP,cc,Doors,Gears,Quarterly_Tax,Weight
0,13500,23,46986,90,2000,3,5,210,1165
1,13750,23,72937,90,2000,3,5,210,1165
2,13950,24,41711,90,2000,3,5,210,1165
3,14950,26,48000,90,2000,3,5,210,1165
4,13750,30,38500,90,2000,3,5,210,1170


In [37]:
df.rename(columns={'Age_08_04':'Age','Quarterly_Tax':'QTAX'},inplace=True)

In [38]:
df.head()

Unnamed: 0,Price,Age,KM,HP,cc,Doors,Gears,QTAX,Weight
0,13500,23,46986,90,2000,3,5,210,1165
1,13750,23,72937,90,2000,3,5,210,1165
2,13950,24,41711,90,2000,3,5,210,1165
3,14950,26,48000,90,2000,3,5,210,1165
4,13750,30,38500,90,2000,3,5,210,1170


### Selecting X and Y variables

In [44]:
x=df.iloc[:,1:9].values
y=df.iloc[:,:1].values

In [45]:
print(x)

[[   23 46986    90 ...     5   210  1165]
 [   23 72937    90 ...     5   210  1165]
 [   24 41711    90 ...     5   210  1165]
 ...
 [   71 17016    86 ...     5    69  1015]
 [   70 16916    86 ...     5    69  1015]
 [   76     1   110 ...     5    19  1114]]


In [46]:
print(y)

[[13500]
 [13750]
 [13950]
 ...
 [ 8500]
 [ 7250]
 [ 6950]]


### Splitting Data in train and test set

In [47]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [48]:
print(y_test)

[[ 7950]
 [10950]
 [14950]
 [ 7500]
 [ 9900]
 [ 8900]
 [ 6950]
 [15750]
 [11950]
 [ 9930]
 [ 8500]
 [10450]
 [11895]
 [ 9950]
 [ 6450]
 [15500]
 [11950]
 [ 6900]
 [10950]
 [ 9750]
 [ 7350]
 [ 8950]
 [13750]
 [14950]
 [19950]
 [ 9450]
 [11500]
 [ 8500]
 [ 9450]
 [ 8800]
 [11450]
 [10500]
 [13750]
 [ 8250]
 [10950]
 [11950]
 [ 8950]
 [12900]
 [ 9500]
 [ 9950]
 [ 8950]
 [ 9500]
 [ 9950]
 [15500]
 [10950]
 [ 7500]
 [ 9450]
 [ 9500]
 [10845]
 [ 8500]
 [19750]
 [ 8950]
 [ 7250]
 [ 6750]
 [10950]
 [19950]
 [11000]
 [ 8950]
 [17795]
 [10500]
 [ 5740]
 [ 7750]
 [10950]
 [10500]
 [12500]
 [12900]
 [ 8450]
 [ 7950]
 [19450]
 [11450]
 [ 9500]
 [ 4450]
 [11690]
 [ 7900]
 [14750]
 [14990]
 [ 9950]
 [ 7450]
 [ 6950]
 [ 8250]
 [ 6750]
 [ 6900]
 [ 5950]
 [12750]
 [11950]
 [10750]
 [ 8950]
 [11950]
 [ 8250]
 [ 7450]
 [ 6950]
 [10750]
 [17450]
 [ 9940]
 [ 7950]
 [12250]
 [12950]
 [10950]
 [12450]
 [ 6900]
 [ 8500]
 [10750]
 [ 9700]
 [ 9250]
 [ 7900]
 [19000]
 [18450]
 [ 8500]
 [13250]
 [10400]
 [12500]
 

### Model Creation and testing on train and test models

In [49]:
from sklearn.linear_model import LinearRegression
regressor=LinearRegression()
regressor.fit(x_train, y_train)

LinearRegression()

### R-Squared

In [50]:
regressor.score(x,y)

0.821734577742414

### Predicting Test Score(Comparing predicted value of y and actual test value of y)

In [51]:
y_pred = regressor.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[  8289.01   7950.  ]
 [ 10517.    10950.  ]
 [ 13010.62  14950.  ]
 [  7666.6    7500.  ]
 [ 10486.85   9900.  ]
 [  9543.84   8900.  ]
 [  6140.54   6950.  ]
 [ 16218.43  15750.  ]
 [ 11289.5   11950.  ]
 [ 10499.56   9930.  ]
 [  6892.08   8500.  ]
 [ 11225.26  10450.  ]
 [ 11819.05  11895.  ]
 [  9744.71   9950.  ]
 [  7432.58   6450.  ]
 [ 16111.69  15500.  ]
 [ 11172.06  11950.  ]
 [  7751.06   6900.  ]
 [ 12849.54  10950.  ]
 [  8517.49   9750.  ]
 [  6731.21   7350.  ]
 [  6053.79   8950.  ]
 [ 11666.32  13750.  ]
 [ 13024.83  14950.  ]
 [ 18991.79  19950.  ]
 [  7748.68   9450.  ]
 [ 14050.56  11500.  ]
 [  5742.64   8500.  ]
 [  9247.59   9450.  ]
 [  7885.92   8800.  ]
 [ 10609.46  11450.  ]
 [  7647.1   10500.  ]
 [ 16093.34  13750.  ]
 [  7226.74   8250.  ]
 [ 11013.83  10950.  ]
 [ 12532.57  11950.  ]
 [  9733.86   8950.  ]
 [ 12230.39  12900.  ]
 [  9782.11   9500.  ]
 [  9006.26   9950.  ]
 [  8847.43   8950.  ]
 [ 10623.31   9500.  ]
 [  9952.96   9950.  ]
 [ 17009.32