In [1]:
from sklearn.datasets import load_boston
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error
import time
import pandas as pd

In [2]:
#Some Helper Functions 
def timer(f, label="Fitting"):
    """Run a zero-argument callable, print how long it took, and return its result.

    Parameters
    ----------
    f : callable
        Called with no arguments; its return value is passed through.
    label : str, optional
        Prefix for the printed timing line. Defaults to "Fitting", which
        keeps the output format "Fitting : <seconds>".

    Returns
    -------
    Whatever ``f()`` returns. If ``f`` raises, the exception propagates and
    nothing is printed.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() can be coarse and can jump if the system
    # clock is adjusted.
    start = time.perf_counter()
    res = f()
    elapsed = time.perf_counter() - start
    print("{} : {}".format(label, elapsed))
    return res


def build_model_for_data(data, target, scaler=None, random_state=2):
    """Split the data, fit a linear-regression pipeline on the training part, and time the fit.

    Parameters
    ----------
    data : array-like
        Feature matrix passed to ``train_test_split``.
    target : array-like
        Target values passed to ``train_test_split``.
    scaler : transformer or None, optional
        If given, it is placed in the pipeline ahead of ``LinearRegression``
        so it is fitted on the training split only. With the default ``None``
        the pipeline contains just ``LinearRegression``.
    random_state : int, optional
        Seed forwarded to ``train_test_split``. Defaults to 2.

    Returns
    -------
    tuple
        ``(x_test, y_test, model)`` where ``model`` is the fitted pipeline.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, random_state=random_state
    )
    steps = [LinearRegression()] if scaler is None else [scaler, LinearRegression()]
    pipeline = make_pipeline(*steps)
    # Pipeline.fit returns the pipeline itself, so timer hands back the fitted model.
    model = timer(lambda: pipeline.fit(x_train, y_train))
    return (x_test, y_test, model)

In [3]:
# Load the data twice: scikit-learn's bundled dataset object and a local CSV copy.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 --
# confirm the environment pins an older release before a fresh Run All.
boston = load_boston()
boston_housing = pd.read_csv('BostonHousing.csv')
# Show the dataset description shipped with the scikit-learn object
print(boston.DESCR)


.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [4]:
# Preview the first five rows of the CSV-backed frame
boston_housing.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [5]:
# Compare the first sample's feature values: raw, min-max scaled, standardized.
# Both scalers are fitted on the full feature matrix here.
min_max = MinMaxScaler()
boston_min_max = min_max.fit_transform(boston['data'])

std = StandardScaler()
boston_std = std.fit_transform(boston['data'])

# (number of blank lines before the heading, heading text, feature matrix)
sections = [
    (2, "Example Value distribution of features : ", boston['data']),
    (2, "Value distribution after min max : ", boston_min_max),
    (1, "Value distribution after std : ", boston_std),
]

for blank_lines, heading, matrix in sections:
    for _ in range(blank_lines):
        print()
    print(heading)
    # Only the first row is shown, one feature value per line
    for value in matrix[0]:
        print(value)



Example Value distribution of features : 
0.00632
18.0
2.31
0.0
0.538
6.575
65.2
4.09
1.0
296.0
15.3
396.9
4.98


Value distribution after min max : 
0.0
0.18
0.06781524926686218
0.0
0.31481481481481477
0.5775052692086607
0.6416065911431514
0.26920313906646415
0.0
0.20801526717557245
0.2872340425531916
0.9999999999999999
0.08967991169977926

Value distribution after std : 
-0.4197819386460084
0.2848298609673567
-1.2879094989577484
-0.2725985670699254
-0.14421743255530006
0.4136718893017465
-0.1200134161980508
0.1402136034929299
-0.9828428567665046
-0.6666082090210975
-1.4590003802772087
0.44105193260704206
-1.075562304567866


In [6]:
# Fit and score the same linear model on each version of the features.
# Each run prints the fit time (via build_model_for_data) and the test-set MSE.
experiments = (
    ("Without : ", boston['data']),
    ('MinMax : ', boston_min_max),
    ("Std : ", boston_std),
)

for heading, features in experiments:
    print()
    print(heading)
    x_test, y_test, model = build_model_for_data(features, boston['target'])
    prediction = model.predict(x_test)
    print("MSE : {}".format(mean_squared_error(y_test, prediction)))


Without : 
Fitting : 1.9016838073730469
MSE : 22.160198304875575

MinMax : 
Fitting : 0.0009989738464355469
MSE : 22.16019830487554

Std : 
Fitting : 0.0010006427764892578
MSE : 22.16019830487552
