## **Importing Dependencies**

In [67]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn import metrics

## **Importing the Boston House Price Dataset**

In [68]:
boston = fetch_openml(name="boston", version = 1, as_frame = True)
df = boston.frame
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


## **Features & Target**

In [69]:
df['Price']= boston.target
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV,Price
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2,36.2


## **Dividing the dataset Into dependent and independent features**


In [70]:
X = df.iloc[:,:-1]
X = X.astype(float)
X.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [71]:
y = df['Price']
y = y.astype(float)
y.head()

Unnamed: 0,Price
0,24.0
1,21.6
2,34.7
3,33.4
4,36.2


## **Split the Data into Training & Test Data**

In [72]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=2)
print(X.shape, X_train.shape, X_test.shape)

(506, 14) (404, 14) (102, 14)


In [73]:
print(X_train.head(),X_test.head())

        CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \
321  0.18159   0.0   7.38   0.0  0.493  6.376  54.3  4.5404  5.0  287.0   
37   0.08014   0.0   5.96   0.0  0.499  5.850  41.5  3.9342  5.0  279.0   
286  0.01965  80.0   1.76   0.0  0.385  6.230  31.5  9.0892  1.0  241.0   
2    0.02729   0.0   7.07   0.0  0.469  7.185  61.1  4.9671  2.0  242.0   
25   0.84054   0.0   8.14   0.0  0.538  5.599  85.7  4.4546  4.0  307.0   

     PTRATIO       B  LSTAT  MEDV  
321     19.6  396.90   6.87  23.1  
37      19.2  396.90   8.77  21.0  
286     18.2  341.60  12.93  20.1  
2       17.8  392.83   4.03  34.7  
25      21.0  303.42  16.51  13.9            CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS   RAD    TAX  \
463   5.82115   0.0  18.10   0.0  0.713  6.513  89.9  2.8016  24.0  666.0   
152   1.12658   0.0  19.58   1.0  0.871  5.012  88.0  1.6102   5.0  403.0   
291   0.07886  80.0   4.95   0.0  0.411  7.148  27.7  5.1167   4.0  245.0   
183   0.10008   0.0   2.4

## **Linear Regression**

In [88]:
lin_reg = LinearRegression()
lin_reg.fit(X_train, Y_train)
y_predicted = lin_reg.predict(X_test)
score1 = metrics.r2_score(Y_test, y_predicted)
print("R squared Error:",score1)
score2 = metrics.mean_absolute_error(Y_test, y_predicted)
print("Mean Absolute Error:",score2)
score3 = metrics.mean_squared_error(Y_test, y_predicted)
print("Mean Squared Error:",score3)
score4 = metrics.mean_squared_log_error(Y_test, y_predicted)
print("Mean Squared Log Error:",score4)

R squared Error: 1.0
Mean Absolute Error: 3.8792498625137825e-14
Mean Squared Error: 2.6979120297863037e-27
Mean Squared Log Error: 1.5355718895258028e-29


## **Compare a few actual Values VS Predicted Values**

In [89]:
comparision = pd.DataFrame({'Actual': Y_test, 'Predicted': y_predicted})
comparision.head()

Unnamed: 0,Actual,Predicted
463,20.2,20.2
152,15.3,15.3
291,37.3,37.3
183,32.5,32.5
384,8.8,8.8
