In [1]:
from sklearn.datasets import load_boston
boston_dataset=load_boston()

In [2]:
boston_dataset.feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

In [3]:

print(boston_dataset.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [4]:
print(boston_dataset.keys())

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])


In [5]:
import pandas as pd

In [6]:
boston = pd.DataFrame(boston_dataset.data,columns=boston_dataset.feature_names)

In [7]:
boston.head(2)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14


In [8]:
boston['MEDV']=boston_dataset.target

In [9]:
boston

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


# isoloation forset

In [10]:
from sklearn.ensemble import IsolationForest

In [11]:
x=boston.iloc[:,0:12]
y=boston.iloc[:,12]

In [12]:
from sklearn.model_selection import train_test_split



In [13]:
x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=0.80,random_state=1)

In [14]:
iso=IsolationForest(contamination=0.1)

In [15]:
yhat1=iso.fit_predict(x_train)

In [16]:
yhat1

array([ 1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,
       -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1, -1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
        1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1,  1,  1,  1

In [17]:
mask=yhat1 !=-1

In [18]:
mask

array([ True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True, False, False,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
       False,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,

In [19]:
New_x_train,New_y_train=x_train[mask],y_train[mask]

In [20]:
x_train.shape

(404, 12)

In [21]:
New_x_train.shape

(363, 12)

In [22]:
#yhat2=iso.fit_predict(x_test)

In [23]:
#yhat2

array([ 1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1, -1,  1,
        1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
        1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1])

In [24]:
#mask1=yhat2 !=-1

In [25]:
#mask1

array([ True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True, False,  True,  True,  True,
        True,  True, False,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True, False,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True])

In [None]:
#New_x_test,New_y_test=x_test[mask1],y_test[mask1]

In [22]:

#from sklearn.model_selection import train_test_split


In [23]:
from sklearn.linear_model import LinearRegression

In [24]:
model=LinearRegression()

In [25]:
model.fit(New_x_train,New_y_train)

LinearRegression()

In [26]:
y_pred=model.predict(x_test)

In [27]:
y_pred

array([ 8.32075952,  8.57797947, 14.38911093,  8.62451666, 19.64033779,
       15.25704248,  6.01179783, 16.34466653, 17.21677531,  9.51944247,
       11.64228492,  5.32553719, 12.78450185, 10.19100157, 14.41758582,
       21.72895138, 17.44726644,  1.42507426,  8.99570148, 21.48787839,
       14.91874195, 21.13379086, 12.04806651,  8.09201678,  6.66583631,
       25.09895035, 17.22518051, 11.15814452,  7.12153063, 20.83496146,
        8.68251892, 15.48258706,  0.25198438, 17.87498846,  7.92139764,
        9.90040926, 16.86278469,  6.10013486, 30.82505394, 15.15823231,
        5.30690395,  9.03205335, 10.41103364, 17.43719715, 17.92109967,
       17.39086319, 10.2643315 , 20.0976788 , 17.23074077, 15.51366694,
        9.73556603, 10.05626889,  9.69174551, 15.70619034, 21.05422067,
       11.35834486, 17.85330717, 10.34390707, 14.59444949,  2.48783971,
       18.46748544,  8.14587375, 19.08934017, 18.49304047,  9.75801967,
        6.2005224 ,  2.46393286,  8.87362108, 10.29470612, 16.36

In [31]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred)

0.5484068010146639

In [29]:
y_test.shape

(102,)

In [30]:
x_test.shape

(102, 12)

In [32]:
from sklearn.metrics import accuracy_score
regresssion_model_sklearn_accuracy = model.score(y_test,y_pred)
regresssion_model_sklearn_accuracy

ValueError: Expected 2D array, got 1D array instead:
array=[ 7.53  7.18 18.8   8.1  10.19 11.12  4.7  13.04 17.92  8.81  7.74  4.61
 13.61 10.24 11.72 12.13 23.24  4.63  7.43 29.53 11.5  16.65  9.28  6.12
  4.67 24.39 20.34 10.53  7.26 20.85  5.49 18.46  2.88 16.44  9.09 13.59
 17.79  5.33 23.34 14.98  7.34  6.75  7.6  24.1  18.03 22.74  1.92 16.35
  9.81 10.63 11.66 11.22  5.21 11.1  29.68  6.73 29.97  5.98 14.67  3.95
 15.02  6.53 19.92 17.64 29.55  5.04  2.47  8.65 10.4  11.64  6.15 28.28
 14.   11.98 14.09 10.42  2.96  8.23  7.39  3.56 15.39 10.3   5.39 23.27
  5.98  4.97 19.31  5.08 14.1  16.96  5.64  5.12 18.07 12.43 14.52 13.27
  9.67  8.16  4.14 13.   15.69 18.85].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [47]:
New_x_train

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B
42,0.14150,0.0,6.91,0.0,0.448,6.169,6.6,5.7209,3.0,233.0,17.9,383.37
58,0.15445,25.0,5.13,0.0,0.453,6.145,29.2,7.8148,8.0,284.0,19.7,390.68
385,16.81180,0.0,18.10,0.0,0.700,5.277,98.1,1.4261,24.0,666.0,20.2,396.90
78,0.05646,0.0,12.83,0.0,0.437,6.232,53.7,5.0141,5.0,398.0,18.7,386.40
424,8.79212,0.0,18.10,0.0,0.584,5.565,70.6,2.0635,24.0,666.0,20.2,3.65
...,...,...,...,...,...,...,...,...,...,...,...,...
255,0.03548,80.0,3.64,0.0,0.392,5.876,19.1,9.2203,1.0,315.0,16.4,395.18
72,0.09164,0.0,10.81,0.0,0.413,6.065,7.8,5.2873,4.0,305.0,19.2,390.91
396,5.87205,0.0,18.10,0.0,0.693,6.405,96.0,1.6768,24.0,666.0,20.2,396.90
235,0.33045,0.0,6.20,0.0,0.507,6.086,61.5,3.6519,8.0,307.0,17.4,376.75
