# *Importing Modules*

In [32]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.feature_selection import chi2, f_classif, SelectFromModel, SelectKBest, SelectPercentile, f_regression
from sklearn.linear_model import LinearRegression

## *Reading the data frame*

In [33]:
# Location of the housing dataset; edit this one constant to run the notebook elsewhere.
CSV_PATH = r'H:\House.csv'

# Load the CSV into a DataFrame and preview its first rows.
data_frame = pd.read_csv(CSV_PATH)
data_frame.head()

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA


In [34]:
# Target: the sale price column.
y = data_frame['price']

# Predictors: every column from 'bedrooms' through 'yr_renovated'
# (a .loc label slice includes both end labels). Only numerical columns
# are kept because the features passed to the selectors must be numerical.
x = data_frame.loc[:, 'bedrooms':'yr_renovated']

In [35]:
# Display the target series (price) to check its length and dtype.
y

0       3.130000e+05
1       2.384000e+06
2       3.420000e+05
3       4.200000e+05
4       5.500000e+05
            ...     
4595    3.081667e+05
4596    5.343333e+05
4597    4.169042e+05
4598    2.034000e+05
4599    2.206000e+05
Name: price, Length: 4600, dtype: float64

## *1.SelectPercentile*

In [36]:
# Score each predictor against the price with the univariate f_regression test
# and keep the best-scoring 50% of the columns.
selected_x = SelectPercentile(f_regression, percentile=50)
selected_x.fit(x, y)

# Reduce x to the retained columns and show the resulting (rows, columns) shape.
X = selected_x.transform(x)
X.shape

(4600, 6)

In [37]:
# Display the reduced feature array produced by SelectPercentile.
X

array([[3.00e+00, 1.50e+00, 1.34e+03, 0.00e+00, 1.34e+03, 0.00e+00],
       [5.00e+00, 2.50e+00, 3.65e+03, 4.00e+00, 3.37e+03, 2.80e+02],
       [3.00e+00, 2.00e+00, 1.93e+03, 0.00e+00, 1.93e+03, 0.00e+00],
       ...,
       [3.00e+00, 2.50e+00, 3.01e+03, 0.00e+00, 3.01e+03, 0.00e+00],
       [4.00e+00, 2.00e+00, 2.09e+03, 0.00e+00, 1.07e+03, 1.02e+03],
       [3.00e+00, 2.50e+00, 1.49e+03, 0.00e+00, 1.49e+03, 0.00e+00]])

In [38]:
# Names of the columns that SelectPercentile kept.
selected_x.get_feature_names_out()

array(['bedrooms', 'bathrooms', 'sqft_living', 'view', 'sqft_above',
       'sqft_basement'], dtype=object)

In [39]:
# Boolean mask over the columns of x: True where the column was kept.
selected_x.get_support()

array([ True,  True,  True, False, False, False,  True, False,  True,
        True, False, False])

## *2.SelectFromModel*

In [41]:
# Select features using a fitted linear regression: SelectFromModel ranks the
# columns by the magnitude of the model's coefficients and keeps those above
# its threshold (no threshold is passed, so the library default applies).
# LinearRegression is imported in the notebook's top import cell.
#
# NOTE(review): x is not scaled before fitting, and regression coefficients
# depend on each column's units, so small-range columns can outrank
# large-range ones such as the sqft_* columns. Consider standardizing x
# first if the ranking is meant to reflect importance -- confirm intent.
model = LinearRegression()
selected_x2 = SelectFromModel(estimator=model)

# Fit the model on (x, y) and reduce x to the retained columns.
x2 = selected_x2.fit_transform(x, y)
x2

array([[3. , 1.5, 0. ],
       [5. , 2.5, 0. ],
       [3. , 2. , 0. ],
       ...,
       [3. , 2.5, 0. ],
       [4. , 2. , 0. ],
       [3. , 2.5, 0. ]])

In [42]:
# Names of the columns that SelectFromModel kept.
selected_x2.get_feature_names_out()

array(['bedrooms', 'bathrooms', 'waterfront'], dtype=object)

In [43]:
# Boolean mask over the columns of x: True where the column was kept.
selected_x2.get_support()

array([ True,  True, False, False, False,  True, False, False, False,
       False, False, False])

## *3.SelectKBest*

In [44]:
# Score each predictor against the price with f_regression and keep the
# six highest-scoring columns.
selected_x3 = SelectKBest(f_regression, k=6)
selected_x3.fit(x, y)

# Reduce x to the retained columns and display the result.
x3 = selected_x3.transform(x)
x3

array([[3.00e+00, 1.50e+00, 1.34e+03, 0.00e+00, 1.34e+03, 0.00e+00],
       [5.00e+00, 2.50e+00, 3.65e+03, 4.00e+00, 3.37e+03, 2.80e+02],
       [3.00e+00, 2.00e+00, 1.93e+03, 0.00e+00, 1.93e+03, 0.00e+00],
       ...,
       [3.00e+00, 2.50e+00, 3.01e+03, 0.00e+00, 3.01e+03, 0.00e+00],
       [4.00e+00, 2.00e+00, 2.09e+03, 0.00e+00, 1.07e+03, 1.02e+03],
       [3.00e+00, 2.50e+00, 1.49e+03, 0.00e+00, 1.49e+03, 0.00e+00]])

In [45]:
# Names of the columns that SelectKBest kept.
selected_x3.get_feature_names_out()

array(['bedrooms', 'bathrooms', 'sqft_living', 'view', 'sqft_above',
       'sqft_basement'], dtype=object)

In [46]:
# Boolean mask over the columns of x: True where the column was kept.
selected_x3.get_support()

array([ True,  True,  True, False, False, False,  True, False,  True,
        True, False, False])

In [47]:
# Show the parameters the selector was configured with (k and score_func).
selected_x3.get_params()

{'k': 6,
 'score_func': <function sklearn.feature_selection._univariate_selection.f_regression(X, y, *, center=True, force_finite=True)>}

# *Good Luck*