In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import f_regression
from sklearn.feature_selection import mutual_info_regression
from sklearn.feature_selection import RFE

In [None]:
def read_and_get_data(train_path='./training.csv', test_path='./testing.csv'):
    """Load the train/test CSV files and split each into features and target.

    Parameters
    ----------
    train_path : str, optional
        Path of the training CSV. Defaults to ``'./training.csv'``.
    test_path : str, optional
        Path of the testing CSV. Defaults to ``'./testing.csv'``.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)`` as NumPy arrays. Each feature
        matrix holds the columns at positions 2..27 (0-based) of its file and
        each target vector holds the ``'Appliances'`` column.
    """
    def _split(path):
        # One file -> (feature matrix, target vector).
        frame = pd.read_csv(path)
        # Slice the columns *before* converting to NumPy. Converting the whole
        # frame first forces one common dtype over every column, so a
        # non-numeric column outside the slice would turn the features into an
        # `object` array.
        # NOTE(review): presumably the first column is a date string -- confirm
        # against the CSV files.
        features = frame.iloc[:, 2:28].values
        target = frame['Appliances'].values
        return features, target

    x_train, y_train = _split(train_path)
    x_test, y_test = _split(test_path)
    return x_train, y_train, x_test, y_test

# Load both splits once; x_train and y_train are the inputs of the
# feature-selection calls further down in this notebook.
x_train, y_train, x_test, y_test = read_and_get_data()

### by removing features with low variance

In [None]:
def fea_sel_variancethreshold(x_pre, threshold=.8 * (1 - .8)):
    """Drop low-variance features with ``VarianceThreshold``.

    Parameters
    ----------
    x_pre : array-like
        Feature matrix to filter.
    threshold : float, optional
        Variance cut-off handed to ``VarianceThreshold``. Defaults to
        ``0.8 * (1 - 0.8)``, the value this function previously hard-coded.
        NOTE(review): that default looks like the Bernoulli-variance
        ``p * (1 - p)`` example used for boolean features -- confirm it suits
        the features passed in here.

    Returns
    -------
    The result of fitting the selector on ``x_pre`` and transforming it, i.e.
    ``x_pre`` restricted to the retained columns.
    """
    selector = VarianceThreshold(threshold=threshold)
    return selector.fit_transform(x_pre)

### by selecting the best features based on univariate statistical tests.

In [None]:

def Kbest_by_f_regressor(num, x_pre, y):
    """Keep the ``num`` best features as scored by ``f_regression``.

    Parameters
    ----------
    num : int
        Number of features to keep (passed as ``k`` to ``SelectKBest``).
    x_pre : array-like
        Feature matrix to select from.
    y : array-like
        Target values used for scoring.

    Returns
    -------
    The output of ``SelectKBest.fit_transform(x_pre, y)``.
    """
    selector = SelectKBest(score_func=f_regression, k=num)
    return selector.fit_transform(x_pre, y)

def Kbest_by_mutual_info(num, x_pre, y, random_state=None):
    """Keep the ``num`` best features as scored by ``mutual_info_regression``.

    Parameters
    ----------
    num : int
        Number of features to keep (passed as ``k`` to ``SelectKBest``).
    x_pre : array-like
        Feature matrix to select from.
    y : array-like
        Target values used for scoring.
    random_state : optional
        Forwarded to ``mutual_info_regression`` so the scores (and hence the
        selected features) can be made repeatable. With the default ``None``
        the scorer is used exactly as before.

    Returns
    -------
    The output of ``SelectKBest.fit_transform(x_pre, y)``.
    """
    if random_state is None:
        # Default path: hand the scorer over untouched.
        score_func = mutual_info_regression
    else:
        # SelectKBest calls score_func(X, y) only, so the seed has to be bound
        # through a small wrapper.
        def score_func(X, target):
            return mutual_info_regression(X, target, random_state=random_state)

    selector = SelectKBest(score_func, k=num)
    return selector.fit_transform(x_pre, y)

def perc_by_f_regressor(num, x_pre, y):
    """Keep the top ``num`` percent of features as scored by ``f_regression``.

    Parameters
    ----------
    num : number
        Percentage of features to keep; values below 0 or above 100 are
        rejected.
    x_pre : array-like
        Feature matrix to select from.
    y : array-like
        Target values used for scoring.

    Returns
    -------
    The output of ``SelectPercentile.fit_transform(x_pre, y)``, or ``None``
    (after printing a message) when ``num`` is out of range.
    """
    out_of_range = num < 0 or num > 100
    if out_of_range:
        print('wrong percentage: {}%'.format(num))
        return None

    selector = SelectPercentile(score_func=f_regression, percentile=num)
    return selector.fit_transform(x_pre, y)

def perc_by_mutual_info(num, x_pre, y, random_state=None):
    """Keep the top ``num`` percent of features by ``mutual_info_regression``.

    Parameters
    ----------
    num : number
        Percentage of features to keep; values below 0 or above 100 are
        rejected.
    x_pre : array-like
        Feature matrix to select from.
    y : array-like
        Target values used for scoring.
    random_state : optional
        Forwarded to ``mutual_info_regression`` so the scores (and hence the
        selected features) can be made repeatable. With the default ``None``
        the scorer is used exactly as before.

    Returns
    -------
    The output of ``SelectPercentile.fit_transform(x_pre, y)``, or ``None``
    (after printing a message) when ``num`` is out of range.
    """
    if num > 100 or num < 0:
        print('wrong percentage: {}%'.format(num))
        return None

    if random_state is None:
        # Default path: hand the scorer over untouched.
        score_func = mutual_info_regression
    else:
        # SelectPercentile calls score_func(X, y) only, so the seed has to be
        # bound through a small wrapper.
        def score_func(X, target):
            return mutual_info_regression(X, target, random_state=random_state)

    selector = SelectPercentile(score_func, percentile=num)
    return selector.fit_transform(x_pre, y)

### by an external estimator

In [None]:
def recursively_sel(regressor, num, x_pre, y):
    """Select ``num`` features by recursive feature elimination (RFE).

    Parameters
    ----------
    regressor : estimator
        Estimator handed to ``RFE`` as ``estimator``.
    num : int
        Number of features to keep (``n_features_to_select``).
    x_pre : array-like
        Feature matrix to select from.
    y : array-like
        Target values used when fitting.

    Returns
    -------
    ``x_pre`` transformed by the fitted ``RFE`` selector.
    """
    rfe_options = {
        'estimator': regressor,
        'n_features_to_select': num,
        'step': 1,  # one elimination step at a time
    }
    selector = RFE(**rfe_options)
    selector.fit(x_pre, y)
    return selector.transform(x_pre)

In [None]:
# Run the variance-threshold filter on the training features and print the
# resulting matrix.
x_transformed = fea_sel_variancethreshold(x_train)
print(x_transformed)

### below, call the different functions to perform feature selection

In [None]:
# Keep the 2 best training features as scored by f_regression and print them.
x_transformed = Kbest_by_f_regressor(2,x_train,y_train)
print(x_transformed)

In [None]:
# Keep the 15 best training features as scored by mutual_info_regression and
# print them.
x_transformed = Kbest_by_mutual_info(15,x_train,y_train)
print(x_transformed)

In [None]:
# Keep the top 80% of training features as scored by f_regression and print
# them.
x_transformed = perc_by_f_regressor(80,x_train,y_train)
print(x_transformed)

In [None]:
# Keep the top 90% of training features as scored by mutual_info_regression
# and print them.
x_transformed = perc_by_mutual_info(90,x_train,y_train)
print(x_transformed)

In [None]:
from sklearn.ensemble import RandomForestRegressor
# The estimator below can be swapped for another regressor that exposes
# `coef_` or `feature_importances_` after fitting (e.g. a linear regressor).
# RFE ranks features through one of those attributes, so an estimator without
# them (such as a neural-network regressor) will not work here as-is.
# `random_state` is fixed so that the forest, and therefore the set of selected
# features, is the same on every run.
regressor = RandomForestRegressor(n_estimators=100, max_depth=None, random_state=0)
# Recursively eliminate features until 15 remain, then print the reduced matrix.
x_transformed = recursively_sel(regressor, 15, x_train, y_train)
print(x_transformed)