# Energy Consumption Prediction By Appliances

In [1]:
#Import Required Libraries
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from time import time
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor,GradientBoostingRegressor
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.neural_network import MLPRegressor
from boruta import BorutaPy
from sklearn.model_selection import RandomizedSearchCV



In [2]:
energy_dataset = pd.read_csv('energydata_complete.csv')

In [3]:
energy_dataset.head()

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,2016-01-11 17:00:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016-01-11 17:10:00,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.483333,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016-01-11 17:20:00,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.366667,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,2016-01-11 17:30:00,50,40,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.410389,45.410389
4,2016-01-11 17:40:00,60,40,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,...,17.0,45.4,6.133333,733.9,92.0,5.666667,47.666667,4.9,10.084097,10.084097


In [4]:
def featureengineering(energy):
    # Converting datatype of Date column to date time
    energy['date'] = pd.to_datetime(energy['date'])

    # Removing rv2 feature
    del energy['rv2']
    
    # Removing T9 feature
    del energy['T9']
    
    # Removing T6 feature
    del energy['T6']
    
    # Removing rv1
    del energy['rv1']
    
    # To get the month for that record
    energy['month'] = energy['date'].dt.month

    # To get the specific time for that record
    #energy['time'] = energy['date'].dt.time

    #---------------------------------------------------
    p = []
    q = []
    for i in energy['date']:
        p.append(i.strftime("%j"))
        q.append(i.hour * 60 + i.minute)
        
    p=list(map(int, p))
    #---------------------------------------------------

    # To get the Day of the year for that record
    energy['DOY'] = pd.DataFrame({'DOY': p})

    # To get the minutes from midnight for that record
    energy['NSM'] = pd.DataFrame({'NSM': q})

    # To get the only date 
    #energy['Only_Date'] = energy['date'].dt.date
    #energy['Only_Date'] = pd.to_datetime(energy['Only_Date'])
    
    # To get the weekday for that record
    energy['Day of Week'] = energy['date'].dt.weekday
    
    # Now as we have DOY (Day of year) and MNM, we can remove date variable.
    del energy['date']
    
    # Generating training and testing dataset
    from sklearn.model_selection import train_test_split
    
    energy_train, energy_test = train_test_split(energy, test_size=0.2)

    #energy.sort_index()
    #energy_train = energy[0:int(energy.shape[0]*0.8)]
    #energy_test = energy[int(energy.shape[0]*0.8):]
    
    return energy_train, energy_test

In [5]:
def getXY(df):
    
    X = df.drop(['Appliances'],axis =1)
    Y = df['Appliances']
    return X,Y

In [6]:
def select_important_features(df_trn):
    X_trn, y_trn = getXY(df_trn)
    # load X and y
    # NOTE BorutaPy accepts numpy arrays only, hence the .values attribute
    X = X_trn.values
    y = y_trn.values

    # define random forest classifier, with utilising all cores and
    # sampling in proportion to y labels
    rf = RandomForestRegressor(n_estimators=20, max_depth=5)

    # define Boruta feature selection method
    feat_selector = BorutaPy(rf, n_estimators=10, verbose=2)

    # find all relevant features
    feat_selector.fit(X, y)

    # check selected features
    feat_selector.support_

    # check ranking of features
    feat_selector.ranking_

    # call transform() on X to filter it down to selected features
    X_filtered = feat_selector.transform(X)
    
    sel_columns = []
    for i in range(len(X_trn.columns)):
        print(str(X_trn.columns[i])+" : "+str(feat_selector.support_[i])+", rank: "+str(feat_selector.ranking_[i]))
        if feat_selector.ranking_[i] == 1:
            sel_columns.append(str(X_trn.columns[i]))
            
    return sel_columns

In [7]:
def x_sel_columns(X_trn,X_test,sel_columns):
    X_trn = X_trn[sel_columns]
    X_test = X_test[sel_columns]
    return X_trn,X_test

In [8]:
def model_Implementation(X_trn,y_trn,X_test,y_test):
    
    models = [LinearRegression(),
              Ridge(random_state=20),
              Lasso(random_state=20),
              ElasticNet(random_state=20),
              RandomForestRegressor(random_state=20),
              GradientBoostingRegressor(random_state=20),
              ExtraTreesRegressor(random_state=20),
              MLPRegressor(random_state=20)
              ]

    TestModels = pd.DataFrame()
    tmp = {}

    for model in models:
        # get model name
        m = str(model)
        tmp['Model'] = m[:m.index('(')]
        # fit model on training dataset

        start = time()
        model.fit(X_trn, y_trn)
        end = time()

        #Predictions and Validation for Testing and Training Set
        predictions = model.predict(X_test)
        predictions_trn = model.predict(X_trn)
        #R2 score
        tmp['R2_Test'] = round(r2_score(y_test,predictions),3)
        tmp['R2_Train'] = round(r2_score(y_trn,predictions_trn),3)
        #Mean Absolute Error(MAE)
        tmp['MAE_Test']= round(mean_absolute_error(y_test,predictions),3)
        tmp['MAE_Train']= round(mean_absolute_error(y_trn,predictions_trn),3)
        #Mean Squared Error(MSE)
        tmp['MSE_Test']= round(mean_squared_error(y_test,predictions),3)
        tmp['MSE_Train']= round(mean_squared_error(y_trn,predictions_trn),3)
        #Root Mean Squared Error (RMSE)
        tmp['RMSE_Test'] = round(np.sqrt(mean_squared_error(y_test,predictions)),3)
        tmp['RMSE_Train'] = round(np.sqrt(mean_squared_error(y_trn,predictions_trn)),3)
        #Mean Absolute Percentage Error
        tmp['MAPE_Test'] =  round(np.mean(np.abs((y_test - predictions) / y_test)) * 100,3)
        tmp['MAPE_Train'] =  round(np.mean(np.abs((y_trn - predictions_trn) / y_trn)) * 100,3)
        #Training and Testing Scores
        tmp['Training Score(%)'] = round(model.score(X_trn, y_trn) * 100,3)
        tmp['Testing Score(%)'] = round(model.score(X_test, y_test) * 100,3)
        # Training Time
        tmp['Training Time'] = round(end-start,3)
        # write obtained data
        TestModels = TestModels.append([tmp])

    TestModels.set_index('Model', inplace=True)
    #print('Iteration'+str(i))
    TestModels
    return TestModels

In [9]:
def evaluate(model, test_features, test_labels):
    predictions = model.predict(test_features)
    errors = abs(predictions - test_labels)
    mape = 100 * np.mean(errors / test_labels)
    rmse = np.sqrt(mean_squared_error(test_labels,predictions))
    r2 = model.score(test_features, test_labels)
    print('Model Performance')
    print('R2 Test: {:0.3f}'.format(r2))
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('RMSE Test: {:0.4f}'.format(rmse))    
    return r2

In [10]:
def hypertuning(X_trn, y_trn,X_test, y_test):
    
    # Initialize the model based on best performance from above, We got ExtraTreesRegressor 
    sel_model = ExtraTreesRegressor(random_state=42)

    # Define the parameter subset

    param_grid = {
        "n_estimators": [10, 50, 100, 200, 250, 300, 500, 800],
        "max_features": ["auto", "sqrt", "log2"],
        "max_depth": [None, 10, 50, 100, 200, 500]
    }

    # Use Randomized search to try 20 subsets from parameter space with 5-fold cross validation
    random_search = RandomizedSearchCV(sel_model, param_grid, n_iter=20, scoring="r2", cv=5, n_jobs=-1, verbose=2, random_state=42)
    random_search.fit(X_trn, y_trn)
    base_model = ExtraTreesRegressor(random_state = 42)
    base_model.fit(X_trn, y_trn)
    base_accuracy = evaluate(base_model, X_test, y_test)
    best_model = random_search.best_estimator_
    best_accuracy = evaluate(best_model, X_test, y_test)
    print('Improvement of {:0.2f}%. for Extra Trees Regressor Model'.format( 100 * (best_accuracy - base_accuracy) / base_accuracy))

In [11]:
def exec_pipeline(energy):
    print('Steps 1. Process started------------')
    df_trn,df_test = featureengineering(energy)
    print('Steps 2.Feature Engineering completed------------')
    X_trn, y_trn = getXY(df_trn)
    X_test, y_test =  getXY(df_test)
    print('Steps 3. Starting Boruta Implementaion to select features------------')
    sel_columns = select_important_features(df_trn)
    print('Selected Features:'+str(sel_columns)+'\n')
    print('Steps 4. Features Selected, Training the models with selected features------------')
    X_trn,X_test = x_sel_columns(X_trn,X_test,sel_columns)
    results = model_Implementation(X_trn,y_trn,X_test,y_test)
    print('Steps 5. Hyper tuning Parameters of ExtraTreesRegressor Model------------')
    hypertuning(X_trn, y_trn,X_test, y_test)
    print('Steps 6. Process Completed------------')
    return results

In [12]:
%%time
results = exec_pipeline(energy_dataset)

Steps 1. Process started------------
Steps 2.Feature Engineering completed------------
Steps 3. Starting Boruta Implementaion to select features------------
Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	27
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	27
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	27
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	27
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	27
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	27
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	27
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	6
Tentative: 	12
Rejected: 	9


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	9 / 100
Confirmed: 	6
Tentative: 	12
Rejected: 	9


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	10 / 100
Confirmed: 	6
Tentative: 	12
Rejected: 	9


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	11 / 100
Confirmed: 	6
Tentative: 	12
Rejected: 	9


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	12 / 100
Confirmed: 	6
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	13 / 100
Confirmed: 	6
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	14 / 100
Confirmed: 	6
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	15 / 100
Confirmed: 	6
Tentative: 	10
Rejected: 	11


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	16 / 100
Confirmed: 	6
Tentative: 	9
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	17 / 100
Confirmed: 	6
Tentative: 	9
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	18 / 100
Confirmed: 	6
Tentative: 	9
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	19 / 100
Confirmed: 	6
Tentative: 	9
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	20 / 100
Confirmed: 	6
Tentative: 	9
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	21 / 100
Confirmed: 	6
Tentative: 	9
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	22 / 100
Confirmed: 	7
Tentative: 	8
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	23 / 100
Confirmed: 	7
Tentative: 	8
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	24 / 100
Confirmed: 	7
Tentative: 	8
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	25 / 100
Confirmed: 	7
Tentative: 	8
Rejected: 	12


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	26 / 100
Confirmed: 	7
Tentative: 	6
Rejected: 	14


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	27 / 100
Confirmed: 	7
Tentative: 	6
Rejected: 	14


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	28 / 100
Confirmed: 	7
Tentative: 	6
Rejected: 	14


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	29 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	30 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	31 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	32 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	33 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	34 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	35 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	36 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	37 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	38 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	39 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	40 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	41 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	42 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	43 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	44 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	45 / 100
Confirmed: 	7
Tentative: 	5
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	46 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	47 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	48 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	49 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	50 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	51 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	52 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	53 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	54 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	55 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	56 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	57 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	58 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	59 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	60 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	61 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	62 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	63 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	64 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	65 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	66 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	67 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	68 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	69 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	70 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	71 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	72 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	73 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	74 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	75 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	76 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	77 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	78 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	79 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	80 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	81 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	82 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	83 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	84 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	85 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	86 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	87 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	88 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	89 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	90 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	91 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	92 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	93 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	94 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	95 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	96 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	97 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	98 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


  hits = np.where(cur_imp[0] > imp_sha_max)[0]


Iteration: 	99 / 100
Confirmed: 	8
Tentative: 	4
Rejected: 	15


BorutaPy finished running.

Iteration: 	100 / 100
Confirmed: 	8
Tentative: 	3
Rejected: 	15
lights : True, rank: 1
T1 : False, rank: 4
RH_1 : False, rank: 16
T2 : False, rank: 14
RH_2 : True, rank: 1
T3 : True, rank: 1
RH_3 : True, rank: 1
T4 : False, rank: 14
RH_4 : False, rank: 6
T5 : False, rank: 12
RH_5 : False, rank: 3
RH_6 : False, rank: 12
T7 : False, rank: 12
RH_7 : False, rank: 2
T8 : True, rank: 1
RH_8 : False, rank: 4
RH_9 : False, rank: 8
T_out : False, rank: 9
Press_mm_hg : False, rank: 2
RH_out : True, rank: 1
Windspeed : False, rank: 7
Visibility : False, rank: 17
Tdewpoint : False, rank: 2
month : False, rank: 10
DOY : True, rank: 1
NSM : True, rank: 1
Day of Week : False, rank: 17
Selected Features:['lights', 'RH_2', 'T3', 'RH_3', 'T8', 'RH_out', 'DOY', 'NSM']

Steps 4. Features Selected, Training the models with selected features------------
Steps 5. Hyper tuning Parameters of ExtraTreesRegressor Model--

[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   39.7s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  2.3min finished


Model Performance
R2 Test: 0.550
Average Error: 31.7284 degrees.
RMSE Test: 68.3031
Model Performance
R2 Test: 0.608
Average Error: 28.8583 degrees.
RMSE Test: 63.8145
Improvement of 10.38%. for Extra Trees Regressor Model
Steps 6. Process Completed------------
Wall time: 3min 34s


In [13]:
results

Unnamed: 0_level_0,MAE_Test,MAE_Train,MAPE_Test,MAPE_Train,MSE_Test,MSE_Train,R2_Test,R2_Train,RMSE_Test,RMSE_Train,Testing Score(%),Training Score(%),Training Time
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
LinearRegression,54.903,54.836,64.028,63.371,9371.682,9489.172,0.097,0.1,96.807,97.412,9.697,10.004,0.016
Ridge,54.903,54.836,64.028,63.37,9371.682,9489.172,0.097,0.1,96.807,97.412,9.697,10.004,0.0
Lasso,54.812,54.72,63.846,63.124,9375.973,9493.499,0.097,0.1,96.83,97.435,9.656,9.963,0.016
ElasticNet,54.756,54.625,63.804,62.987,9400.582,9521.047,0.094,0.097,96.957,97.576,9.419,9.702,0.0
RandomForestRegressor,33.575,13.587,32.913,13.11,5108.024,959.737,0.508,0.909,71.47,30.98,50.781,90.898,0.615
GradientBoostingRegressor,46.388,45.302,50.776,49.285,7517.148,7046.176,0.276,0.332,86.701,83.942,27.567,33.173,0.457
ExtraTreesRegressor,31.175,0.001,30.789,0.001,4678.601,0.001,0.549,1.0,68.4,0.034,54.918,100.0,0.263
MLPRegressor,58.267,58.112,72.189,71.293,9009.402,9120.38,0.132,0.135,94.918,95.501,13.188,13.502,1.116
