### Imports and reading data from csv

In [1]:
import pandas as pd
import numpy as np
from numpy import array

from sklearn.preprocessing import OneHotEncoder

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_regression
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [2]:
comb_data = pd.read_csv("SeqCombinedData.csv")
display(comb_data)

Unnamed: 0,Usage,LaggingCurrentReactivePower,LeadingCurrentReactivePower,CO2,LaggingCurrentPowerFactor,LeadingCurrentPowerFactor,NSM,WeekStatus,Year,Month,...,SeaLevelPressure,CloudCover,Visibility,SolarRadiation,SolarEnergy,UvIndex,Conditions,SunriseHour,LoadType,SunsetHour
0,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
1,3.28,3.56,0.00,0.0,67.76,100.00,5400,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
2,3.46,4.03,0.00,0.0,65.14,100.00,9900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
3,3.89,5.00,0.00,0.0,61.40,100.00,14400,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
4,3.56,4.07,0.00,0.0,65.84,100.00,18900,Weekday,2018,1,...,1026.9,3.3,9.0,139.9,12.0,6,Clear,7,Light_Load,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7003,3.42,0.00,9.79,0.0,100.00,32.98,64800,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7004,3.96,0.00,18.29,0.0,100.00,21.16,69300,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7005,3.38,0.00,13.43,0.0,100.00,24.41,73800,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17
7006,3.42,0.00,13.36,0.0,100.00,24.80,78300,Weekday,2018,12,...,1035.8,19.6,10.5,143.6,12.5,6,Clear,7,Light_Load,17


In [3]:
print(comb_data.columns)

Index(['Usage', 'LaggingCurrentReactivePower', 'LeadingCurrentReactivePower',
       'CO2', 'LaggingCurrentPowerFactor', 'LeadingCurrentPowerFactor', 'NSM',
       'WeekStatus', 'Year', 'Month', 'Day', 'Hours', 'Minutes', 'IsHoliday',
       'Season', 'TempMax', 'TempMin', 'Temp', 'FeelsLikeMax', 'FeelsLikeMin',
       'FeelsLike', 'Dew', 'Humidity', 'Precip', 'PrecipProb', 'PrecipCover',
       'PrecipType', 'SnowDepth', 'WindGust', 'WindSpeed', 'WindDir',
       'SeaLevelPressure', 'CloudCover', 'Visibility', 'SolarRadiation',
       'SolarEnergy', 'UvIndex', 'Conditions', 'SunriseHour', 'LoadType',
       'SunsetHour'],
      dtype='object')


### Encode and standardize the columns

In [4]:
comb_data_filt= comb_data[["Usage", "LaggingCurrentReactivePower", "LeadingCurrentReactivePower",
                          "CO2", "LaggingCurrentPowerFactor", "LeadingCurrentPowerFactor", "NSM",
                          "WeekStatus", "Hours", "Minutes", "IsHoliday", "Season", "Temp", "Dew",
                          "Humidity", "Precip", "PrecipProb", "PrecipCover", "PrecipType", "SnowDepth", "WindGust",
                          "WindSpeed", "WindDir", "SeaLevelPressure", "CloudCover", "Visibility", "SolarRadiation",
                          "SolarEnergy", "UvIndex", "Conditions", "SunriseHour", "LoadType", "SunsetHour"]]


comb_data_new = pd.get_dummies(comb_data_filt, columns=['IsHoliday','PrecipType','Season','Conditions','WeekStatus'])
print(comb_data_new.columns)

Index(['Usage', 'LaggingCurrentReactivePower', 'LeadingCurrentReactivePower',
       'CO2', 'LaggingCurrentPowerFactor', 'LeadingCurrentPowerFactor', 'NSM',
       'Hours', 'Minutes', 'Temp', 'Dew', 'Humidity', 'Precip', 'PrecipProb',
       'PrecipCover', 'SnowDepth', 'WindGust', 'WindSpeed', 'WindDir',
       'SeaLevelPressure', 'CloudCover', 'Visibility', 'SolarRadiation',
       'SolarEnergy', 'UvIndex', 'SunriseHour', 'LoadType', 'SunsetHour',
       'IsHoliday_0', 'IsHoliday_1', 'PrecipType_noprecip', 'PrecipType_rain',
       'PrecipType_rain,snow', 'PrecipType_snow', 'Season_Autumn',
       'Season_Spring', 'Season_Summer', 'Season_Winter', 'Conditions_Clear',
       'Conditions_Overcast', 'Conditions_Partially cloudy', 'Conditions_Rain',
       'Conditions_Rain, Overcast', 'Conditions_Rain, Partially cloudy',
       'Conditions_Snow', 'Conditions_Snow, Partially cloudy',
       'Conditions_Snow, Rain', 'Conditions_Snow, Rain, Overcast',
       'Conditions_Snow, Rain, Partially

In [5]:
data_cols_to_normalize = [
    'Usage', 'LaggingCurrentReactivePower', 'LeadingCurrentReactivePower', 'CO2', 'LaggingCurrentPowerFactor',
    'LeadingCurrentPowerFactor', 'NSM', 'Hours', 'Minutes', 'Temp', 'Dew', 'Humidity',
    'Precip', 'PrecipProb', 'PrecipCover', 'SnowDepth', 'WindGust', 'WindSpeed',
    'SeaLevelPressure', 'CloudCover', 'Visibility', 'SolarRadiation', 'SolarEnergy', 'UvIndex', 'SunriseHour', 'SunsetHour'
]
steel_scaler = StandardScaler()
comb_data_new[data_cols_to_normalize] = steel_scaler.fit_transform(comb_data_new[data_cols_to_normalize])

In [6]:
comb_data_copy = comb_data_new.copy()
load_type_vals = comb_data_copy.pop("LoadType")
#print(load_type_vals)

### Trying select k best

In [7]:
select = SelectKBest(k=20)
z = select.fit_transform(comb_data_copy,load_type_vals)

  f = msb / msw


In [8]:
filter1 = select.get_support()
features = array(comb_data_copy.columns)
print(features[filter1])

['Usage' 'LaggingCurrentReactivePower' 'LeadingCurrentReactivePower' 'CO2'
 'LaggingCurrentPowerFactor' 'LeadingCurrentPowerFactor' 'NSM' 'Hours'
 'Humidity' 'Precip' 'SeaLevelPressure' 'IsHoliday_0' 'IsHoliday_1'
 'PrecipType_rain,snow' 'Season_Winter' 'Conditions_Rain, Overcast'
 'Conditions_Snow, Partially cloudy'
 'Conditions_Snow, Rain, Partially cloudy' 'WeekStatus_Weekday'
 'WeekStatus_Weekend']


### Forward Feature Selection

In [9]:
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression

In [10]:
comb_sampled = comb_data_new.sample(n=500,random_state =20)
display(comb_sampled)
load_type_sampled = comb_sampled.pop("LoadType")

Unnamed: 0,Usage,LaggingCurrentReactivePower,LeadingCurrentReactivePower,CO2,LaggingCurrentPowerFactor,LeadingCurrentPowerFactor,NSM,Hours,Minutes,Temp,...,Conditions_Rain,"Conditions_Rain, Overcast","Conditions_Rain, Partially cloudy",Conditions_Snow,"Conditions_Snow, Partially cloudy","Conditions_Snow, Rain","Conditions_Snow, Rain, Overcast","Conditions_Snow, Rain, Partially cloudy",WeekStatus_Weekday,WeekStatus_Weekend
4818,-0.732357,-0.452295,-0.521998,-0.711735,-1.881218,0.514922,1.569756,1.516862,1.341641,0.749047,...,0,0,1,0,0,0,0,0,0,1
4287,-0.732357,-0.460862,-0.521998,-0.711735,-1.833735,0.514922,-0.703684,-0.650084,-1.341641,1.489413,...,0,0,1,0,0,0,0,0,0,1
2118,-0.726997,-0.564281,-0.521998,-0.711735,-0.978507,0.514922,-0.595425,-0.650084,1.341641,0.115344,...,0,0,0,0,0,0,0,0,0,1
1704,-0.031097,-0.388041,-0.410115,-0.094657,0.863318,0.513282,0.920202,0.939010,-0.447214,-0.223468,...,0,0,0,0,0,0,0,0,1,0
6767,1.657287,1.892057,-0.521998,1.756575,0.412754,0.514922,-0.126302,-0.072232,-1.341641,-0.700314,...,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1089,0.853295,0.146797,-0.464035,1.139498,0.835883,0.514922,0.811943,0.794547,0.447214,-0.945011,...,0,0,0,0,0,0,0,0,1,0
5724,-0.731166,-0.529400,-0.521998,-0.711735,-1.356791,0.514922,-1.244979,-1.227936,-0.447214,0.184361,...,0,0,1,0,0,0,0,0,1,0
4808,0.409312,0.003602,-0.521998,0.522420,0.776793,0.514922,-0.234561,-0.216695,-0.447214,0.749047,...,0,0,1,0,0,0,0,0,0,1
3460,-0.727890,-0.492071,-0.521998,-0.711735,-1.543031,0.514922,-0.956288,-0.939010,-0.447214,0.893355,...,0,1,0,0,0,0,0,0,0,1


In [11]:
wrapper_method = SequentialFeatureSelector(RandomForestClassifier(), n_features_to_select=20, direction='backward', cv=2)
wrapper_method.fit(comb_sampled,load_type_sampled)

In [12]:
wrapper_method.get_feature_names_out(comb_sampled.columns)

array(['Usage', 'LeadingCurrentReactivePower',
       'LaggingCurrentPowerFactor', 'LeadingCurrentPowerFactor', 'NSM',
       'Hours', 'WindDir', 'SunriseHour', 'SunsetHour', 'IsHoliday_0',
       'IsHoliday_1', 'PrecipType_rain,snow', 'PrecipType_snow',
       'Season_Autumn', 'Season_Spring', 'Conditions_Rain',
       'Conditions_Snow, Partially cloudy', 'Conditions_Snow, Rain',
       'Conditions_Snow, Rain, Overcast', 'WeekStatus_Weekday'],
      dtype=object)