# Quasi-Constant Feature Removal

## Data Importation

In [0]:
from google.colab import files
# Colab-only step: prompts for local files to upload into the runtime's working
# directory (the saved file names are echoed in the cell output). The pickles read
# in the next cell must be uploaded here first.
uploaded = files.upload()

Saving x_train.pkl to x_train.pkl
Saving y_train.pkl to y_train.pkl
Saving y_train_time.pkl to y_train_time.pkl


In [0]:
import pandas as pd

# Load the training features and targets from the pickles in the working directory.
# SECURITY NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
X_train = pd.read_pickle("./x_train.pkl")
y_train = pd.read_pickle("./y_train.pkl")
y_train_time = pd.read_pickle("./y_train_time.pkl")

In [0]:
import numpy as np
from sklearn.feature_selection import VarianceThreshold

In [0]:
# Candidate variance thresholds; each one is passed to VarianceThreshold(threshold=...)
# by the sweep below.
thresh=[0.01,0.05,0.1,0.15]

In [0]:
# Sweep every candidate variance threshold and report which columns survive it.
for t in thresh :
  # A fresh selector is fitted on the training features for each threshold.
  qconstant_filter = VarianceThreshold(threshold=t).fit(X_train)
  threshold=1-t

  # Boolean mask over X_train.columns: True where the feature is kept.
  kept_mask = qconstant_filter.get_support()
  kept_labels = X_train.columns[kept_mask]
  qconstant_columns = [label for label in X_train.columns
                       if label not in kept_labels]

  print(f"\n\nThreshold :{threshold}")
  print(f"\nRemaining features labels : \t{kept_labels}")
  print(f"Remaining features count : {len(kept_labels)}")
  print(f"\nRemoved features labels {qconstant_columns}")
  print(f"Removed features count : {len(qconstant_columns)}")




Threshold :0.99

Remaining features labels : 	Index(['v_Vel', 'lateral_current_lane', 'v_Vel_Ref1', 'v_Vel_Ref2',
       'v_Vel_Ref3', 'longit_pos_vehicle1', 'longit_pos_vehicle2',
       'longit_pos_vehicle3', 'lat_pos_vehicle1', 'lat_pos_vehicle2',
       'iTTC_ref3', 'v_Vel_preced1', 'v_Vel_follow1', 'v_Vel_preced2',
       'v_Vel_follow2', 'longit_pos_preced1', 'longit_pos_follow1',
       'longit_pos_preced2', 'longit_pos_follow2', 'iTTC_preced1',
       'iTTC_follow1', 'iTTC_preced2', 'iTTC_follow2'],
      dtype='object')
Remaining features count : 23

Removed features labels []
Removed features count : 0


Threshold :0.95

Remaining features labels : 	Index(['v_Vel', 'lateral_current_lane', 'v_Vel_Ref1', 'v_Vel_Ref2',
       'v_Vel_Ref3', 'longit_pos_vehicle1', 'longit_pos_vehicle2',
       'longit_pos_vehicle3', 'lat_pos_vehicle1', 'lat_pos_vehicle2',
       'iTTC_ref3', 'v_Vel_preced1', 'v_Vel_follow1', 'v_Vel_preced2',
       'v_Vel_follow2', 'longit_pos_preced1', 'longit_

In [0]:
qconstant_filter = VarianceThreshold(threshold=0.01)
# Set the variance threshold to 0.01 to find the quasi-constant features, i.e. flag
# feature columns whose variance does not exceed 0.01.
# NOTE(review): the "approximately 99% of the values are the same" reading applies to
# binary (0/1) features, where variance = p * (1 - p). For continuous features the
# variance depends on the feature's scale — confirm this threshold is meaningful here.

In [0]:
# Fit the selector on the training features (the fitted estimator is reassigned to the same name).
qconstant_filter=qconstant_filter.fit(X_train)

**Number and labels of non-quasi-constant features**

In [0]:
# Number of columns selected by the fitted selector's support mask.
len(X_train.columns[qconstant_filter.get_support()])

23

In [0]:
# Labels of the columns selected by the fitted selector's support mask.
X_train.columns[qconstant_filter.get_support()]

Index(['v_Vel', 'lateral_current_lane', 'v_Vel_Ref1', 'v_Vel_Ref2',
       'v_Vel_Ref3', 'longit_pos_vehicle1', 'longit_pos_vehicle2',
       'longit_pos_vehicle3', 'lat_pos_vehicle1', 'lat_pos_vehicle2',
       'iTTC_ref3', 'v_Vel_preced1', 'v_Vel_follow1', 'v_Vel_preced2',
       'v_Vel_follow2', 'longit_pos_preced1', 'longit_pos_follow1',
       'longit_pos_preced2', 'longit_pos_follow2', 'iTTC_preced1',
       'iTTC_follow1', 'iTTC_preced2', 'iTTC_follow2'],
      dtype='object')

**Quasi-constant features (to be removed)**

In [0]:
# Columns the fitted selector keeps; every other column is treated as quasi-constant.
retained_labels = X_train.columns[qconstant_filter.get_support()]
qconstant_columns = [label for label in X_train.columns
                     if label not in retained_labels]

print(len(qconstant_columns))

0


**Filter features**

In [0]:
# NOTE(review): this drop mutates X_train in place. Whenever qconstant_columns is
# non-empty, X_train afterwards has fewer columns than qconstant_filter was fitted on,
# so re-running earlier cells or calling qconstant_filter.transform(X_train) later will
# not see the original frame — confirm that is intended.
X_train.drop(labels=qconstant_columns, axis=1, inplace=True)

# X_train stays a DataFrame (modified in place; with inplace=True, drop itself returns None)

**Or, alternatively, apply the fitted selector directly**

In [0]:
# NOTE(review): transform expects the same columns the selector was fitted on. If the
# in-place drop in the previous cell actually removed columns, X_train no longer matches
# the fitted selector — run only one of the two alternatives.
X_train_filter=qconstant_filter.transform(X_train)

# By default this returns a NumPy array, so the column labels are lost

**Function for the main script**

In [0]:

def rmv_quasi_constant(data,your_threshold):
    """Report a sweep of variance thresholds and return the filtered features.

    A ``VarianceThreshold`` selector is fitted on ``data`` for each preset
    threshold (0.01, 0.05, 0.1, 0.15) and finally for ``your_threshold``.
    For every threshold the remaining and removed column labels are printed.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature matrix; its ``columns`` are used to label the report.
    your_threshold : float
        Additional variance threshold to evaluate. It is also the threshold
        used to build the returned frame.

    Returns
    -------
    pandas.DataFrame
        ``data`` restricted to the columns kept at ``your_threshold``.
        ``data`` itself is not modified.

    Raises
    ------
    ValueError
        Propagated from ``VarianceThreshold.fit``, e.g. when no column's
        variance exceeds one of the thresholds (see scikit-learn docs).
    """
    thresh=[0.01,0.05,0.1,0.15,your_threshold]
    support = None
    for t in thresh :
      qconstant_filter = VarianceThreshold(threshold=t).fit(data)
      # Boolean mask over data.columns (True = feature kept); computed once
      # per threshold and reused for every report line below.
      support = qconstant_filter.get_support()
      remaining = data.columns[support]
      qconstant_columns = data.columns[~support].tolist()
      # The report is labelled with 1 - t rather than the raw variance threshold.
      threshold=1-t
      print(f"\n\nThreshold :{threshold}")
      print(f"\nRemaining features labels : \t{remaining}")
      print(f"Remaining features count : {len(remaining)}")
      print(f"\nRemoved features labels {qconstant_columns}")
      print(f"Removed features count : {len(qconstant_columns)}")
    # your_threshold is always the last entry of the sweep, so `support` now holds
    # its mask; select by mask so the caller gets a new, labelled DataFrame.
    return data.loc[:, support]


In [0]:
# Run the threshold sweep on the training features, with 0.2 as the extra threshold.
rmv_quasi_constant(X_train, 0.2)



Threshold :0.99

Remaining features labels : 	Index(['v_Vel', 'lateral_current_lane', 'v_Vel_Ref1', 'v_Vel_Ref2',
       'v_Vel_Ref3', 'longit_pos_vehicle1', 'longit_pos_vehicle2',
       'longit_pos_vehicle3', 'lat_pos_vehicle1', 'lat_pos_vehicle2',
       'iTTC_ref3', 'v_Vel_preced1', 'v_Vel_follow1', 'v_Vel_preced2',
       'v_Vel_follow2', 'longit_pos_preced1', 'longit_pos_follow1',
       'longit_pos_preced2', 'longit_pos_follow2', 'iTTC_preced1',
       'iTTC_follow1', 'iTTC_preced2', 'iTTC_follow2'],
      dtype='object')
Remaining features count : 23

Removed features labels []
Removed features count : 0


Threshold :0.95

Remaining features labels : 	Index(['v_Vel', 'lateral_current_lane', 'v_Vel_Ref1', 'v_Vel_Ref2',
       'v_Vel_Ref3', 'longit_pos_vehicle1', 'longit_pos_vehicle2',
       'longit_pos_vehicle3', 'lat_pos_vehicle1', 'lat_pos_vehicle2',
       'iTTC_ref3', 'v_Vel_preced1', 'v_Vel_follow1', 'v_Vel_preced2',
       'v_Vel_follow2', 'longit_pos_preced1', 'longit_