# Removing features with no variance

In [70]:
from sklearn.feature_selection import VarianceThreshold
import numpy as np
import pandas as pd

In [71]:
# Toy frame: two random columns with three constant columns in between.
# No seed is set in this cell, so the random values differ from run to run
# unless the generator is seeded elsewhere.
data = dict(
    rand1=np.random.random(5),
    const1=np.ones(5),
    const2=np.ones(5),
    const3=np.ones(5),
    rand2=np.random.random(5),
)
df = pd.DataFrame(data)
display(df)

Unnamed: 0,rand1,const1,const2,const3,rand2
0,0.461731,1.0,1.0,1.0,0.578446
1,0.458634,1.0,1.0,1.0,0.040596
2,0.00816,1.0,1.0,1.0,0.855567
3,0.229969,1.0,1.0,1.0,0.177738
4,0.220083,1.0,1.0,1.0,0.801892


In [72]:
# Selector with a zero cutoff, so only the constant columns get dropped
sel = VarianceThreshold(threshold=0)

# fit() returns the estimator itself, which is why the cell echoes its repr
sel.fit(X=df)

VarianceThreshold(threshold=0)

In [73]:
# Split the column labels into kept / dropped using the selector's boolean mask
cols = df.columns
sup = sel.get_support()  # one flag per input column; True marks a kept column
cols_sel = cols[sup]
cols_rem = cols[~sup]    # inverted mask picks out the dropped columns

# Report everything in one place
print(f'All columns: {list(cols)}')
print(f'Support array: {sup}')
print(f'Chosen columns: {list(cols_sel)}')
print(f'Removed columns: {list(cols_rem)}')


All columns: ['rand1', 'const1', 'const2', 'const3', 'rand2']
Support array: [ True False False False  True]
Chosen columns: ['rand1', 'rand2']
Removed columns: ['const1', 'const2', 'const3']


In [74]:
# By default transform() hands back the surviving columns as a bare array,
# so the labelled frame is rebuilt by hand below.
df_sel = sel.transform(df)

# Reattach both axes: column names come from the support mask, row labels from
# the input frame. Without index=..., a non-default index on df would be
# silently replaced by 0..n-1.
temp = pd.DataFrame(data=df_sel, columns=cols_sel, index=df.index)
display(temp)

Unnamed: 0,rand1,rand2
0,0.461731,0.578446
1,0.458634,0.040596
2,0.00816,0.855567
3,0.229969,0.177738
4,0.220083,0.801892
