### Feature Selection - Variance Threshold
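**Description**: Remove features whose variance does not exceed a chosen threshold; constant and near-constant columns carry little information for a model.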

In [1]:
# write your code from here
import pandas as pd
import numpy as np
from sklearn.feature_selection import VarianceThreshold
import unittest

# ---------------------- Sample Data ----------------------
# Five-row toy dataset: two columns that never change, one that changes
# in a single row, and one that varies widely.
data = dict(
    feature_constant=[1] * 5,
    feature_low_variance=[0, 0, 1, 0, 0],
    feature_high_variance=[10, 20, 15, 25, 30],
    feature_zero_variance=[5] * 5,
)

df = pd.DataFrame(data=data)

# ---------------------- Error Handling ----------------------
def validate_input_data(df):
    """Check that ``df`` is usable as input for variance-based selection.

    Args:
        df: The pandas DataFrame to inspect.

    Returns:
        True when the frame is non-empty and every column dtype is a
        NumPy numeric subtype.

    Raises:
        ValueError: If ``df.empty`` is true.
        TypeError: If any column dtype is not a subtype of ``np.number``.
    """
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    # Evaluate every column's dtype up front, then decide in one place.
    numeric_flags = [np.issubdtype(col_dtype, np.number) for col_dtype in df.dtypes]
    if not all(numeric_flags):
        raise TypeError("All columns must be numeric for VarianceThreshold.")

    return True

# Validate before fitting. If validation fails, report it and stop: carrying
# on would only hand the selector data that is already known to be unusable.
try:
    validate_input_data(df)
except (ValueError, TypeError) as exc:
    print(f"Validation Error: {exc}")
    raise

# ---------------------- Variance Threshold ----------------------
# Drop features whose variance does not exceed the threshold (0.1 here).
# The library default is threshold=0.0, which removes only constant columns.
# With the sample data, 'feature_low_variance' has variance 0.16 and is kept.
selector = VarianceThreshold(threshold=0.1)  # Adjust threshold if needed
reduced_data = selector.fit_transform(df)

# Get the selected feature names
selected_features = df.columns[selector.get_support(indices=True)]
# Carry over the original index so rows in reduced_df stay aligned with df.
reduced_df = pd.DataFrame(reduced_data, columns=selected_features, index=df.index)

print("Selected Features after Variance Thresholding:\n", selected_features)
print("\nReduced DataFrame:\n", reduced_df)

# ---------------------- Unit Tests ----------------------
class TestVarianceThreshold(unittest.TestCase):
    """Sanity checks on the module-level ``reduced_df`` built by the selector."""

    def test_non_empty_output(self):
        # Selection must leave some data behind.
        frame_is_empty = reduced_df.empty
        self.assertFalse(frame_is_empty, "Reduced DataFrame should not be empty.")

    def test_expected_columns(self):
        # Both varying features must survive the selection.
        kept_cols = set(reduced_df.columns)
        expected_cols = {'feature_low_variance', 'feature_high_variance'}
        self.assertTrue(expected_cols <= kept_cols,
                        "Missing expected columns after feature selection.")

    def test_removed_constant_features(self):
        # Neither of the constant columns may remain.
        for dropped_name in ('feature_constant', 'feature_zero_variance'):
            self.assertNotIn(dropped_name, reduced_df.columns)

# Collect the test case's methods into a suite and run it in-process.
# The run call stays the last expression so a notebook still echoes the result.
suite = unittest.TestLoader().loadTestsFromTestCase(TestVarianceThreshold)
unittest.TextTestRunner().run(suite)


...
----------------------------------------------------------------------
Ran 3 tests in 0.002s

OK


Selected Features after Variance Thresholding:
 Index(['feature_low_variance', 'feature_high_variance'], dtype='object')

Reduced DataFrame:
    feature_low_variance  feature_high_variance
0                     0                     10
1                     0                     20
2                     1                     15
3                     0                     25
4                     0                     30


<unittest.runner.TextTestResult run=3 errors=0 failures=0>