Exercise 3

In [26]:
import numpy as np
import pandas as pd
from si.data.dataset import Dataset
from si.io.csv_file import read_csv
from si.feature_selection.select_percentile import SelectPercentile
from si.statistics.f_classification import f_classification

# Load the iris CSV through the project's reader.
# NOTE(review): `features=True` / `label=True` presumably mean "first row holds
# feature names" and "last column is the label" -- confirm in si.io.csv_file.
iris = read_csv('../datasets/iris/iris.csv', sep=',', features=True, label=True)

# Banner announcing the experiment.
print("\n" + "=" * 60)
print("Testing SelectPercentile with iris dataset (Exercise 3.3)")
print("=" * 60)

# One SelectPercentile run is performed for each threshold listed here.
percentiles_to_test = [25, 40, 50, 75, 100]

for percentile in percentiles_to_test:
    print(f"\n--- Percentile: {percentile}% ---")

    # A fresh selector per threshold, fitted on the full dataset.
    selector = SelectPercentile(percentile=percentile)
    selector.fit(iris)

    # Report the scores the fitted selector exposes as `F` and `p`.
    print(f"F-values: {selector.F}", f"p-values: {selector.p}", sep="\n")

    # Apply the fitted selection, then compare the dataset before and after.
    iris_transformed = selector.transform(iris)
    print(
        f"Original shape: {iris.shape()}",
        f"Modified shape: {iris_transformed.shape()}",
        f"Selected features: {iris_transformed.features}",
        sep="\n",
    )

    # The number of kept columns can only be a whole number, so the realised
    # share of features may differ from the requested percentile.
    n_total = iris.X.shape[1]
    n_selected = iris_transformed.X.shape[1]
    percentage_selected = (n_selected / n_total) * 100
    print(f"Actual percentage selected: {percentage_selected:.1f}%")


Testing SelectPercentile with iris dataset (Exercise 3.3)

--- Percentile: 25% ---
F-values: [ 119.26450218   47.3644614  1179.0343277   959.32440573]
p-values: [1.66966919e-31 1.32791652e-16 3.05197580e-91 4.37695696e-85]
Original shape: (150, 4)
Modified shape: (150, 1)
Selected features: ['petal_length']
Actual percentage selected: 25.0%

--- Percentile: 40% ---
F-values: [ 119.26450218   47.3644614  1179.0343277   959.32440573]
p-values: [1.66966919e-31 1.32791652e-16 3.05197580e-91 4.37695696e-85]
Original shape: (150, 4)
Modified shape: (150, 2)
Selected features: ['petal_length', 'petal_width']
Actual percentage selected: 50.0%

--- Percentile: 50% ---
F-values: [ 119.26450218   47.3644614  1179.0343277   959.32440573]
p-values: [1.66966919e-31 1.32791652e-16 3.05197580e-91 4.37695696e-85]
Original shape: (150, 4)
Modified shape: (150, 2)
Selected features: ['petal_length', 'petal_width']
Actual percentage selected: 50.0%

--- Percentile: 75% ---
F-values: [ 119.26450218   47.3