### Filter Approach

In [50]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest,chi2
from sklearn.datasets import load_digits

In [65]:
# Load the iris dataset and split it into the feature matrix (x) and class labels (y).
data = load_iris()
x, y = data.data, data.target

In [66]:
# Keep the dataset's feature names; they are used below as the column labels for `x`.
features = data.feature_names
# Bare expression: displays the list of names as the cell output.
features

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [67]:
# Display the class labels (data.target) to inspect how the classes are encoded.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [68]:
# Tabular view of the measurements. Note that the class labels `y` are passed as the
# row index (second positional argument of DataFrame), not as a separate column.
pd.DataFrame(data=x, index=y, columns=features)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
0,4.9,3.0,1.4,0.2
0,4.7,3.2,1.3,0.2
0,4.6,3.1,1.5,0.2
0,5.0,3.6,1.4,0.2
...,...,...,...,...
2,6.7,3.0,5.2,2.3
2,6.3,2.5,5.0,1.9
2,6.5,3.0,5.2,2.0
2,6.2,3.4,5.4,2.3


In [69]:
# Univariate filter that scores each feature with chi2 and keeps k=2 of them.
selector = SelectKBest(score_func=chi2, k=2)

In [70]:
# Fit the selector on the whole dataset and reduce `x` to the selected columns.
x_new = selector.fit_transform(x, y)

In [None]:
# Display the reduced feature matrix returned by selector.fit_transform (k=2).
x_new

In [74]:
# Pull the per-feature chi2 statistics off the fitted selector.
feature_names = data.feature_names
scores = selector.scores_
p_values = selector.pvalues_

# Translate the selected column positions back into feature names.
selected_indices = selector.get_support(indices=True)
selected_feature_names = [feature_names[pos] for pos in selected_indices]
print("Selected feature names:", selected_feature_names)

Selected feature names: ['petal length (cm)', 'petal width (cm)']


In [77]:
# Map each feature name to its score and p-value; `scores` and `p_values`
# are indexed by the feature's position in `feature_names`.
feature_info = {
    name: {"Score": scores[pos], "p-value": p_values[pos]}
    for pos, name in enumerate(feature_names)
}

In [78]:
# Report every feature's score and p-value, rounded to four decimals.
for name, stats in feature_info.items():
    print(f"Feature: {name}")
    print(f"  Score: {stats['Score']:.4f}")
    print(f"  p-value: {stats['p-value']:.4f}")


Feature: sepal length (cm)
  Score: 10.8178
  p-value: 0.0045
Feature: sepal width (cm)
  Score: 3.7107
  p-value: 0.1564
Feature: petal length (cm)
  Score: 116.3126
  p-value: 0.0000
Feature: petal width (cm)
  Score: 67.0484
  p-value: 0.0000


Using the ANOVA F-test (`f_classif`) as the scoring function

In [79]:
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif


# Reload iris so this cell does not depend on the variables of the chi2 cells.
iris = load_iris()
X = iris.data
y = iris.target

# Number of features the selector keeps.
k = 2

# Same filter approach as above, but scored with f_classif (ANOVA) instead of chi2.
selector = SelectKBest(score_func=f_classif, k=k)
X_new = selector.fit_transform(X, y)

# Translate the selected column positions back into feature names.
feature_names = iris.feature_names
selected_features_indices = selector.get_support(indices=True)
selected_feature_names = [feature_names[pos] for pos in selected_features_indices]
print("Selected feature names:", selected_feature_names)

# Per-feature statistics from the fitted selector, keyed by feature name.
scores = selector.scores_
p_values = selector.pvalues_
feature_info = {
    name: {"Score": scores[pos], "p-value": p_values[pos]}
    for pos, name in enumerate(feature_names)
}

# Report every feature's score and p-value, rounded to four decimals.
for name, stats in feature_info.items():
    print(f"Feature: {name}")
    print(f"  Score: {stats['Score']:.4f}")
    print(f"  p-value: {stats['p-value']:.4f}")


Selected feature names: ['petal length (cm)', 'petal width (cm)']
Feature: sepal length (cm)
  Score: 119.2645
  p-value: 0.0000
Feature: sepal width (cm)
  Score: 49.1600
  p-value: 0.0000
Feature: petal length (cm)
  Score: 1180.1612
  p-value: 0.0000
Feature: petal width (cm)
  Score: 960.0071
  p-value: 0.0000


### **Wrapper Approach**

In [84]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [91]:
# Load the Iris dataset
# Reload iris for the wrapper experiment: feature matrix, class labels and column names.
iris = load_iris()
X, y = iris.data, iris.target
features = iris.feature_names

In [97]:
# Display the feature names taken from iris.feature_names.
features

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [87]:
# Base estimator that RFE wraps below. max_iter=2000 sets the solver's iteration
# limit -- presumably chosen so the fit converges on this data (TODO confirm).
model = LogisticRegression(max_iter=2000)

In [89]:
# Recursive feature elimination around `model`, stopping when two features remain.
rfe = RFE(estimator=model, n_features_to_select=2)
# Fit on the full dataset; left as the last expression so the fitted object is displayed.
rfe.fit(X, y)

In [90]:
# Read the selection mask and the per-feature ranking off the fitted RFE object.
selected_features = rfe.support_
feature_ranking = rfe.ranking_

print("Selected Features:", selected_features)
print("Feature Ranking:", feature_ranking)

# Reduce X to the columns RFE selected.
X_selected = rfe.transform(X)

Selected Features: [False False  True  True]
Feature Ranking: [3 2 1 1]


In [98]:
# Integer column positions of the features RFE kept (get_support with indices=True).
selected_features_indices = rfe.get_support(indices=True)
# Bare expression: displays the index array as the cell output.
selected_features_indices

array([2, 3])

In [101]:
# Print the name and RFE rank of every selected feature.
for idx in selected_features_indices:
    print("Selected Features:", features[idx])
    print("Selected Feature Rank:", feature_ranking[idx])

Selected Features: petal length (cm)
Selected Feature Rank: 1
Selected Features: petal width (cm)
Selected Feature Rank: 1
