In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the iris dataset that ships with scikit-learn.
data = load_iris()

# Feature matrix as a DataFrame, labelled with the dataset's own feature names.
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Targets as a Series; it gets the default positional index, matching X row-for-row.
y = pd.Series(data=data.target)


In [3]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Univariate Selection

In [4]:
from sklearn.feature_selection import SelectKBest, chi2

In [5]:
# Univariate filter: score every feature against the target with chi2 and keep the best two.
select_k_best = SelectKBest(chi2, k=2)

# Fit on the training split only, then reduce that split to the selected columns.
X_train_k = select_k_best.fit(X_train, y_train).transform(X_train)

In [6]:
# The selector exposes a boolean mask; index the column labels with it to get names back.
kbest_mask = select_k_best.get_support()
print("Selected features: ", X_train.columns[kbest_mask])

Selected features:  Index(['petal length (cm)', 'petal width (cm)'], dtype='object')


# Recursive Feature Elimination

In [7]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

In [8]:
# Base estimator used by RFE to rank the features (default hyperparameters).
model = LogisticRegression()

# Recursive feature elimination: ask RFE to end up with exactly two features.
rfe = RFE(estimator=model, n_features_to_select=2)

# Fit on the training split, then project that split onto the surviving features.
X_train_rfe = rfe.fit(X_train, y_train).transform(X_train)

In [9]:
# Translate RFE's boolean support mask into the names of the retained columns.
rfe_mask = rfe.get_support()
print("Selected features: ", X_train.columns[rfe_mask])

Selected features:  Index(['petal length (cm)', 'petal width (cm)'], dtype='object')


# Tree-Based Feature Importance

In [10]:
from sklearn.ensemble import RandomForestClassifier

In [11]:
# Fit a random forest on the training split and read its per-feature importance scores.
# A random forest is stochastic (bootstrap sampling and random feature subsets), so
# without a seed the importances differ on every run. random_state=42 matches the seed
# used for the train/test split and makes the result reproducible under
# Restart & Run All.
# NOTE: this rebinds `model`, which previously referred to the LogisticRegression
# instance passed to RFE.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# One score per column of X_train, in column order.
importances = model.feature_importances_

In [12]:
# Attach the training column names to the raw importance array so each score is labelled.
feature_imp = pd.Series(data=importances, index=X_train.columns)
print("Feature importances:\n", feature_imp)

Feature importances:
 sepal length (cm)    0.100337
sepal width (cm)     0.029440
petal length (cm)    0.440371
petal width (cm)     0.429851
dtype: float64
