In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score


# --- Load the data, engineer features, and split ---------------------------
iris = load_iris()
feature_names = iris.feature_names

# Base frame: the raw measurements followed by the class label column.
df = pd.DataFrame(iris.data, columns=feature_names).assign(target=iris.target)

# Append one "<name>_squared" column per raw measurement. They land after
# 'target', in the same order as feature_names, so the feature matrix below is
# laid out as [raw features..., squared features...].
squared_columns = {f'{name}_squared': df[name] ** 2 for name in feature_names}
df = df.assign(**squared_columns)

# Everything except the label is a predictor.
X = df.drop(columns='target')
y = df['target']

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)


# --- Standardise, then reduce dimensionality with PCA ----------------------
# The scaler learns its statistics from the training split only; the test
# split is transformed with those same fitted statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Keep 5 principal components of the standardised features. As with the
# scaler, the projection is fitted on the training split and then applied
# unchanged to the test split.
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)


# --- Rank features by random-forest importance -----------------------------
# This forest is trained on the unscaled engineered features (X_train), not on
# the standardised or PCA-projected versions.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

importances = rf.feature_importances_
# argsort is ascending, so reverse it to list the most important feature first.
indices = np.argsort(importances)[::-1]

print("Feature ranking:")
# The number printed after "feature" is the column position in X_train.
for rank, column in enumerate(indices, start=1):
    print(f"{rank}. feature {column} ({importances[column]})")


def _fit_and_score(train_features, test_features, train_labels, test_labels):
    """Train a fresh random forest on one feature representation and score it.

    A new 100-tree forest (seed 42) is fitted on ``train_features`` /
    ``train_labels`` and used to predict ``test_features``.

    Returns:
        A ``(model, predictions, accuracy)`` tuple: the fitted forest, its
        predictions for ``test_features``, and the accuracy of those
        predictions against ``test_labels``.
    """
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(train_features, train_labels)
    predictions = model.predict(test_features)
    return model, predictions, accuracy_score(test_labels, predictions)


# --- Option 1: importance-based feature selection --------------------------
# prefit=True reuses the already-fitted `rf` instead of training a new
# estimator; the median feature importance serves as the selection threshold.
selector = SelectFromModel(rf, prefit=True, threshold='median')
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

rf_selected, y_pred_selected, accuracy_selected = _fit_and_score(
    X_train_selected, X_test_selected, y_train, y_test
)
print(f"Accuracy with selected features: {accuracy_selected:.4f}")


# --- Option 2: PCA components ----------------------------------------------
# Same model configuration, trained on the PCA projection instead, so the two
# accuracies compare the feature representations rather than the models.
rf_pca, y_pred_pca, accuracy_pca = _fit_and_score(
    X_train_pca, X_test_pca, y_train, y_test
)
print(f"Accuracy with PCA features: {accuracy_pca:.4f}")


Feature ranking:
1. feature 2 (0.27831504421819564)
2. feature 3 (0.23554734805977465)
3. feature 7 (0.19317099444177885)
4. feature 6 (0.14515396367269673)
5. feature 0 (0.04919570290737922)
6. feature 4 (0.042574818298975414)
7. feature 5 (0.0305656691701656)
8. feature 1 (0.025476459231033963)




Accuracy with selected features: 1.0000
Accuracy with PCA features: 0.9778
