## Recursive Feature Elimination (RFE)

In [1]:
# Load the data
import pandas as pd
import numpy as np

# Location of the iris dataset (gist pinned to a specific revision)
IRIS_CSV_URL = 'https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv'

# Read the CSV into a DataFrame
data = pd.read_csv(IRIS_CSV_URL)

# Preview the first rows as the cell output
data.head()



,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
# Print a summary of the DataFrame: index range, column names,
# non-null counts per column, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [14]:
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# One seed shared by the split and the estimator so that re-running the
# cell on a fresh kernel reproduces the same selection and ranking.
SEED = 42

X = data.drop('species', axis=1)  # input feature matrix
y = data['species']  # target vector

# Hold out 30% of the rows for validation
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.3, random_state=SEED)

# Base estimator used by RFE. The seed is fixed because a decision tree
# permutes the candidate features at each split, so ties between equally
# good splits are otherwise broken at random and the resulting feature
# ranking could differ from run to run.
modelo = DecisionTreeClassifier(random_state=SEED)

# Recursive feature elimination: keep the 2 highest-ranked features
rfe = RFE(estimator=modelo, n_features_to_select=2)
rfe.fit(xtrain, ytrain)

# Selected features: `support_` is a boolean mask over the columns of X;
# `ranking_` assigns rank 1 to every selected feature.
caract_selec = X.columns[rfe.support_]
ranking = rfe.ranking_

print(f"Caracteristicas seleccionadas: {caract_selec.to_list()}")
print(f"Ranking de caracteristicas: {ranking}")

Caracteristicas seleccionadas: ['petal_length', 'petal_width']
Ranking de caracteristicas: [2 3 1 1]


In [15]:
from sklearn.metrics import accuracy_score

# Validate the result: keep only the columns that RFE marked as selected
selected_mask = rfe.support_
xtrain_rfe = xtrain.iloc[:, selected_mask]
xtest_rfe = xtest.iloc[:, selected_mask]

# Refit the estimator on the reduced training set
modelo.fit(xtrain_rfe, ytrain)

# Score the predictions on the held-out rows
ypred = modelo.predict(xtest_rfe)
exact = accuracy_score(ytest, ypred)

print(f"Exactitud con RFE: {exact:.2f}")

Exactitud con RFE: 1.00
