In [1]:
# importing the required modules
from sklearn.datasets import make_regression, make_classification
import numpy as np

# seed NumPy's global random number generator so repeated runs are reproducible
np.random.seed(37)

# creating a classification dataset
def get_classification_data():
    """Generate the synthetic binary-classification dataset used in this notebook.

    Returns whatever ``make_classification`` returns for the fixed settings
    below: 2000 samples with 20 features, of which 2 are informative and
    2 are redundant, split into 2 classes with 2 clusters each.  The
    ``random_state`` is pinned so every call yields the same data.
    """
    return make_classification(
        n_samples=2000,
        n_features=20,
        n_informative=2,
        n_redundant=2,
        n_repeated=0,
        n_classes=2,
        n_clusters_per_class=2,
        random_state=37,
    )

# build the dataset once; x receives the feature matrix and y the class labels
x, y = get_classification_data()

In [2]:
# importing the required modules
from sklearn.feature_selection import SelectKBest, f_classif, f_regression

# Create the selector object - keep the 10 highest-scoring features.
# y holds class labels, so score with f_classif (ANOVA F-test for
# classification) rather than f_regression, which targets continuous outputs.
selector = SelectKBest(f_classif, k=10)

# fit the scorer on (x, y) and keep only the selected columns
X_new = selector.fit_transform(x, y)

# shape of the data before and after selection
print("Shape of orignal data: ",x.shape)
print("Shape of extracted data: ",X_new.shape)

Shape of orignal data:  (2000, 20)
Shape of extracted data:  (2000, 10)


In [3]:
# importing the required modules
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier

# Create the model.  random_state is fixed so the forest (and therefore the
# features RFE keeps) does not depend on how much of NumPy's global random
# stream earlier cells have consumed, i.e. on cell execution order.
model = RandomForestClassifier(random_state=37)

# RFE in sklearn feature selector: repeatedly refit the model and discard
# the weakest features until 10 remain
rfe = RFE(model, n_features_to_select=10)
X_new = rfe.fit_transform(x, y)

# shape of the data before and after selection
print("Shape of orignal data: ",x.shape)
print("Shape of extracted data: ",X_new.shape)

Shape of orignal data:  (2000, 20)
Shape of extracted data:  (2000, 10)


In [4]:
from sklearn.ensemble import RandomForestClassifier

# Create the model and fit it to the data.  random_state is fixed so the
# importances (and the resulting top-10 ranking) are the same on every run
# instead of depending on the state of NumPy's global random generator.
model = RandomForestClassifier(random_state=37)
model.fit(x, y)

# Get the feature importances (one value per column of x)
importances = model.feature_importances_

# Sort the feature indices from most to least important
sorted_indexes = importances.argsort()[::-1]

# Select the top 10 features
X_new = x[:, sorted_indexes[:10]]


# shape of the data before and after selection
print("Shape of orignal data: ",x.shape)
print("Shape of extracted data: ",X_new.shape)


Shape of orignal data:  (2000, 20)
Shape of extracted data:  (2000, 10)


In [10]:
# Importing the feature selection
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier handed to the sequential selector
knn_model = KNeighborsClassifier(n_neighbors=3)

# sequential selector configured to keep 10 features
sfs = SequentialFeatureSelector(knn_model, n_features_to_select=10)

# fitting the selector
sfs.fit(x, y)

# boolean mask marking which columns of x were selected
# (kept in a variable instead of being computed and thrown away)
selected_mask = sfs.get_support()

# shape of the reduced data; as the last expression it is what the cell displays
sfs.transform(x).shape

(2000, 10)

In [11]:
# Importing the modules
from sklearn.svm import SVC
from sklearn.feature_selection import RFE

# Linear-kernel support vector classifier that RFE refits on each round
model = SVC(kernel='linear')

# Recursive feature elimination down to 10 features; verbose=2 logs every refit
selector = RFE(model, n_features_to_select=10, verbose=2)

# Fit the selector, then reduce x to the surviving columns
X_new = selector.fit(x, y).transform(x)

# Report the shape before and after selection
for caption, matrix in (
    ("Shape of orignal data: ", x),
    ("Shape of extracted data: ", X_new),
):
    print(caption, matrix.shape)

Fitting estimator with 20 features.
Fitting estimator with 19 features.
Fitting estimator with 18 features.
Fitting estimator with 17 features.
Fitting estimator with 16 features.
Fitting estimator with 15 features.
Fitting estimator with 14 features.
Fitting estimator with 13 features.
Fitting estimator with 12 features.
Fitting estimator with 11 features.
Shape of orignal data:  (2000, 20)
Shape of extracted data:  (2000, 10)


In [12]:
# importing the required modules
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline


# Pipeline stages, in execution order:
#   scaler   - feature scaling
#   selector - keep the 10 best features by f_classif score
#   model    - linear-kernel SVC fitted on the selected features
steps = list(zip(
    ('scaler', 'selector', 'model'),
    (StandardScaler(), SelectKBest(f_classif, k=10), SVC(kernel='linear')),
))

# Assemble the stages into a single estimator
pipeline = Pipeline(steps=steps)

# Fit every stage on the data (last expression, so the fitted pipeline is displayed)
pipeline.fit(x, y)
