<a href="https://colab.research.google.com/github/its-anuskapalit/python/blob/main/selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the objective function to evaluate the fitness of solutions
def objective_function(features):
    selected_features = [i for i in range(len(features)) if features[i] == 1]
    if len(selected_features) == 0:
        return 0  # Penalize solutions with no features selected
    else:
        clf = RandomForestClassifier(random_state=42)
        clf.fit(X_train[:, selected_features], y_train)
        y_pred = clf.predict(X_test[:, selected_features])
        return accuracy_score(y_test, y_pred)

# Initialize parameters
population_size = 20
dimension = X.shape[1]  # Number of features
max_iter = 50
pa = 0.25  # Probability of a cuckoo egg being discovered

# Initialize the population
population = np.random.randint(2, size=(population_size, dimension))

# Perform Cuckoo Search Algorithm
for iter in range(max_iter):
    # Generate new solutions by performing cuckoo breeding
    new_population = population.copy()
    for i in range(population_size):
        # Choose a random solution (cuckoo host)
        j = np.random.randint(population_size)
        while j == i:
            j = np.random.randint(population_size)

        # Generate a new solution (cuckoo egg) by performing random walk
        step_size = np.random.uniform(low=-1, high=1, size=dimension)
        new_solution = population[i] + step_size

        # Perform simple bound check
        new_solution = np.clip(new_solution, 0, 1)

        # Evaluate the fitness of the new solution
        if np.random.rand() > pa:
            if objective_function(new_solution) > objective_function(population[i]):
                population[i] = new_solution

    # Select the best solutions to form the next generation
    sorted_indices = np.argsort([objective_function(sol) for sol in population])[::-1]
    population = population[sorted_indices[:population_size]]

# Select the best solution
best_solution = population[0]
selected_features = [i for i in range(len(best_solution)) if best_solution[i] == 1]

print("Selected Features:", selected_features)
print("Accuracy:", objective_function(best_solution))


Selected Features: [3]
Accuracy: 1.0


In [2]:
#  Greedy Forward Selection (GFS) algorithm
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def objective_function(features):
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train[:, features], y_train)
    y_pred = clf.predict(X_test[:, features])
    return accuracy_score(y_test, y_pred)

selected_features = []
remaining_features = set(range(X.shape[1]))  # Set of remaining features

while remaining_features:
    best_feature = None
    best_accuracy = 0

    for feature in remaining_features:
        candidate_features = selected_features + [feature]
        accuracy = objective_function(candidate_features)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_feature = feature


    selected_features.append(best_feature)
    remaining_features.remove(best_feature)

print("Selected Features:", selected_features)
print("Accuracy:", best_accuracy)


Selected Features: [3, 2, 0, 1]
Accuracy: 1.0


In [3]:
 # Recursive Feature Elimination (RFE)
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize a RandomForestClassifier
clf = RandomForestClassifier(random_state=42)

# Initialize RFE and specify the number of features to select
num_features_to_select = 2
rfe = RFE(estimator=clf, n_features_to_select=num_features_to_select)

# Fit RFE
rfe.fit(X_train, y_train)

# Get selected features
selected_features = [i for i, mask in enumerate(rfe.support_) if mask]

# Train the classifier using only selected features
clf.fit(X_train[:, selected_features], y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test[:, selected_features])

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

print("Selected Features:", selected_features)
print("Accuracy:", accuracy)


Selected Features: [2, 3]
Accuracy: 1.0
