In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from collections import defaultdict

# Initialize CFS parameters
class CFS:
    def __init__(self, learner=None, episode_size=100, iterations=100, epsilon=0.9, learning_rate=0.1, discount_factor=0.01):
        self.learner = learner if learner else DecisionTreeClassifier()
        self.episode_size = episode_size
        self.iterations = iterations
        self.epsilon = epsilon
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.q_values = defaultdict(lambda: 1.0)  # Optimistic initialization

    def run(self, X, y):
        n_samples, n_features = X.shape
        selected_features = set()

        for episode in range(self.iterations):
            current_state = "s0"  # Initial state before any feature is selected
            available_features = set(range(n_features))

            while available_features:
                action = self.select_action(current_state, available_features)
                available_features.remove(action)
                selected_features.add(action)

                # Compute the internal reward (change in error)
                reward = self.calculate_reward(X, y, selected_features)
                self.update_q_values(current_state, action, reward)

                current_state = f"s{action}"

            # Decay epsilon and learning rate over time
            self.epsilon = max(0.1, self.epsilon * 0.99)
            self.learning_rate = max(0.01, self.learning_rate * 0.99)

        return list(selected_features)

    def select_action(self, state, available_features):
        if np.random.rand() < self.epsilon:
            # Exploration: choose a random action
            return np.random.choice(list(available_features))
        else:
            # Exploitation: choose the action with the highest Q-value
            return max(available_features, key=lambda a: self.q_values[(state, a)])

    def calculate_reward(self, X, y, selected_features):
        if not selected_features:
            return 0
        X_subset = X[:, list(selected_features)]
        self.learner.fit(X_subset, y)
        y_pred = self.learner.predict(X_subset)
        error = 1 - accuracy_score(y, y_pred)
        return -error  # Reward is the negative error

    def update_q_values(self, state, action, reward):
        max_next_q = max(self.q_values[(f"s{action}", a)] for a in range(X.shape[1])) if state != "s0" else 0
        self.q_values[(state, action)] += self.learning_rate * (reward + self.discount_factor * max_next_q - self.q_values[(state, action)])

# Example usage:
# X, y = load_your_dataset()
# cfs = CFS()
# selected_features = cfs.run(X, y)
# print("Selected Features:", selected_features)


In [5]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from collections import defaultdict
from sklearn.datasets import make_classification

# ✅ Similarity: Uses a synthetic dataset for testing, like in the paper where real-world datasets were used.
def generate_dataset():
    X, y = make_classification(n_samples=100, n_features=5, n_informative=2, 
                               n_redundant=0, random_state=42)
    return X, y

# ✅ Similarity: Implements reinforcement learning (Q-learning) for feature selection.
class CFS:
    def __init__(self, learner=None, episode_size=10, iterations=20, 
                 epsilon=0.9, learning_rate=0.1, discount_factor=0.01):
        self.learner = learner if learner else DecisionTreeClassifier()
        self.episode_size = episode_size
        self.iterations = iterations
        self.epsilon = epsilon  # ✅ Similarity: Implements ε-greedy exploration-exploitation.
        self.learning_rate = learning_rate  # ✅ Similarity: Uses a learning rate for Q-updates.
        self.discount_factor = discount_factor  # ✅ Similarity: Uses a discount factor like in reinforcement learning.
        self.q_values = defaultdict(lambda: 1.0)  # ✅ Similarity: Q-table with optimistic initialization.

    def run(self, X, y):
        n_samples, n_features = X.shape
        best_feature_sets = []  # Store best feature subsets across episodes.

        for episode in range(self.iterations):
            current_state = "s0"  # ✅ Similarity: Uses MDP-style state-action pairs.
            available_features = set(range(n_features))
            selected_features = set()  # ✅ Similarity: Resets feature selection per episode (prevents accumulation bias).

            prev_error = 1.0  # Start with max error before any selection.

            while available_features:
                action = self.select_action(current_state, available_features)
                available_features.remove(action)
                selected_features.add(action)

                # ✅ Similarity: Computes reward based on accuracy improvement.
                reward, new_error = self.calculate_reward(X, y, selected_features, prev_error)
                prev_error = new_error  # Update previous error for next step.

                # ✅ Similarity: Q-values updated using the reinforcement learning rule.
                self.update_q_values(current_state, action, reward, n_features)

                current_state = f"s{action}"  # ✅ Similarity: Tracks feature selection transitions as states.

                # ✅ Similarity: Stops when reward improvement is negligible (convergence criterion).
                if abs(reward) < 0.01:
                    break  

            # ✅ Similarity: Decays epsilon and learning rate for better convergence.
            self.epsilon = max(0.1, self.epsilon * 0.99)
            self.learning_rate = max(0.01, self.learning_rate * 0.99)

            best_feature_sets.append(sorted(selected_features))
            print(f"Iteration {episode+1}: Selected Features: {sorted(selected_features)}")

        # ✅ Similarity: Final feature selection is based on the most frequently chosen subset across episodes.
        final_features = max(set(tuple(f) for f in best_feature_sets), key=best_feature_sets.count)
        return list(final_features)

    def select_action(self, state, available_features):
        # ✅ Similarity: Uses ε-greedy strategy for balancing exploration and exploitation.
        if np.random.rand() < self.epsilon:
            return np.random.choice(list(available_features))  # Exploration
        else:
            return max(available_features, key=lambda a: self.q_values[(state, a)])  # Exploitation

    def calculate_reward(self, X, y, selected_features, prev_error):
        if not selected_features:
            return 0, prev_error
        X_subset = X[:, list(selected_features)]
        self.learner.fit(X_subset, y)
        y_pred = self.learner.predict(X_subset)
        error = 1 - accuracy_score(y, y_pred)

        # ✅ Similarity: Reward function is based on improvement in model accuracy (negative error reduction).
        reward = prev_error - error  
        return reward, error

    def update_q_values(self, state, action, reward, n_features):
        next_state = f"s{action}"
        # ✅ Similarity: Finds max Q-value for next state to use in Q-learning update rule.
        possible_q_values = [self.q_values[(next_state, a)] for a in range(n_features) 
                             if (next_state, a) in self.q_values]
        max_next_q = max(possible_q_values, default=0)  # Avoids empty sequence error.

        # ✅ Similarity: Standard Q-learning update rule.
        self.q_values[(state, action)] += self.learning_rate * (
            reward + self.discount_factor * max_next_q - self.q_values[(state, action)]
        )

# ✅ Similarity: Runs the algorithm on a dataset, like in the paper's experiments.
X, y = generate_dataset()
cfs = CFS()
selected_features = cfs.run(X, y)

# ✅ Similarity: Prints the final selected feature set, as done in the paper's results section.
print("\nFinal Selected Features:", selected_features)


Iteration 1: Selected Features: [1, 2]
Iteration 2: Selected Features: [2, 3]
Iteration 3: Selected Features: [0, 1]
Iteration 4: Selected Features: [3, 4]
Iteration 5: Selected Features: [1, 4]
Iteration 6: Selected Features: [3, 4]
Iteration 7: Selected Features: [1, 4]
Iteration 8: Selected Features: [0, 3]
Iteration 9: Selected Features: [1, 2]
Iteration 10: Selected Features: [0, 1]
Iteration 11: Selected Features: [3, 4]
Iteration 12: Selected Features: [2, 3]
Iteration 13: Selected Features: [3, 4]
Iteration 14: Selected Features: [0, 4]
Iteration 15: Selected Features: [0, 1]
Iteration 16: Selected Features: [0, 4]
Iteration 17: Selected Features: [1, 3]
Iteration 18: Selected Features: [2, 3]
Iteration 19: Selected Features: [1, 4]
Iteration 20: Selected Features: [0, 3]

Final Selected Features: [0, 1]
