<a href="https://colab.research.google.com/github/hamidzangiabadi/DQN-ACO/blob/main/DQNandACO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Training Loop

In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

class FeatureSelectionEnv:
    def __init__(self, X, y, penalty_factor=0.01):
        self.X = X
        self.y = y
        self.n_features = X.shape[1]
        self.state = np.zeros(self.n_features)  # Binary state vector indicating feature selection
        self.current_step = 0
        self.penalty_factor = penalty_factor  # Factor to penalize feature count

    def reset(self):
        self.state = np.zeros(self.n_features)  # Reset state to all features unselected
        self.current_step = 0
        return self.state

    def step(self, action):
        # Ensure action is a scalar value (0 or 1)
        action = int(action)  # Convert action to integer

        if action not in [0, 1]:
            raise ValueError("Action must be either 0 or 1")

        # Apply action to state
        self.state[self.current_step] = action
        self.current_step += 1

        done = self.current_step == self.n_features
        reward = self.evaluate() if done else 0
        return self.state, reward, done

    def evaluate(self):
        selected_features = [i for i in range(self.n_features) if self.state[i] == 1]
        if len(selected_features) == 0:
            return 0
        X_selected = self.X[:, selected_features]
        classifier = RandomForestClassifier()
        score = cross_val_score(classifier, X_selected, self.y, cv=5).mean()
        # Penalize based on the number of features selected
        penalty = self.penalty_factor * len(selected_features)
        return score - penalty


In [2]:
import torch
import torch.nn as nn

class DQN(nn.Module):
    def __init__(self, n_features):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(n_features, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 2)  # Output 2 values for action selection

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import torch

# Load Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [4]:
import torch.optim as optim
import random
from collections import namedtuple, deque

Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state'))

class ReplayMemory:
    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

def select_action(state, policy_net, epsilon):
    if random.random() < epsilon:
        return random.randint(0, 1)  # Random action (exploration)
    with torch.no_grad():
        state_tensor = torch.FloatTensor(state).unsqueeze(0)  # Add batch dimension
        q_values = policy_net(state_tensor)
        return q_values.argmax().item()  # Best action (exploitation)

def optimize_model(policy_net, target_net, memory, optimizer, batch_size, gamma):
    if len(memory) < batch_size:
        return

    transitions = memory.sample(batch_size)
    batch = Transition(*zip(*transitions))

    state_batch = torch.FloatTensor(batch.state)
    action_batch = torch.LongTensor(batch.action).unsqueeze(1)
    reward_batch = torch.FloatTensor(batch.reward)
    next_state_batch = torch.FloatTensor(batch.next_state)

    q_values = policy_net(state_batch).gather(1, action_batch)
    next_q_values = target_net(next_state_batch).max(1)[0].detach()
    expected_q_values = reward_batch + (gamma * next_q_values)

    loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


In [5]:
import torch
import torch.optim as optim
import random
from collections import namedtuple, deque

Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state'))

class ReplayMemory:
    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

def select_action(state, policy_net, epsilon):
    if random.random() < epsilon:
        return random.randint(0, 1)  # Random action (exploration)
    with torch.no_grad():
        state_tensor = torch.FloatTensor(state).unsqueeze(0)  # Add batch dimension
        q_values = policy_net(state_tensor)
        return q_values.argmax().item()  # Best action (exploitation)

def optimize_model(policy_net, target_net, memory, optimizer, batch_size, gamma):
    if len(memory) < batch_size:
        return

    transitions = memory.sample(batch_size)
    batch = Transition(*zip(*transitions))

    state_batch = torch.FloatTensor(batch.state)
    action_batch = torch.LongTensor(batch.action).unsqueeze(1)
    reward_batch = torch.FloatTensor(batch.reward)
    next_state_batch = torch.FloatTensor(batch.next_state)

    q_values = policy_net(state_batch).gather(1, action_batch)
    next_q_values = target_net(next_state_batch).max(1)[0].detach()
    expected_q_values = reward_batch + (gamma * next_q_values)

    loss = nn.MSELoss()(q_values, expected_q_values.unsqueeze(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Parameters
gamma = 0.99
epsilon = 0.1
batch_size = 32
target_update = 10
n_episodes = 20

# Initialize environment, DQN, and optimizer
env = FeatureSelectionEnv(X_train, y_train, penalty_factor=0.01)  # Adjust penalty_factor as needed
n_features = env.n_features

policy_net = DQN(n_features)
target_net = DQN(n_features)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=1e-3)
memory = ReplayMemory(1000)

for episode in range(n_episodes):
    state = env.reset()
    done = False

    while not done:
        action = select_action(state, policy_net, epsilon)
        next_state, reward, done = env.step(action)

        memory.push(state, action, reward, next_state)
        optimize_model(policy_net, target_net, memory, optimizer, batch_size, gamma)
        state = next_state

    if episode % target_update == 0:
        target_net.load_state_dict(policy_net.state_dict())

    print(f"Episode {episode + 1} completed")

print("Training completed.")

# Extract the selected features
selected_features = [i for i in range(n_features) if env.state[i] == 1]
print("Selected features:", selected_features)

# Evaluate the performance using the selected features
def evaluate_selected_features(X_test, y_test, selected_features):
    if len(selected_features) == 0:
        return 0
    X_selected = X_test[:, selected_features]
    classifier = RandomForestClassifier()
    classifier.fit(X_train[:, selected_features], y_train)
    accuracy = classifier.score(X_selected, y_test)
    return accuracy

accuracy = evaluate_selected_features(X_test, y_test, selected_features)
print(f"Accuracy with selected features: {accuracy:.4f}")


Episode 1 completed
Episode 2 completed
Episode 3 completed
Episode 4 completed
Episode 5 completed
Episode 6 completed
Episode 7 completed


  state_batch = torch.FloatTensor(batch.state)


Episode 8 completed
Episode 9 completed
Episode 10 completed
Episode 11 completed
Episode 12 completed
Episode 13 completed
Episode 14 completed
Episode 15 completed
Episode 16 completed
Episode 17 completed
Episode 18 completed
Episode 19 completed
Episode 20 completed
Training completed.
Selected features: [0, 1]
Accuracy with selected features: 0.7778
