In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np

In [3]:
# Define activations separately for indexing and instantiation
activation_names = ['sigmoid', 'tanh', 'relu', 'elu', 'selu', 'swish']
activation_map = {
    'sigmoid': nn.Sigmoid,
    'tanh': nn.Tanh,
    'relu': nn.ReLU,
    'elu': nn.ELU,
    'selu': nn.SELU,
    'swish': lambda: nn.SiLU(),  # swish ≈ SiLU in PyTorch
}

def decode_action(action_id):
    if 0 <= action_id <= 41:  # Dense Layer with some number of units
        units_list = [8, 16, 32, 64, 128, 256, 512]
        units_idx = action_id // len(activation_names)
        act_idx = action_id % len(activation_names)
        units = units_list[units_idx]
        activation_name = activation_names[act_idx]
        activation_fn = activation_map[activation_name]
        return ('dense', units, activation_name)  

    elif 42 <= action_id <= 44:  # Dropout layer
        dropout_rates = [0.0, 0.2, 0.5]
        return ('dropout', dropout_rates[action_id - 42])

    elif action_id == 45:  # BatchNorm layer
        return ('batchnorm',)

    elif action_id == 46:  # Stop building layers
        return ('stop',)

    else:
        raise ValueError(f"Invalid action id: {action_id}")


In [5]:
class NASMLPEnv(Env):
    def __init__(self, dataset, max_layers=6):
        self.dataset = dataset  
        self.max_layers = max_layers
        self.action_space = Discrete(47)
        self.observation_space = Box(low=0, high=1, shape=(max_layers * 3,), dtype=np.float32) # 3 inputs per layer, layer type, number of units, and activation function
        self.architecture = []
        self.done = False

        self.best_architecture = None
        self.best_reward = -float('inf')

        self.architecture_log = []

    def reset(self):
        self.architecture = []
        self.done = False
        return self._get_obs()

    def step(self, action_id):
        decoded = decode_action(action_id)

        if decoded[0] == 'stop' or len(self.architecture) >= self.max_layers:
            self.done = True
            reward = self._evaluate_model()
            self.architecture_log.append({ # logging the results so that we can analyze later
                'architecture': list(self.architecture),
                'reward': reward
            })
            print(f"\n🎯 Final Architecture: {self.architecture}")
            print(f"🏆 Validation Accuracy (Reward): {reward:.2f}%\n")

            # Track best
            if reward > self.best_reward:
                self.best_reward = reward
                self.best_architecture = list(self.architecture)
                print(f"🌟 New Best Architecture Found with Reward: {reward:.2f}%")
        else:
            self.architecture.append(decoded)
            reward = 0

        return self._get_obs(), reward, self.done, {}

    def _get_obs(self):
        obs = np.zeros(self.observation_space.shape[0])
        for i, layer in enumerate(self.architecture):
            base = i * 3
            if layer[0] == 'dense':
                obs[base] = layer[1] / 512  # normalized units
                obs[base + 1] = activation_names.index(layer[2]) / (len(activation_names) - 1)
                obs[base + 2] = 0
            elif layer[0] == 'dropout':
                obs[base] = layer[1]
                obs[base + 1] = -1
                obs[base + 2] = 1
            elif layer[0] == 'batchnorm':
                obs[base] = -1
                obs[base + 1] = -1
                obs[base + 2] = 2
        return obs

    def _evaluate_model(self):
        X_train, y_train, X_val, y_val = self.dataset

        model = nn.Sequential()
        input_dim = X_train.shape[1]

        for i, layer in enumerate(self.architecture):
            if layer[0] == 'dense':
                model.add_module(f"fc{i}", nn.Linear(input_dim, layer[1]))
                model.add_module(f"act{i}", activation_map[layer[2]]())
                input_dim = layer[1]
            elif layer[0] == 'dropout':
                model.add_module(f"dropout{i}", nn.Dropout(p=layer[1]))
            elif layer[0] == 'batchnorm':
                model.add_module(f"bn{i}", nn.BatchNorm1d(input_dim))

        model.add_module("output", nn.Linear(input_dim, y_train.shape[1]))

        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01) # can make the RL choose optimizer and LR also here
        train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)

        model.train()
        for _ in range(3):  # Only 3 epochs, can change this
            for xb, yb in train_loader:
                optimizer.zero_grad()
                output = model(xb)
                loss = loss_fn(output, yb)
                loss.backward()
                optimizer.step()

        model.eval()
        with torch.no_grad():
            preds = model(X_val)
            acc = (preds.argmax(dim=1) == y_val.argmax(dim=1)).float().mean().item()

        return acc * 100

In [7]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.nn.functional import one_hot

import openml
dataset = openml.datasets.get_dataset(42769)
X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

df = pd.concat([X, y], axis=1)

df.dropna(inplace=True)

X = df.drop(columns=[dataset.default_target_attribute])
y = df[dataset.default_target_attribute]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

le = LabelEncoder()
y_encoded = le.fit_transform(y)  

y_oh = one_hot(torch.tensor(y_encoded)).float()

X_train_np, X_val_np, y_train_tensor, y_val_tensor = train_test_split(
    X_scaled, y_oh, test_size=0.1, random_state=42)

X_train_tensor = torch.tensor(X_train_np).float()
X_val_tensor = torch.tensor(X_val_np).float()

In [8]:
dataset = (X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)

In [11]:
print(X_train_tensor.shape)  # [n_train, n_features]
print(y_train_tensor.shape)  # [n_train, 2]  (one-hot binary)

torch.Size([900000, 28])
torch.Size([900000, 2])


In [17]:
# Pass into your NAS environment
env = NASMLPEnv(dataset=dataset, max_layers=5)

In [29]:
from stable_baselines3 import DQN

import time

start_time = time.time()

model_1000 = DQN("MlpPolicy", env, verbose=1, tensorboard_log="./nas_logs/")
model_1000.learn(total_timesteps=1000)

end_time = time.time()
elapsed_time = end_time - start_time

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./nas_logs/DQN_10





🎯 Final Architecture: [('dense', 16, 'swish'), ('dense', 128, 'sigmoid'), ('dense', 512, 'elu'), ('dense', 512, 'sigmoid'), ('dense', 16, 'selu')]
🏆 Validation Accuracy (Reward): 52.95%


🎯 Final Architecture: [('dense', 16, 'tanh'), ('dense', 128, 'selu'), ('dense', 256, 'swish'), ('dense', 8, 'relu'), ('dense', 256, 'elu')]
🏆 Validation Accuracy (Reward): 52.95%


🎯 Final Architecture: [('dense', 256, 'selu'), ('dense', 512, 'swish'), ('dense', 32, 'swish'), ('dense', 128, 'tanh'), ('dense', 256, 'relu')]
🏆 Validation Accuracy (Reward): 52.95%


🎯 Final Architecture: [('dense', 512, 'elu'), ('dense', 128, 'selu'), ('dense', 128, 'swish'), ('dense', 256, 'selu'), ('dense', 16, 'sigmoid')]
🏆 Validation Accuracy (Reward): 52.95%

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 6        |
|    ep_rew_mean      | 53       |
|    exploration_rate | 0.772    |
| time/               |          |
|    episodes         | 4        |
|    fps       

KeyboardInterrupt: 

In [21]:
model.save("nas_agent_checkpoint")

In [None]:
# analyze the results

In [23]:
print(elapsed_time)

NameError: name 'elapsed_time' is not defined

In [31]:
print(env.best_architecture)

[('dense', 128, 'sigmoid'), ('dense', 64, 'relu'), ('dense', 8, 'elu'), ('dense', 128, 'elu'), ('batchnorm',)]


In [33]:
print(f"\n Best Validation Accuracy (Reward): {env.best_reward:.2f}%")


 Best Validation Accuracy (Reward): 74.23%


In [35]:
# Analyzing the different combinations
import pandas as pd

flat_data = []
for run in env.architecture_log:
    has_dropout = any(layer[0] == 'dropout' for layer in run['architecture'])
    has_batchnorm = any(layer[0] == 'batchnorm' for layer in run['architecture'])

    for layer in run['architecture']:
        if layer[0] == 'dense':
            flat_data.append({
                'units': layer[1],
                'activation': layer[2],
                'dropout': has_dropout,
                'batchnorm': has_batchnorm,
                'reward': run['reward']
            })
df = pd.DataFrame(flat_data)
df.to_csv("architecture_analysis.csv", index=False)

In [37]:
df

Unnamed: 0,units,activation,dropout,batchnorm,reward
0,8,selu,True,False,69.476002
1,128,selu,True,False,69.476002
2,128,relu,True,False,69.476002
3,256,swish,False,False,47.049001
4,64,swish,False,False,47.049001
...,...,...,...,...,...
2759,256,relu,False,False,73.479003
2760,256,relu,False,False,73.479003
2761,256,relu,False,False,73.479003
2762,256,relu,False,False,73.479003


In [None]:
# analyze effect of different units in each layer
# analyze effect of different activation functions
# analyze the effect of different layers