In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np

In [2]:
# Define activations separately for indexing and instantiation
activation_names = ['sigmoid', 'tanh', 'relu', 'elu', 'selu', 'swish']
activation_map = {
    'sigmoid': nn.Sigmoid,
    'tanh': nn.Tanh,
    'relu': nn.ReLU,
    'elu': nn.ELU,
    'selu': nn.SELU,
    'swish': lambda: nn.SiLU(),  # swish ≈ SiLU in PyTorch
}

def decode_action(action_id):
    if 0 <= action_id <= 41:  # Dense Layer with some number of units
        units_list = [8, 16, 32, 64, 128, 256, 512]
        units_idx = action_id // len(activation_names)
        act_idx = action_id % len(activation_names)
        units = units_list[units_idx]
        activation_name = activation_names[act_idx]
        activation_fn = activation_map[activation_name]
        return ('dense', units, activation_name)  

    elif 42 <= action_id <= 44:  # Dropout layer
        dropout_rates = [0.0, 0.2, 0.5]
        return ('dropout', dropout_rates[action_id - 42])

    elif action_id == 45:  # BatchNorm layer
        return ('batchnorm',)

    elif action_id == 46:  # Stop building layers
        return ('stop',)

    else:
        raise ValueError(f"Invalid action id: {action_id}")


In [5]:
# make sure you understand how this is working and how the results are being logged. 

In [7]:
class NASMLPEnv(Env):
    def __init__(self, dataset, max_layers=10):
        self.dataset = dataset  
        self.max_layers = max_layers
        self.action_space = Discrete(47)
        self.observation_space = Box(low=0, high=1, shape=((max_layers + 10) * 3,), dtype=np.float32)  # 3 inputs per layer, layer type, number of units, and activation function
        self.architecture = []
        self.done = False

        self.best_architecture = None
        self.best_reward = -float('inf')
        self.architecture_log = []

        self.max_parameters = self._estimate_max_parameters()

    def reset(self):
        if self.architecture:
            self.architecture_log.append({
                'architecture': list(self.architecture),
                'reward': 0,
                'accuracy': None,
                'complexity': None,
                'complexity_epochs': None,
                'completed': False,
                'termination_reason': 'incomplete'
            })
        self.architecture = []
        self.done = False
        return self._get_obs()

    def _estimate_max_parameters(self):
        input_dim = self.dataset[0].shape[1]  
        max_units = 512
        total_params = 0
    
        for _ in range(self.max_layers):
            total_params += input_dim * max_units + max_units  
            input_dim = max_units
    
        output_dim = self.dataset[1].shape[1]
        total_params += max_units * output_dim + output_dim
    
        return total_params


    def step(self, action_id):
        decoded = decode_action(action_id)
        if len(self.architecture) == 0 and decoded[0] == 'stop':
            return self._get_obs(), -100.0, False, {}

        dense_count = sum(1 for layer in self.architecture if layer[0] == 'dense')
        if decoded[0] == 'stop' or (decoded[0] == 'dense' and dense_count >= self.max_layers):
            self.done = True
            reward, acc, complexity, complexity_epochs = self._evaluate_model()  

            # Log architecture, reward, accuracy, and complexity
            self.architecture_log.append({
                'architecture': list(self.architecture),
                'reward': reward,
                'accuracy': acc,
                'complexity': complexity,
                'complexity_epochs': complexity_epochs,
                'completed': True,
                'termination_reason': 'stop' if decoded[0] == 'stop' else 'max_dense'
            })
        
    
            print(f"\n🎯 Final Architecture: {self.architecture}")
            print(f"🏆 Validation Accuracy (acc): {acc:.2f}%")
            print(f"⚙️  Complexity (number of layers): {complexity}")
            print(f"🏅 Reward (acc - penalty): {reward:.2f}%\n")
            print(f"🏅 Complexity (Number of epochs): {complexity_epochs:.2f}%\n")

            # Track best
            if reward > self.best_reward:
                self.best_reward = reward
                self.best_architecture = list(self.architecture)
                self.best_accuracy = acc  
                self.best_complexity = complexity 
                print(f"🌟 New Best Architecture Found with Reward: {reward:.2f}%")
        else:
            self.architecture.append(decoded)
            reward = 0

        return self._get_obs(), reward, self.done, {}

    def _get_obs(self):
        obs = np.zeros(self.observation_space.shape[0])
        for i, layer in enumerate(self.architecture):
            base = i * 3
            if layer[0] == 'dense':
                obs[base] = layer[1] / 512  # normalized units
                obs[base + 1] = activation_names.index(layer[2]) / (len(activation_names) - 1)
                obs[base + 2] = 0
            elif layer[0] == 'dropout':
                obs[base] = layer[1]
                obs[base + 1] = -1
                obs[base + 2] = 1
            elif layer[0] == 'batchnorm':
                obs[base] = -1
                obs[base + 1] = -1
                obs[base + 2] = 2
        return obs

    def _evaluate_model(self):
        X_train, y_train, X_val, y_val = self.dataset

        model = nn.Sequential()
        input_dim = X_train.shape[1]

        for i, layer in enumerate(self.architecture):
            if layer[0] == 'dense':
                model.add_module(f"fc{i}", nn.Linear(input_dim, layer[1]))
                model.add_module(f"act{i}", activation_map[layer[2]]())
                input_dim = layer[1]
            elif layer[0] == 'dropout':
                model.add_module(f"dropout{i}", nn.Dropout(p=layer[1]))
            elif layer[0] == 'batchnorm':
                model.add_module(f"bn{i}", nn.BatchNorm1d(input_dim))

        model.add_module("output", nn.Linear(input_dim, y_train.shape[1]))

        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01) # can make the RL choose optimizer and LR also here
        train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)

        best_acc = 0
        patience = 3
        patience_counter = 0
        max_epochs = 100
        epochs_used = 0
        model.train()
        for _ in range(max_epochs):  
            for xb, yb in train_loader:
                optimizer.zero_grad()
                output = model(xb)
                loss = loss_fn(output, yb)
                loss.backward()
                optimizer.step()
    
            # Early stopping check
            model.eval()
            with torch.no_grad():
                preds = model(X_val)
                acc = (preds.argmax(dim=1) == y_val.argmax(dim=1)).float().mean().item()
    
            epochs_used += 1
    
            if acc > best_acc + 1e-4:
                best_acc = acc
                patience_counter = 0
            else:
                patience_counter += 1
    
            if patience_counter >= patience:
                break

        no_of_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        max_params = self._estimate_max_parameters()
        epochs_percentage = epochs_used*100/max_epochs
        alpha = 0.8
        #beta = 0.1

        reward = (alpha * acc * 100) + ((1 - alpha) * (100 - (no_of_params * epochs_used)/(max_params * max_epochs)))
        #reward = (alpha * acc * 100) + ((1 - alpha - beta) * (100 - (no_of_params*100/max_params))) + (beta * (100 - epochs_percentage))

        return reward, acc * 100, 100 - (no_of_params*100/max_params), (100 - epochs_percentage)

# Reward function ideas:
- Something to do with epochs as well, some early stopping measure and then take the percentage of epochs: Done
- Keep only accuracy and make a list of the top 10/15 and compare, and choose the simplest one - To do
- Penalize time (instead of complexity) using a parameter lambda that is very small - To do 

In [10]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.nn.functional import one_hot

import openml
dataset = openml.datasets.get_dataset(23512)
X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

df = pd.concat([X, y], axis=1)

df.dropna(inplace=True)

X = df.drop(columns=[dataset.default_target_attribute])
y = df[dataset.default_target_attribute]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

le = LabelEncoder()
y_encoded = le.fit_transform(y)  

y_oh = one_hot(torch.tensor(y_encoded)).float()

X_train_np, X_val_np, y_train_tensor, y_val_tensor = train_test_split(
    X_scaled, y_oh, test_size=0.1, random_state=42)

X_train_tensor = torch.tensor(X_train_np).float()
X_val_tensor = torch.tensor(X_val_np).float()

In [11]:
dataset = (X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)

In [12]:
print(X_train_tensor.shape)  # [n_train, n_features]
print(y_train_tensor.shape)  # [n_train, 2]  (one-hot binary)

torch.Size([88244, 28])
torch.Size([88244, 2])


In [16]:
# Pass into your NAS environment
env = NASMLPEnv(dataset=dataset, max_layers=10)

In [18]:
from stable_baselines3 import DQN

import time

start_time = time.time()

model = DQN(
    "MlpPolicy",
    env,
    gamma=0.99,                       
    exploration_initial_eps=1.0,     
    exploration_final_eps=0.05,      
    exploration_fraction=0.05,        
    verbose=1,
    tensorboard_log="./nas_logs/"
)

#model = DQN("MlpPolicy", env, verbose=1, tensorboard_log="./nas_logs/")
model.learn(total_timesteps=10000)

end_time = time.time()
elapsed_time = end_time - start_time

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




Logging to ./nas_logs/DQN_42

🎯 Final Architecture: [('dense', 16, 'swish'), ('dropout', 0.5), ('dense', 8, 'relu'), ('dense', 512, 'swish'), ('dense', 8, 'selu'), ('dense', 16, 'elu'), ('dropout', 0.0), ('dense', 64, 'relu'), ('dense', 128, 'swish'), ('dense', 512, 'sigmoid'), ('dense', 64, 'relu'), ('dense', 128, 'elu')]
🏆 Validation Accuracy (acc): 53.32%
⚙️  Complexity (number of layers): 94.69185781194716
🏅 Reward (acc - penalty): 62.66%

🏅 Complexity (Number of epochs): 96.00%

🌟 New Best Architecture Found with Reward: 62.66%

🎯 Final Architecture: [('dense', 64, 'elu'), ('batchnorm',), ('dense', 64, 'swish'), ('dense', 512, 'tanh'), ('dense', 256, 'relu'), ('dense', 8, 'relu'), ('dropout', 0.0), ('dense', 8, 'relu'), ('dense', 64, 'tanh'), ('dense', 32, 'tanh'), ('dense', 256, 'tanh'), ('dense', 64, 'swish')]
🏆 Validation Accuracy (acc): 53.32%
⚙️  Complexity (number of layers): 91.57223909120935
🏅 Reward (acc - penalty): 62.66%

🏅 Complexity (Number of epochs): 96.00%


🎯 Fina

In [29]:
model.save("nas_agent_checkpoint_alpha_0.8_epoch_reward_23512_no_stop_1st")

In [31]:
# load previous model
# model = DQN.load("nas_agent_checkpoint")

In [33]:
# analyze the results

In [35]:
print(elapsed_time)

112058.18363213539


In [37]:
print(env.best_architecture)

[('dense', 16, 'sigmoid'), ('dense', 16, 'sigmoid'), ('dense', 16, 'sigmoid'), ('dense', 16, 'sigmoid'), ('dense', 16, 'sigmoid')]


In [39]:
# interesting result: look into it, pretty exciting

In [41]:
print(f"\n Best Reward: {env.best_reward:.2f}%")


 Best Reward: 77.90%


In [43]:
print(f"\n Best Accuracy: {env.best_accuracy:.2f}%")


 Best Accuracy: 72.37%


In [45]:
print(f"\n Best Complexity: {env.best_complexity:.2f}%")


 Best Complexity: 99.93%


In [None]:
print(f"\n Best Complexity Epochs: {env.complexity_epochs:.2f}%")

In [46]:
import pandas as pd

flat_data = []

for arch_id, run in enumerate(env.architecture_log):
    reward = run['reward']
    accuracy = run.get('accuracy', None)
    complexity = run.get('complexity', None)
    complexity_epochs = run.get('complexity_epochs', None)
    completed = run.get('completed', False)
    termination_reason = run.get('termination_reason', 'unknown')

    for i, layer in enumerate(run['architecture']):
        layer_type = layer[0]
        layer_info = {
            'arch_id': arch_id,
            'layer_num': i + 1,
            'layer_type': layer_type,
            'units': None,
            'activation': None,
            'dropout_rate': None,
            'reward': reward,
            'accuracy': accuracy,
            'complexity': complexity,
            'complexity_epochs': complexity_epochs,, 
            'completed': completed,
            'termination_reason': termination_reason
        }

        if layer_type == 'dense':
            layer_info['units'] = layer[1]
            layer_info['activation'] = layer[2]

        elif layer_type == 'dropout':
            layer_info['dropout_rate'] = layer[1]

        flat_data.append(layer_info)

# Create DataFrame and export to CSV
df = pd.DataFrame(flat_data)
df.to_csv("architecture_analysis_updated_alpha_0.9_beta_0.1.csv", index=False)

In [78]:
df = pd.read_csv("architecture_analysis_updated_alpha_0.9.csv")

In [96]:
# still needs to look at why there is only 6728 rows
df

Unnamed: 0,arch_id,layer_num,layer_type,units,activation,dropout_rate,reward,accuracy,complexity,completed,termination_reason
0,0,1,dense,8.0,swish,,47.520247,53.319734,4.675142,True,max_dense
1,0,2,dense,256.0,swish,,47.520247,53.319734,4.675142,True,max_dense
2,0,3,dense,32.0,selu,,47.520247,53.319734,4.675142,True,max_dense
3,0,4,dense,256.0,elu,,47.520247,53.319734,4.675142,True,max_dense
4,0,5,dense,16.0,swish,,47.520247,53.319734,4.675142,True,max_dense
...,...,...,...,...,...,...,...,...,...,...,...
7467,6862,2,dense,64.0,relu,,61.296611,68.271291,1.475516,True,stop
7468,6862,3,dense,512.0,elu,,61.296611,68.271291,1.475516,True,stop
7469,6863,1,dense,8.0,selu,,0.000000,,,False,incomplete
7470,6863,2,dense,64.0,relu,,0.000000,,,False,incomplete


In [92]:
df[df['arch_id'] == 1]

Unnamed: 0,arch_id,layer_num,layer_type,units,activation,dropout_rate,reward,accuracy,complexity,completed,termination_reason
10,1,1,dense,8.0,swish,,0.0,,,False,incomplete
11,1,2,dense,256.0,swish,,0.0,,,False,incomplete
12,1,3,dense,32.0,selu,,0.0,,,False,incomplete
13,1,4,dense,256.0,elu,,0.0,,,False,incomplete
14,1,5,dense,16.0,swish,,0.0,,,False,incomplete
15,1,6,dense,256.0,relu,,0.0,,,False,incomplete
16,1,7,dense,256.0,swish,,0.0,,,False,incomplete
17,1,8,dense,64.0,relu,,0.0,,,False,incomplete
18,1,9,dense,16.0,relu,,0.0,,,False,incomplete
19,1,10,dense,16.0,tanh,,0.0,,,False,incomplete


In [98]:
df[df['accuracy'].isna()].head(30)

Unnamed: 0,arch_id,layer_num,layer_type,units,activation,dropout_rate,reward,accuracy,complexity,completed,termination_reason
10,1,1,dense,8.0,swish,,0.0,,,False,incomplete
11,1,2,dense,256.0,swish,,0.0,,,False,incomplete
12,1,3,dense,32.0,selu,,0.0,,,False,incomplete
13,1,4,dense,256.0,elu,,0.0,,,False,incomplete
14,1,5,dense,16.0,swish,,0.0,,,False,incomplete
15,1,6,dense,256.0,relu,,0.0,,,False,incomplete
16,1,7,dense,256.0,swish,,0.0,,,False,incomplete
17,1,8,dense,64.0,relu,,0.0,,,False,incomplete
18,1,9,dense,16.0,relu,,0.0,,,False,incomplete
19,1,10,dense,16.0,tanh,,0.0,,,False,incomplete


In [100]:
df.sort_values('accuracy', ascending = False).head(30)

Unnamed: 0,arch_id,layer_num,layer_type,units,activation,dropout_rate,reward,accuracy,complexity,completed,termination_reason
6765,686,2,dense,64.0,sigmoid,,65.09911,72.36104,0.258259,True,stop
6764,686,1,dense,64.0,sigmoid,,65.09911,72.36104,0.258259,True,stop
6741,682,2,dense,64.0,sigmoid,,64.952248,72.19786,0.258259,True,stop
6740,682,1,dense,64.0,sigmoid,,64.952248,72.19786,0.258259,True,stop
6737,680,2,dense,64.0,sigmoid,,64.933886,72.177458,0.258259,True,stop
6736,680,1,dense,64.0,sigmoid,,64.933886,72.177458,0.258259,True,stop
6724,674,1,dense,64.0,sigmoid,,64.924708,72.167259,0.258259,True,stop
6725,674,2,dense,64.0,sigmoid,,64.924708,72.167259,0.258259,True,stop
6713,668,2,dense,64.0,sigmoid,,64.915529,72.157061,0.258259,True,stop
6712,668,1,dense,64.0,sigmoid,,64.915529,72.157061,0.258259,True,stop


In [67]:
df[df['accuracy'] < 60].head(100)

Unnamed: 0,arch_id,layer_num,layer_type,units,activation,dropout_rate,reward,accuracy,complexity,completed,termination_reason
0,0,1,dropout,,,0.5,34.325628,46.680266,15.092920,True,max_dense
1,0,2,dense,512.0,selu,,34.325628,46.680266,15.092920,True,max_dense
2,0,3,dense,256.0,relu,,34.325628,46.680266,15.092920,True,max_dense
3,0,4,dense,8.0,sigmoid,,34.325628,46.680266,15.092920,True,max_dense
4,0,5,dense,64.0,sigmoid,,34.325628,46.680266,15.092920,True,max_dense
...,...,...,...,...,...,...,...,...,...,...,...
192,20,9,dense,128.0,swish,,40.241831,53.319734,12.069781,True,stop
193,20,10,batchnorm,,,,40.241831,53.319734,12.069781,True,stop
194,20,11,dense,128.0,tanh,,40.241831,53.319734,12.069781,True,stop
228,24,1,dense,256.0,sigmoid,,42.334732,53.319734,1.605276,True,stop


In [63]:
df[df['arch_id'] == 8]

Unnamed: 0,arch_id,layer_num,layer_type,units,activation,dropout_rate,reward,accuracy,complexity,completed,termination_reason
84,8,1,dense,64.0,swish,,42.346902,53.319734,1.54443,True,max_dense
85,8,2,dense,16.0,relu,,42.346902,53.319734,1.54443,True,max_dense
86,8,3,dense,16.0,sigmoid,,42.346902,53.319734,1.54443,True,max_dense
87,8,4,dropout,,,0.0,42.346902,53.319734,1.54443,True,max_dense
88,8,5,dense,64.0,relu,,42.346902,53.319734,1.54443,True,max_dense
89,8,6,dense,64.0,swish,,42.346902,53.319734,1.54443,True,max_dense
90,8,7,dense,16.0,selu,,42.346902,53.319734,1.54443,True,max_dense
91,8,8,dense,128.0,sigmoid,,42.346902,53.319734,1.54443,True,max_dense
92,8,9,dense,32.0,selu,,42.346902,53.319734,1.54443,True,max_dense
93,8,10,dense,128.0,sigmoid,,42.346902,53.319734,1.54443,True,max_dense


In [65]:
df[df['layer_type'] == 'batchnorm']['accuracy'].mean()

53.75438220798969

In [49]:
df[df['layer_type'] == 'dropout']['accuracy'].mean()

56.00318891901365

In [41]:
df.groupby('activation')['accuracy'].mean()

activation
elu        58.187054
relu       59.291178
selu       56.725523
sigmoid    53.877449
swish      58.579698
tanh       55.326050
Name: accuracy, dtype: float64

In [43]:
df.groupby('units')['accuracy'].mean()

units
8.0      53.862298
16.0     57.987943
32.0     59.184169
64.0     57.417682
128.0    59.076210
256.0    53.799047
512.0    58.109830
Name: accuracy, dtype: float64

In [None]:
# analyze effect of different units in each layer
# analyze effect of different activation functions
# analyze the effect of different layers