In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np

In [2]:
# Define activations separately for indexing and instantiation
activation_names = ['sigmoid', 'tanh', 'relu', 'elu', 'selu', 'swish']
activation_map = {
    'sigmoid': nn.Sigmoid,
    'tanh': nn.Tanh,
    'relu': nn.ReLU,
    'elu': nn.ELU,
    'selu': nn.SELU,
    'swish': lambda: nn.SiLU(),  # swish ≈ SiLU in PyTorch
}

def decode_action(action_id):
    if 0 <= action_id <= 41:  # Dense Layer with some number of units
        units_list = [8, 16, 32, 64, 128, 256, 512]
        units_idx = action_id // len(activation_names)
        act_idx = action_id % len(activation_names)
        units = units_list[units_idx]
        activation_name = activation_names[act_idx]
        activation_fn = activation_map[activation_name]
        return ('dense', units, activation_name)  

    elif 42 <= action_id <= 44:  # Dropout layer
        dropout_rates = [0.0, 0.2, 0.5]
        return ('dropout', dropout_rates[action_id - 42])

    elif action_id == 45:  # BatchNorm layer
        return ('batchnorm',)

    elif action_id == 46:  # Stop building layers
        return ('stop',)

    else:
        raise ValueError(f"Invalid action id: {action_id}")


In [5]:
# look into changing this next time, add data specific features and start finding new datasets

In [7]:
class NASMLPEnv(Env):
    def __init__(self, datasets, max_layers=10):
        self.datasets = datasets  # list of (X_train, y_train, X_val, y_val)
        self.max_layers = max_layers
        self.action_space = Discrete(47)

        # Set up space for architecture + meta features (assuming 8)
        self.num_dataset_features = 8
        self.max_possible_layers = max_layers + 10
        obs_size = self.max_possible_layers * 3 + self.num_dataset_features
        self.observation_space = Box(low=0, high=1, shape=(obs_size,), dtype=np.float32)

        self.architecture = []
        self.done = False
        self.architecture_log = []

        # placeholders
        self.dataset = None
        self.dataset_features = None
        self.max_parameters = None

        # Performance tracking
        self.best_reward = -float('inf')
        self.best_architecture = None

        # Load first dataset
        self._sample_new_dataset()

    def _sample_new_dataset(self):
        self.dataset = random.choice(self.datasets)
        self.dataset_features = self._compute_dataset_features(self.dataset)
        self.max_parameters = self._estimate_max_parameters()

    def _compute_dataset_features(self, dataset):
        import torch
        from scipy.stats import skew
    
        X_train, y_train, X_val, y_val = dataset
        n_rows, n_features = X_train.shape
    
        feature_std = X_train.std(dim=0).mean().item()
        class_balance = y_train.sum(dim=0) / y_train.shape[0]
        class_balance = class_balance.max().item()
    
        # Skewness
        feature_skewness = torch.tensor(skew(X_train.numpy(), axis=0, nan_policy='omit')).abs().mean().item()
    
        # Feature correlation
        if n_features > 1:
            corr = torch.corrcoef(X_train.T)
            avg_pairwise_corr = corr.abs().triu(diagonal=1).mean().item()
        else:
            avg_pairwise_corr = 0.0
    
        # Number of classes
        n_classes = y_train.shape[1] if len(y_train.shape) > 1 else len(torch.unique(y_train))
    
        return np.array([
            n_rows / 1e5,
            n_features / 1e3,
            feature_std / 10.0,
            class_balance,
            feature_skewness / 10.0,
            avg_pairwise_corr,
            n_classes / 100.0  # normalize assuming <100 classes
        ])


    def reset(self):
        if self.architecture:
            self.architecture_log.append({
                'architecture': list(self.architecture),
                'reward': 0,
                'accuracy': None,
                'complexity': None,
                'completed': False,
                'termination_reason': 'incomplete'
            })
        
        # Resample dataset each episode
        self._sample_new_dataset()
        self.architecture = []
        self.done = False
        return self._get_obs()


    def _estimate_max_parameters(self):
        input_dim = self.dataset[0].shape[1]  
        max_units = 512
        total_params = 0
    
        for _ in range(self.max_layers):
            total_params += input_dim * max_units + max_units  
            input_dim = max_units
    
        output_dim = self.dataset[1].shape[1]
        total_params += max_units * output_dim + output_dim
    
        return total_params


    def step(self, action_id):
        decoded = decode_action(action_id)

        dense_count = sum(1 for layer in self.architecture if layer[0] == 'dense')
        if decoded[0] == 'stop' or (decoded[0] == 'dense' and dense_count >= self.max_layers):
            self.done = True
            reward, acc, complexity, complexity_epochs = self._evaluate_model()  

            # Log architecture, reward, accuracy, and complexity
            self.architecture_log.append({
                'architecture': list(self.architecture),
                'reward': reward,
                'accuracy': acc,
                'complexity': complexity,
                'complexity_epochs': complexity_epochs,
                'completed': True,
                'termination_reason': 'stop' if decoded[0] == 'stop' else 'max_dense'
            })
    
            print(f"\n🎯 Final Architecture: {self.architecture}")
            print(f"🏆 Validation Accuracy (acc): {acc:.2f}%")
            print(f"⚙️  Complexity (number of layers): {complexity}")
            print(f"🏅 Reward (acc - penalty): {reward:.2f}%\n")

            # Track best
            if reward > self.best_reward:
                self.best_reward = reward
                self.best_architecture = list(self.architecture)
                self.best_accuracy = acc  
                self.best_complexity = complexity 
                print(f"🌟 New Best Architecture Found with Reward: {reward:.2f}%")
        else:
            self.architecture.append(decoded)
            reward = 0

        return self._get_obs(), reward, self.done, {}

    def _get_obs(self):
        obs = np.zeros(self.observation_space.shape[0])
        for i, layer in enumerate(self.architecture):
            base = i * 3
            if layer[0] == 'dense':
                obs[base] = layer[1] / 512  # normalized units
                obs[base + 1] = activation_names.index(layer[2]) / (len(activation_names) - 1)
                obs[base + 2] = 0
            elif layer[0] == 'dropout':
                obs[base] = layer[1]
                obs[base + 1] = -1
                obs[base + 2] = 1
            elif layer[0] == 'batchnorm':
                obs[base] = -1
                obs[base + 1] = -1
                obs[base + 2] = 2
        obs[-len(self.dataset_features):] = self.dataset_features
        return obs

    def _evaluate_model(self):
        X_train, y_train, X_val, y_val = self.dataset

        model = nn.Sequential()
        input_dim = X_train.shape[1]

        for i, layer in enumerate(self.architecture):
            if layer[0] == 'dense':
                model.add_module(f"fc{i}", nn.Linear(input_dim, layer[1]))
                model.add_module(f"act{i}", activation_map[layer[2]]())
                input_dim = layer[1]
            elif layer[0] == 'dropout':
                model.add_module(f"dropout{i}", nn.Dropout(p=layer[1]))
            elif layer[0] == 'batchnorm':
                model.add_module(f"bn{i}", nn.BatchNorm1d(input_dim))

        model.add_module("output", nn.Linear(input_dim, y_train.shape[1]))

        loss_fn = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01) # can make the RL choose optimizer and LR also here
        train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)

        best_acc = 0
        patience = 3
        patience_counter = 0
        max_epochs = 100
        epochs_used = 0
        model.train()
        for _ in range(max_epochs):  
            for xb, yb in train_loader:
                optimizer.zero_grad()
                output = model(xb)
                loss = loss_fn(output, yb)
                loss.backward()
                optimizer.step()
    
            # Early stopping check
            model.eval()
            with torch.no_grad():
                preds = model(X_val)
                acc = (preds.argmax(dim=1) == y_val.argmax(dim=1)).float().mean().item()
    
            epochs_used += 1
    
            if acc > best_acc + 1e-4:
                best_acc = acc
                patience_counter = 0
            else:
                patience_counter += 1
    
            if patience_counter >= patience:
                break

        no_of_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        max_params = self._estimate_max_parameters()
        epochs_percentage = epochs_used*100/max_epochs
        alpha = 0.8
        #beta = 0.1

        reward = (alpha * acc * 100) + ((1 - alpha) * (100 - (no_of_params * epochs_used)/(max_params * max_epochs)))
        #reward = (alpha * acc * 100) + ((1 - alpha - beta) * (100 - (no_of_params*100/max_params))) + (beta * (100 - epochs_percentage))

        return reward, acc * 100, 100 - (no_of_params*100/max_params), (100 - epochs_percentage)

In [9]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.nn.functional import one_hot
import openml
import random
from ucimlrepo import fetch_ucirepo

def load_and_process_dataset(source, target_column=None, dataset_id=None, 
                             uci_id=None, test_size=0.1, random_state=42, n_samples=None, usecols=None ):
    """
    Load and process dataset from OpenML, UCI (via ucimlrepo), or Kaggle-style CSV.

    Parameters:
        - source: "openml", "uci", or "kaggle"
        - target_column: name of the target column (required for kaggle)
        - dataset_id: OpenML dataset ID (required if source="openml")
        - uci_id: UCI ML repo dataset ID (required if source="uci")
        - test_size: fraction for validation split
        - random_state: seed

    Returns:
        - Tuple: (X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)
    """
    # === 1. Load dataset ===
    if source == "openml":
        if dataset_id is None:
            raise ValueError("You must provide a dataset_id for OpenML datasets.")
        dataset = openml.datasets.get_dataset(dataset_id)
        X, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)
        target_column = dataset.default_target_attribute
        df = pd.concat([X, y], axis=1)

    
    elif source == "uci":
        if uci_id is None:
            raise ValueError("You must provide uci_id when source='uci'.")

        # 1) Get the download URL & headers
        repo     = fetch_ucirepo(id=uci_id)
        data_url = repo.metadata.data_url    # raw CSV link
        headers  = repo.data.headers         # full list of column names

        # 2) Infer the target column if needed
        if target_column is None:
            target_column = getattr(repo.metadata, "target_col", headers[-1])
        if isinstance(target_column, (list, tuple)):
            if len(target_column) == 1:
                target_column = target_column[0]
            else:
                raise ValueError(
                    f"Multiple possible targets {target_column}; please specify one."
                )

        # 3) Build pandas.read_csv kwargs
        read_kwargs = {"header": 0}
        if n_samples is not None:
            read_kwargs["nrows"] = n_samples

        # only restrict columns if user actually passed usecols
        if usecols is not None:
            if any(not isinstance(c, str) for c in usecols):
                raise TypeError("All entries in usecols must be strings")
            cols_to_read = set(usecols) | {target_column}
            read_kwargs["usecols"] = cols_to_read

        # 4) Read exactly up to n_samples rows (and either all or a subset of cols)
        df = pd.read_csv(data_url, **read_kwargs)

    elif source == "kaggle":
        if dataset_id is None or target_column is None:
            raise ValueError("For Kaggle datasets, provide file_path and target_column.")
        df = pd.read_csv(dataset_id)  # here, dataset_id acts as the file path

    else:
        raise ValueError("source must be one of: 'openml', 'uci', 'kaggle'")

    # if usecols is not None:
    #     # always keep the target col
    #     cols = set(usecols) | {target_column}
    #     df = df.loc[:, df.columns.intersection(cols)]

    # # === NEW: row sampling ===
    # if n_samples is not None and n_samples < len(df):
    #     # random_state for reproducibility
    #     df = df.sample(n=n_samples, random_state=random_state)

    # === 2. Drop missing values ===
    df.dropna(inplace=True)

    # === 3. Split features and target ===
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # 1. Convert categorical columns to strings (safe fallback)
    X = X.apply(lambda col: col.astype(str) if col.dtype == 'object' else col)
    
    # 2. One-hot encode categorical features
    X = pd.get_dummies(X)
    
    # === 4. Normalize features ===
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # === 5. Encode labels ===
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_tensor = one_hot(torch.tensor(y_encoded)).float()

    # === 6. Train/val split and tensor conversion ===
    X_train_np, X_val_np, y_train_tensor, y_val_tensor = train_test_split(
        X_scaled, y_tensor, test_size=test_size, random_state=random_state
    )

    X_train_tensor = torch.tensor(X_train_np).float()
    X_val_tensor = torch.tensor(X_val_np).float()

    return (X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor)

In [11]:
# figure this out

In [11]:
ds_higgs = load_and_process_dataset(source="openml", dataset_id=23512)

In [14]:
#ds_hepmass = load_and_process_dataset(source="uci", uci_id=347)
ds_forestcover = load_and_process_dataset(source="uci", uci_id=31, n_samples = 100000)

In [16]:
#ds_bankchurn = load_and_process_dataset(source="kaggle", dataset_id="Churn_Modelling.csv", target_column="Excited")
ds_diabetes = load_and_process_dataset(source="uci", uci_id=891, n_samples = 100000)

In [17]:
#ds_lattice = load_and_process_dataset(source="uci", uci_id=1091, n_samples = 20000)
#ds_banking = load_and_process_dataset(source="uci", uci_id=222, n_samples = 20000)
ds_student = load_and_process_dataset(source="uci", uci_id=697, n_samples = 100000)

In [18]:
ds_spam = load_and_process_dataset(source="uci", uci_id=94, n_samples = 100000)

In [23]:
#ds_phishing = load_and_process_dataset(source="uci", uci_id=967, n_samples = 100000)

In [25]:
# its dying, too much data for kernel to handle, need to fix this issue
# run it in google colab with 

In [27]:
# Pass into your NAS environment
env = NASMLPEnv(datasets=[ds_higgs, ds_forestcover, ds_diabetes, ds_student, ds_spam], max_layers=10)

In [29]:
from stable_baselines3 import DQN

import time

start_time = time.time()

model = DQN(
    "MlpPolicy",
    env,
    gamma=0.99,                       
    exploration_initial_eps=1.0,     
    exploration_final_eps=0.05,      
    exploration_fraction=0.1,        
    verbose=1,
    tensorboard_log="./nas_logs/"
)

#model = DQN("MlpPolicy", env, verbose=1, tensorboard_log="./nas_logs/")
model.learn(total_timesteps=100000)

end_time = time.time()
elapsed_time = end_time - start_time

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




Logging to ./nas_logs/DQN_39

🎯 Final Architecture: [('dense', 8, 'selu'), ('dense', 16, 'selu'), ('dense', 8, 'selu'), ('dense', 128, 'relu'), ('dense', 256, 'elu'), ('dense', 16, 'elu'), ('dense', 16, 'elu'), ('dense', 32, 'swish'), ('dense', 64, 'tanh'), ('dense', 16, 'sigmoid')]
🏆 Validation Accuracy (acc): 86.42%
⚙️  Complexity (number of layers): 98.20112374016811
🏅 Reward (acc - penalty): 89.14%

🌟 New Best Architecture Found with Reward: 89.14%

🎯 Final Architecture: [('dense', 256, 'tanh'), ('dense', 8, 'elu'), ('dense', 256, 'sigmoid'), ('dense', 64, 'relu'), ('dense', 64, 'sigmoid'), ('dense', 512, 'selu'), ('dense', 128, 'swish'), ('dense', 16, 'selu'), ('dense', 8, 'relu'), ('dense', 128, 'tanh')]
🏆 Validation Accuracy (acc): 92.41%
⚙️  Complexity (number of layers): 94.0546039339755
🏅 Reward (acc - penalty): 93.93%

🌟 New Best Architecture Found with Reward: 93.93%

🎯 Final Architecture: [('dense', 512, 'swish'), ('dropout', 0.5), ('dense', 512, 'sigmoid'), ('dense', 64, 

In [30]:
model.save("nas_agent_checkpoint_data_specific_alpha_0.8_epoch_reward")

In [11]:
from stable_baselines3 import DQN
model = DQN.load(
    "nas_agent_checkpoint_data_specific_alpha_0.8_epoch_reward",
    env=infer_env
)

NameError: name 'infer_env' is not defined

In [31]:
print(elapsed_time)

5448.773490190506


In [32]:
print(env.best_architecture)

[('dense', 8, 'sigmoid'), ('dropout', 0.2), ('dense', 32, 'sigmoid'), ('dense', 8, 'sigmoid')]


In [33]:
print(f"\n Best Reward: {env.best_reward:.2f}%")


 Best Reward: 95.66%


In [34]:
print(f"\n Best Accuracy: {env.best_accuracy:.2f}%")


 Best Accuracy: 94.58%


In [35]:
print(f"\n Best Complexity: {env.best_complexity:.2f}%")


 Best Complexity: 99.96%


# Predicting architecture for new dataset

In [129]:
import torch
from stable_baselines3 import DQN
new_ds = load_and_process_dataset(source="openml", dataset_id=23512)

In [131]:
infer_env  = NASMLPEnv(datasets=[new_ds], max_layers=10)

In [133]:
from stable_baselines3 import DQN
model = DQN.load(
    "nas_agent_checkpoint_data_specific_alpha_0.8_epoch_reward",
    env=infer_env
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [135]:
# 4) Roll out one “episode” with the learned policy:
obs = infer_env.reset()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = infer_env.step(action)

# 5) Extract the proposed architecture:
#    - The last completed episode is at the end of `architecture_log`
predicted = infer_env.architecture_log[-1]
best_architecture = predicted["architecture"]
best_reward       = predicted["reward"]
best_accuracy     = predicted["accuracy"]
complexity_epochs     = predicted["complexity_epochs"]

print("🏷️  Proposed architecture:", best_architecture)
print(f"🎯  Expected validation accuracy: {best_accuracy:.2f}%")
print(f"🎯  Expected complexitty epochs: {complexity_epochs:.2f}%")
print(f"🏅  Predicted reward: {best_reward:.2f}")


🎯 Final Architecture: [('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid')]
🏆 Validation Accuracy (acc): 53.32%
⚙️  Complexity (number of layers): 99.96226538777987
🏅 Reward (acc - penalty): 62.66%

🌟 New Best Architecture Found with Reward: 62.66%
🏷️  Proposed architecture: [('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid'), ('dense', 8, 'sigmoid')]
🎯  Expected validation accuracy: 53.32%
🎯  Expected complexitty epochs: 96.00%
🏅  Predicted reward: 62.66


In [136]:
X_train = new_ds[0]
y_train = new_ds[1]
X_valid = new_ds[2]
y_valid = new_ds[3]

In [113]:
X_train.shape

torch.Size([10000, 10])

In [117]:
X_train

tensor([[ 1.3428,  1.3363, -1.3435,  ...,  0.8052, -1.3424, -0.2631],
        [ 1.3428,  0.2700, -0.4489,  ...,  0.0051, -1.3424,  0.2717],
        [ 0.4470,  0.0034, -0.4489,  ...,  0.8052,  1.3383,  0.0043],
        ...,
        [ 0.4470,  1.6028,  0.4457,  ...,  0.8052,  1.3383,  1.6088],
        [-0.4488,  0.0034, -1.3435,  ...,  0.5385,  0.4448,  1.0740],
        [ 1.3428,  0.2700, -1.3435,  ..., -1.5952, -0.4488,  1.3414]])

In [91]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Activation
from tensorflow.keras.optimizers import Adam

In [119]:
model = Sequential([
        Dense(128, input_dim=10),
        Activation('relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(32),
        Activation('relu'),
        Dense(10),
        Activation('sigmoid')
    ])

In [121]:
# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [123]:
model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=1024,
    validation_data=(X_valid, y_valid)
)

Epoch 1/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3194 - loss: 0.5206 - val_accuracy: 0.4710 - val_loss: 0.3373
Epoch 2/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4953 - loss: 0.2007 - val_accuracy: 0.5011 - val_loss: 0.2204
Epoch 3/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5056 - loss: 0.1811 - val_accuracy: 0.5266 - val_loss: 0.1839
Epoch 4/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5225 - loss: 0.1758 - val_accuracy: 0.5272 - val_loss: 0.1740
Epoch 5/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5233 - loss: 0.1736 - val_accuracy: 0.5319 - val_loss: 0.1711
Epoch 6/50
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5324 - loss: 0.1718 - val_accuracy: 0.5349 - val_loss: 0.1704
Epoch 7/50
[1m88/88[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x127972240>

In [125]:
loss, accuracy = model.evaluate(X_valid, y_valid)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 525us/step - accuracy: 0.5824 - loss: 0.1586
Validation Accuracy: 58.70%


In [127]:
loss, accuracy = model.evaluate(X_train, y_train)
print(f"Train Accuracy: {accuracy * 100:.2f}%")

[1m2813/2813[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 514us/step - accuracy: 0.6000 - loss: 0.1557
Train Accuracy: 59.93%


In [None]:
# need to understand how I can apply this to check for specific datasets