# Recipe 1: Basic ENAS with Parameter Sharing

This notebook implements the core ENAS algorithm with parameter sharing for a simple convolutional neural network.

## Imports and Setup

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Define constants
NUM_LAYERS = 4
NUM_OPS = 3  # relu, tanh, identity

## Shared CNN Implementation

In [2]:
class SharedCNN(nn.Module):
    def __init__(self, input_channels, num_classes):
        super().__init__()
        self.input_conv = nn.Conv2d(input_channels, 16, 3, padding=1)
        self.layers = nn.ModuleList([nn.Conv2d(16, 16, 3, padding=1) for _ in range(NUM_LAYERS)])
        self.ops = nn.ModuleList([
            nn.ReLU(),
            nn.Tanh(),
            nn.Identity()
        ])
        self.classifier = nn.Linear(16, num_classes)

    def forward(self, x, actions):
        x = self.input_conv(x)
        for layer, action in zip(self.layers, actions):
            x = layer(x)
            x = self.ops[action](x)
        x = x.mean([2, 3])
        return self.classifier(x)

## Controller Implementation

In [3]:
class Controller(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        self.lstm = nn.LSTMCell(NUM_OPS, hidden_size)
        self.linear = nn.Linear(hidden_size, NUM_OPS)

    def forward(self, num_cells):
        h, c = torch.zeros(1, self.lstm.hidden_size), torch.zeros(1, self.lstm.hidden_size)
        actions = []
        log_probs = []
        for _ in range(num_cells):
            x = torch.zeros(1, NUM_OPS)
            h, c = self.lstm(x, (h, c))
            logits = self.linear(h)
            probs = torch.softmax(logits, dim=-1)
            action = torch.multinomial(probs, 1).item()
            actions.append(action)
            log_probs.append(torch.log(probs[0, action]))
        return actions, torch.stack(log_probs)

## Training and Evaluation Functions

In [4]:
def train_enas(shared_cnn, controller, train_data, val_data, num_epochs):
    shared_optim = optim.Adam(shared_cnn.parameters(), lr=0.01)
    controller_optim = optim.Adam(controller.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        # Train shared parameters
        shared_cnn.train()
        for x, y in train_data:
            actions, _ = controller(NUM_LAYERS)
            shared_optim.zero_grad()
            loss = nn.CrossEntropyLoss()(shared_cnn(x, actions), y)
            loss.backward()
            shared_optim.step()

        # Evaluate architectures
        shared_cnn.eval()
        rewards = []
        for _ in range(10):  # Sample 10 architectures
            actions, log_probs = controller(NUM_LAYERS)
            with torch.no_grad():
                acc = evaluate(shared_cnn, actions, val_data)
            rewards.append(acc)

        # Update controller
        reward = torch.tensor(rewards) - reward.mean()
        controller_optim.zero_grad()
        controller_loss = -(log_probs * reward).sum()
        controller_loss.backward()
        controller_optim.step()

        print(f"Epoch {epoch}, Avg Reward: {reward.mean().item():.4f}")

def evaluate(model, actions, data):
    correct = 0
    total = 0
    for x, y in data:
        with torch.no_grad():
            outputs = model(x, actions)
            _, predicted = outputs.max(1)
            correct += (predicted == y).sum().item()
            total += y.size(0)
    return correct / total

## Usage Example

In [5]:
# Assuming you have your data loaded as train_data and val_data
shared_cnn = SharedCNN(3, 10)  # Assuming CIFAR-10
controller = Controller(100)
# Uncomment the following line to train
train_enas(shared_cnn, controller, train_data, val_data, num_epochs=50)

NameError: name 'train_data' is not defined