In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import random
import pandas as pd



In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cpu


In [15]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)


100%|██████████| 9.91M/9.91M [00:00<00:00, 22.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 612kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 5.71MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.73MB/s]


In [16]:

SEARCH_SPACE = {
    "num_layers": [2, 3],
    "hidden_units": [32, 64],
    "activation": ["relu", "tanh"]
}


In [17]:
class NASModel(nn.Module):
    def __init__(self, input_size, output_size, num_layers, hidden_units, activation):
        super().__init__()
        layers = []
        in_features = input_size

        for _ in range(num_layers):
            layers.append(nn.Linear(in_features, hidden_units))
            layers.append(nn.ReLU() if activation == "relu" else nn.Tanh())
            in_features = hidden_units

        layers.append(nn.Linear(hidden_units, output_size))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        return self.network(x)


In [18]:
def sample_architecture():
    return {
        "num_layers": random.choice(SEARCH_SPACE["num_layers"]),
        "hidden_units": random.choice(SEARCH_SPACE["hidden_units"]),
        "activation": random.choice(SEARCH_SPACE["activation"])
    }


In [19]:
def mutate(arch):
    new_arch = arch.copy()
    key = random.choice(list(SEARCH_SPACE.keys()))
    new_arch[key] = random.choice(SEARCH_SPACE[key])
    return new_arch


In [20]:
def count_params(model):
    return sum(p.numel() for p in model.parameters())


In [21]:
def train_and_evaluate(arch, epochs=2):
    model = NASModel(
        input_size=28*28,
        output_size=10,
        num_layers=arch["num_layers"],
        hidden_units=arch["hidden_units"],
        activation=arch["activation"]
    ).to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Training
    model.train()
    for _ in range(epochs):
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()

    accuracy = correct / len(test_dataset)
    params = count_params(model)

    return accuracy, params


In [22]:
results = []
population = []

NUM_INITIAL = 3
NUM_GENERATIONS = 3

# Initial Random Search
for _ in range(NUM_INITIAL):
    arch = sample_architecture()
    acc, params = train_and_evaluate(arch)
    population.append((arch, acc, params))
    results.append({"Architecture": arch, "Accuracy": acc, "Params": params})

# Evolutionary Search
for _ in range(NUM_GENERATIONS):
    parent = max(population, key=lambda x: x[1])[0]
    child = mutate(parent)
    acc, params = train_and_evaluate(child)
    population.append((child, acc, params))
    results.append({"Architecture": child, "Accuracy": acc, "Params": params})


In [23]:
df = pd.DataFrame(results)
display(df)

best_model = df.loc[df["Accuracy"].idxmax()]
print("\nBest Architecture Found:")
print(best_model)


Unnamed: 0,Architecture,Accuracy,Params
0,"{'num_layers': 2, 'hidden_units': 32, 'activat...",0.9519,26506
1,"{'num_layers': 3, 'hidden_units': 64, 'activat...",0.9574,59210
2,"{'num_layers': 2, 'hidden_units': 64, 'activat...",0.9658,55050
3,"{'num_layers': 2, 'hidden_units': 64, 'activat...",0.9655,55050
4,"{'num_layers': 2, 'hidden_units': 64, 'activat...",0.9629,55050
5,"{'num_layers': 2, 'hidden_units': 64, 'activat...",0.9647,55050



Best Architecture Found:
Architecture    {'num_layers': 2, 'hidden_units': 64, 'activat...
Accuracy                                                   0.9658
Params                                                      55050
Name: 2, dtype: object
