# Lecture 8: Neural Architecture Search (Part II) - Hardware-Aware

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gaurav-redhat/transformer_problems/blob/efficientml-course/efficientml_course/08_neural_architecture_search_2/demo.ipynb)

Hardware-aware NAS with latency lookup tables.


In [None]:
!pip install torch -q
import torch
import torch.nn as nn
import torch.nn.functional as F

# Hardware-aware NAS: the search loss includes a latency term.
# Per-operation latency in milliseconds, pre-measured on the target device.
# Keys are candidate-operation names.
LATENCY_TABLE = dict(
    conv3x3=1.2,
    conv5x5=2.8,
    conv7x7=5.1,
    dwconv3x3=0.3,
    dwconv5x5=0.5,
    skip=0.0,
)

class HardwareAwareMixedOp(nn.Module):
    """Softmax-weighted mixture of candidate ops with a latency estimate.

    Four candidate operations (3x3 conv, 5x5 conv, depthwise 3x3 conv and
    identity) are applied to the same input and blended with weights
    ``softmax(alpha)``. ``alpha`` is a learnable parameter initialised to
    zeros, so all candidates start with equal weight.

    Args:
        channels: Number of input and output channels of every candidate op.
        latency_table: Optional mapping from op name to latency. When
            omitted, the module-level ``LATENCY_TABLE`` is looked up each
            time ``get_latency`` is called.
    """

    # Latency charged to a candidate op that has no entry in the table.
    FALLBACK_LATENCY = 1.0

    def __init__(self, channels, latency_table=None):
        super().__init__()
        # Every candidate keeps the spatial size (padding = kernel // 2) and
        # the channel count, so their outputs can be summed element-wise.
        self.ops = nn.ModuleDict({
            'conv3x3': nn.Conv2d(channels, channels, 3, padding=1),
            'conv5x5': nn.Conv2d(channels, channels, 5, padding=2),
            'dwconv3x3': nn.Conv2d(channels, channels, 3, padding=1, groups=channels),
            'skip': nn.Identity(),
        })
        # One architecture logit per candidate op.
        self.alpha = nn.Parameter(torch.zeros(len(self.ops)))
        # None means "use the global LATENCY_TABLE at call time".
        self._latency_table = latency_table

    def forward(self, x):
        """Return the softmax(alpha)-weighted sum of all candidate outputs."""
        weights = F.softmax(self.alpha, dim=0)
        # ModuleDict keeps insertion order, so weights[i] pairs with the
        # i-th registered op.
        return sum(w * op(x) for w, op in zip(weights, self.ops.values()))

    def get_latency(self):
        """Return the expected latency under the current softmax(alpha).

        The result is a tensor that depends on ``alpha`` through the softmax,
        so it can be added to a loss and back-propagated. Ops missing from
        the latency table contribute ``FALLBACK_LATENCY``.
        """
        if self._latency_table is None:
            table = LATENCY_TABLE
        else:
            table = self._latency_table
        weights = F.softmax(self.alpha, dim=0)
        return sum(
            w * table.get(name, self.FALLBACK_LATENCY)
            for w, name in zip(weights, self.ops)
        )

# Demo: build one mixed op and report its expected latency. alpha starts at
# zeros, so every candidate op is weighted equally.
op = HardwareAwareMixedOp(64)
print("Hardware-Aware NAS Loss:")
print(f"  Expected latency: {op.get_latency():.2f} ms")
print("\nLoss = CE_loss + lambda * latency_loss")
# The original literal held UTF-8 bytes mis-decoded as cp1252; restored emoji.
print("🎯 This makes NAS optimize for both accuracy AND speed!")
