# Lazy Initialization

In [1]:
import torch
from torch import nn
from d2l import torch as d2l

- Until now, our models worked even though we defined their layers without ever specifying the input dimensions, i.e. without working out the shape of the data each layer would receive.
- This worked because we were using lazy initialization: the framework (`torch` in our case) defers creating the parameters until we pass inputs through the network for the first time, infers each layer's input size from that data, and then initializes the network accordingly.

In [2]:
# Only the output widths (256 and 10) are given; neither lazy layer is told
# how many input features to expect.
net = nn.Sequential(
    nn.LazyLinear(256),
    nn.ReLU(),
    nn.LazyLinear(10),
)



In [3]:
# No data has passed through the network yet, so the first layer's weight is
# still an uninitialized placeholder rather than a tensor with a shape.
net[0].weight

<UninitializedParameter>

In [5]:
# Feed a dummy batch (2 examples, 20 features each) through the network.
X = torch.rand(2, 20)
net(X)  # the first call is what lets the lazy layers pick up their input width
# The first layer's weight now has a concrete shape.
net[0].weight.shape

torch.Size([256, 20])

In [6]:
@d2l.add_to_class(d2l.Module)  #@save
def apply_init(self, inputs, init=None):
    """Run one forward pass on example inputs, then optionally apply `init`.

    With lazily-defined layers the forward pass is what gives the parameters
    their shapes, so it has to happen before a custom initializer can touch
    them.

    Args:
        inputs: Example input for the dry-run forward pass. A list or tuple
            is unpacked into positional arguments of `forward`; any other
            object is passed to `forward` unchanged.
        init: Optional callable handed to `self.net.apply`. Skipped when
            `None`.
    """
    # Callers written against d2l's own `apply_init` pass a list such as
    # `[X]`; unpack it so they keep working once this method replaces the
    # library version. A bare tensor is still forwarded as-is.
    if isinstance(inputs, (list, tuple)):
        self.forward(*inputs)
    else:
        self.forward(inputs)
    if init is not None:
        # NOTE(review): assumes `self.net` is an nn.Module whose `apply`
        # visits every submodule — confirm for custom models.
        self.net.apply(init)