In [38]:
import torch
import torch.nn as nn

In [39]:
class ModelWithParameter(nn.Module):
    """Toy module whose weights are all wrapped in ``nn.Parameter``.

    Three parameters are registered, in this order: ``weight`` (3x3),
    ``free_weights_1`` (3x2) and ``free_weights_2`` (3x4). Only ``weight``
    is used by ``forward``; the other two exist to show up in parameter
    listings.

    Args:
        no_grad: When truthy, ``weight`` is frozen (``requires_grad=False``).
            The two ``free_weights_*`` parameters always stay trainable.
    """

    def __init__(self, no_grad):
        super().__init__()

        # Assigning an nn.Parameter to an attribute registers it on the module.
        # `weight` is frozen at construction time when no_grad is requested.
        self.weight = nn.Parameter(torch.randn(3, 3), requires_grad=not no_grad)
        self.free_weights_1 = nn.Parameter(torch.randn(3, 2))
        self.free_weights_2 = nn.Parameter(torch.randn(3, 4))

    def forward(self, x):
        """Return the matrix product of ``x`` with ``weight``."""
        return torch.matmul(x, self.weight)

In [40]:
class ModelWithTensor(nn.Module):
    """Toy module that keeps its 3x3 matrix as a plain tensor attribute.

    Because ``cache`` is not wrapped in ``nn.Parameter``, it is not
    registered as a parameter of the module.
    """

    def __init__(self):
        super().__init__()
        # Plain tensor attribute: not a Parameter, so it is not tracked.
        self.cache = torch.randn(3, 3)

    def forward(self, x):
        """Return the matrix product of ``x`` with ``cache``."""
        return torch.matmul(x, self.cache)

In [41]:
# Create both models
# no_grad=True freezes ModelWithParameter.weight (requires_grad=False);
# its two free_weights_* parameters stay trainable.
model_param = ModelWithParameter(no_grad=True)
# ModelWithTensor only holds a plain tensor attribute (no nn.Parameter).
model_tensor = ModelWithTensor()

In [42]:
# Check parameters
# Each printed row is: name, shape, requires_grad, grad, number of elements.
print("ModelWithParameter parameters:")
for name, param in model_param.named_parameters():
    print(f"  {name}: {param.shape} {param.requires_grad} {param.grad} {param.numel()}")
# Expected first row: "  weight: torch.Size([3, 3]) False None 9"

# Only the header is printed below: ModelWithTensor stores a plain tensor,
# so named_parameters() yields nothing for it.
print("\nModelWithTensor parameters:")
for name, param in model_tensor.named_parameters():
    print(f"  {name}: {param.shape} {param.requires_grad} {param.grad}")

ModelWithParameter parameters:
  weight: torch.Size([3, 3]) False None 9
  free_weights_1: torch.Size([3, 2]) True None 6
  free_weights_2: torch.Size([3, 4]) True None 12

ModelWithTensor parameters:


In [43]:
# Print the shape of every parameter that is still trainable.
for p in model_param.parameters():
    if not p.requires_grad:
        continue  # frozen parameter: nothing to report
    print(p.shape)

torch.Size([3, 2])
torch.Size([3, 4])


In [44]:
# 1. Parameters are auto-registered
class Demo(nn.Module):
    """Minimal module holding one ``nn.Parameter`` and one plain tensor."""

    def __init__(self):
        super().__init__()
        initial_values = torch.randn(2, 2)
        # Wrapped in nn.Parameter -> registered on the module automatically.
        self.param = nn.Parameter(initial_values)
        # Plain tensor attribute -> not registered.
        self.tensor = torch.randn(2, 2)

demo = Demo()
print(list(demo.parameters()))  # Only shows self.param

# 2. Parameters appear in optimizer
optimizer = torch.optim.SGD(demo.parameters(), lr=0.01)
# Only self.param will be updated during training!

# 3. Parameters move with .to() calls
# Use the GPU only when one is present: a hard-coded 'cuda' raises on
# CPU-only machines and stops the notebook from running top to bottom.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
demo.to(device)  # self.param moves to `device`
                 # self.tensor stays on CPU!

# 4. Parameters are included in state_dict()
state = demo.state_dict()
print(state.keys())  # Only 'param', not 'tensor'

[Parameter containing:
tensor([[ 0.0083,  0.2546],
        [-0.3408, -0.1763]], requires_grad=True)]
odict_keys(['param'])
