In [6]:
import torch
from typing import Any

In [17]:
class MyModel(torch.nn.Module):
  """Fully connected ReLU network applied to flattened inputs.

  The layer stack is ``Flatten -> [Linear -> ReLU] * len(layer_size) -> Linear``
  and is stored as a ``torch.nn.Sequential`` in ``self.model``. Every ``Linear``
  layer keeps its default bias term.

  Args:
    layer_size: Widths of the hidden ``Linear`` layers, in order. An empty
      sequence yields a single ``Linear`` from input to output. The default is
      an immutable tuple so the shared default can never be mutated by accident.
    in_features: Number of values per sample once flattened. Defaults to
      ``128*128*3``, the value that was previously hard-coded.
    out_features: Width of the final ``Linear`` layer. Defaults to ``102``, the
      value that was previously hard-coded.
  """

  def __init__(self, layer_size=(512, 512, 512), in_features=128*128*3, out_features=102):
    super().__init__()
    layers = [torch.nn.Flatten()]
    c = in_features  # input width of the next Linear layer
    for s in layer_size:
      layers.append(torch.nn.Linear(c, s))
      layers.append(torch.nn.ReLU())
      c = s
    # Output head: no activation after the last Linear.
    layers.append(torch.nn.Linear(c, out_features))
    self.model = torch.nn.Sequential(*layers)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run ``x`` through the layer stack and return the result."""
    return self.model(x)

# Depth sweep: feed one fixed random batch through freshly constructed
# networks with 0..9 hidden layers of width 512 and print each output norm.
x = torch.randn((10, 3, 128, 128))
for n in range(0, 10):
  netn = MyModel(layer_size=[512 for _ in range(n)])
  print(f'{n=}  {netn(x).norm()=}')

n=0  netn(x).norm()=tensor(18.4072, grad_fn=<LinalgVectorNormBackward0>)
n=1  netn(x).norm()=tensor(7.7973, grad_fn=<LinalgVectorNormBackward0>)
n=2  netn(x).norm()=tensor(3.0652, grad_fn=<LinalgVectorNormBackward0>)
n=3  netn(x).norm()=tensor(1.6371, grad_fn=<LinalgVectorNormBackward0>)
n=4  netn(x).norm()=tensor(1.0761, grad_fn=<LinalgVectorNormBackward0>)
n=5  netn(x).norm()=tensor(0.9141, grad_fn=<LinalgVectorNormBackward0>)
n=6  netn(x).norm()=tensor(0.9291, grad_fn=<LinalgVectorNormBackward0>)
n=7  netn(x).norm()=tensor(0.9252, grad_fn=<LinalgVectorNormBackward0>)
n=8  netn(x).norm()=tensor(0.9160, grad_fn=<LinalgVectorNormBackward0>)
n=9  netn(x).norm()=tensor(0.8260, grad_fn=<LinalgVectorNormBackward0>)


In [18]:
# No-bias model: same stack as MyModel, but every Linear layer has bias=False.
class MyModelNoBias(torch.nn.Module):
  """Fully connected ReLU network whose ``Linear`` layers have no bias.

  The layer stack is ``Flatten -> [Linear -> ReLU] * len(layer_size) -> Linear``
  and is stored as a ``torch.nn.Sequential`` in ``self.model``. Every ``Linear``
  layer is created with ``bias=False``.

  Args:
    layer_size: Widths of the hidden ``Linear`` layers, in order. An empty
      sequence yields a single ``Linear`` from input to output. The default is
      an immutable tuple so the shared default can never be mutated by accident.
    in_features: Number of values per sample once flattened. Defaults to
      ``128*128*3``, the value that was previously hard-coded.
    out_features: Width of the final ``Linear`` layer. Defaults to ``102``, the
      value that was previously hard-coded.
  """

  def __init__(self, layer_size=(512, 512, 512), in_features=128*128*3, out_features=102):
    super().__init__()
    layers = [torch.nn.Flatten()]
    c = in_features  # input width of the next Linear layer
    for s in layer_size:
      layers.append(torch.nn.Linear(c, s, bias=False))
      layers.append(torch.nn.ReLU())
      c = s
    # Output head: no activation after the last Linear.
    layers.append(torch.nn.Linear(c, out_features, bias=False))
    self.model = torch.nn.Sequential(*layers)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run ``x`` through the layer stack and return the result."""
    return self.model(x)

# Depth sweep: feed one fixed random batch through freshly constructed
# bias-free networks with 0..9 hidden layers of width 512 and print each
# output norm.
x = torch.randn((10, 3, 128, 128))
for n in range(0, 10):
  netn = MyModelNoBias(layer_size=[512 for _ in range(n)])
  print(f'{n=}  {netn(x).norm()=}')

n=0  netn(x).norm()=tensor(18.3408, grad_fn=<LinalgVectorNormBackward0>)
n=1  netn(x).norm()=tensor(7.6698, grad_fn=<LinalgVectorNormBackward0>)
n=2  netn(x).norm()=tensor(3.1582, grad_fn=<LinalgVectorNormBackward0>)
n=3  netn(x).norm()=tensor(1.2290, grad_fn=<LinalgVectorNormBackward0>)
n=4  netn(x).norm()=tensor(0.5043, grad_fn=<LinalgVectorNormBackward0>)
n=5  netn(x).norm()=tensor(0.2126, grad_fn=<LinalgVectorNormBackward0>)
n=6  netn(x).norm()=tensor(0.0910, grad_fn=<LinalgVectorNormBackward0>)
n=7  netn(x).norm()=tensor(0.0335, grad_fn=<LinalgVectorNormBackward0>)
n=8  netn(x).norm()=tensor(0.0119, grad_fn=<LinalgVectorNormBackward0>)
n=9  netn(x).norm()=tensor(0.0048, grad_fn=<LinalgVectorNormBackward0>)


In [15]:
# Model with bias-free Linear layers and batch normalization after each hidden Linear.
class MyModelBN(torch.nn.Module):
  """Fully connected network with ``BatchNorm1d`` before each ReLU.

  The layer stack is
  ``Flatten -> [Linear -> BatchNorm1d -> ReLU] * len(layer_size) -> Linear``
  and is stored as a ``torch.nn.Sequential`` in ``self.model``. Every ``Linear``
  layer is created with ``bias=False``; the ``BatchNorm1d`` layers are created
  with their default arguments, so "no-bias" refers to the ``Linear`` layers
  only.

  Args:
    layer_size: Widths of the hidden ``Linear`` layers, in order. An empty
      sequence yields a single ``Linear`` from input to output (no
      normalization layer at all). The default is an immutable tuple so the
      shared default can never be mutated by accident.
    in_features: Number of values per sample once flattened. Defaults to
      ``128*128*3``, the value that was previously hard-coded.
    out_features: Width of the final ``Linear`` layer. Defaults to ``102``, the
      value that was previously hard-coded.
  """

  def __init__(self, layer_size=(512, 512, 512), in_features=128*128*3, out_features=102):
    super().__init__()
    layers = [torch.nn.Flatten()]
    c = in_features  # input width of the next Linear layer
    for s in layer_size:
      layers.append(torch.nn.Linear(c, s, bias=False))
      layers.append(torch.nn.BatchNorm1d(s))
      layers.append(torch.nn.ReLU())
      c = s
    # Output head: neither normalization nor activation after the last Linear.
    layers.append(torch.nn.Linear(c, out_features, bias=False))
    self.model = torch.nn.Sequential(*layers)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run ``x`` through the layer stack and return the result."""
    return self.model(x)

# Depth sweep: feed one fixed random batch through freshly constructed
# batch-normalized networks with 0..9 hidden layers of width 512 and print
# each output norm.
x = torch.randn((10, 3, 128, 128))
for n in range(0, 10):
  netn = MyModelBN(layer_size=[512 for _ in range(n)])
  print(f'{n=}  {netn(x).norm()=}')

n=0  netn(x).norm()=tensor(18.3326, grad_fn=<LinalgVectorNormBackward0>)
n=1  netn(x).norm()=tensor(12.6306, grad_fn=<LinalgVectorNormBackward0>)
n=2  netn(x).norm()=tensor(13.3191, grad_fn=<LinalgVectorNormBackward0>)
n=3  netn(x).norm()=tensor(13.4568, grad_fn=<LinalgVectorNormBackward0>)
n=4  netn(x).norm()=tensor(12.9636, grad_fn=<LinalgVectorNormBackward0>)
n=5  netn(x).norm()=tensor(12.9115, grad_fn=<LinalgVectorNormBackward0>)
n=6  netn(x).norm()=tensor(13.3095, grad_fn=<LinalgVectorNormBackward0>)
n=7  netn(x).norm()=tensor(12.5757, grad_fn=<LinalgVectorNormBackward0>)
n=8  netn(x).norm()=tensor(11.8785, grad_fn=<LinalgVectorNormBackward0>)
n=9  netn(x).norm()=tensor(13.1714, grad_fn=<LinalgVectorNormBackward0>)


In [19]:
# Model with bias-free Linear layers and layer normalization after each hidden Linear.
class MyModelLN(torch.nn.Module):
  """Fully connected network with ``LayerNorm`` before each ReLU.

  The layer stack is
  ``Flatten -> [Linear -> LayerNorm -> ReLU] * len(layer_size) -> Linear``
  and is stored as a ``torch.nn.Sequential`` in ``self.model``. Every ``Linear``
  layer is created with ``bias=False``; the ``LayerNorm`` layers are created
  with their default arguments, so "no-bias" refers to the ``Linear`` layers
  only.

  Args:
    layer_size: Widths of the hidden ``Linear`` layers, in order. An empty
      sequence yields a single ``Linear`` from input to output (no
      normalization layer at all). The default is an immutable tuple so the
      shared default can never be mutated by accident.
    in_features: Number of values per sample once flattened. Defaults to
      ``128*128*3``, the value that was previously hard-coded.
    out_features: Width of the final ``Linear`` layer. Defaults to ``102``, the
      value that was previously hard-coded.
  """

  def __init__(self, layer_size=(512, 512, 512), in_features=128*128*3, out_features=102):
    super().__init__()
    layers = [torch.nn.Flatten()]
    c = in_features  # input width of the next Linear layer
    for s in layer_size:
      layers.append(torch.nn.Linear(c, s, bias=False))
      layers.append(torch.nn.LayerNorm(s))
      layers.append(torch.nn.ReLU())
      c = s
    # Output head: neither normalization nor activation after the last Linear.
    layers.append(torch.nn.Linear(c, out_features, bias=False))
    self.model = torch.nn.Sequential(*layers)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run ``x`` through the layer stack and return the result."""
    return self.model(x)

# Depth sweep: feed one fixed random batch through freshly constructed
# layer-normalized networks with 0..9 hidden layers of width 512 and print
# each output norm.
x = torch.randn((10, 3, 128, 128))
for n in range(0, 10):
  netn = MyModelLN(layer_size=[512 for _ in range(n)])
  print(f'{n=}  {netn(x).norm()=}')

n=0  netn(x).norm()=tensor(18.2750, grad_fn=<LinalgVectorNormBackward0>)
n=1  netn(x).norm()=tensor(13.5601, grad_fn=<LinalgVectorNormBackward0>)
n=2  netn(x).norm()=tensor(12.4414, grad_fn=<LinalgVectorNormBackward0>)
n=3  netn(x).norm()=tensor(14.0265, grad_fn=<LinalgVectorNormBackward0>)
n=4  netn(x).norm()=tensor(11.9391, grad_fn=<LinalgVectorNormBackward0>)
n=5  netn(x).norm()=tensor(12.7917, grad_fn=<LinalgVectorNormBackward0>)
n=6  netn(x).norm()=tensor(13.8482, grad_fn=<LinalgVectorNormBackward0>)
n=7  netn(x).norm()=tensor(13.1121, grad_fn=<LinalgVectorNormBackward0>)
n=8  netn(x).norm()=tensor(12.0549, grad_fn=<LinalgVectorNormBackward0>)
n=9  netn(x).norm()=tensor(12.1029, grad_fn=<LinalgVectorNormBackward0>)
