In [1]:
from utils import (
    get_mnist_data_loaders,
    NN_FC_CrossEntropy,
    compute_validation_accuracy_multi,
    train_one_epoch,
)

from fastprogress.fastprogress import master_bar

import torch

import matplotlib.pyplot as plt
from jupyterthemes import jtplot

# Apply the jupyterthemes plotting style, using its "talk" context
jtplot.style(context="talk")

In [2]:
# Reproducibility: seed PyTorch's global random number generator up front
seed = 0
torch.manual_seed(seed)

# Directory handed to the data-loader helper
data_path = "../data"

# Hyperparameters (the optimizer keeps its default hyperparameter values)
num_epochs, batch_size = 4, 128
# NOTE(review): how a validation batch size of 0 is interpreted is decided by
# get_mnist_data_loaders -- confirm against utils
valid_batch_size = 0

# Training device: pinned to the CPU. The auto-detecting alternative would be
#   "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"
print(f"Using '{device}' device.")

Using 'cpu' device.


In [3]:
# Build the training and validation loaders from the configured path and
# batch sizes
loader_args = (data_path, batch_size, valid_batch_size)
train_loader, valid_loader = get_mnist_data_loaders(*loader_args)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [32]:
# Network dimensions come from the training set: `nx` is the number of
# elements in one sample (product of every dimension after the first) and
# `ny` is the number of classes.
train_set = train_loader.dataset
nx = train_set.data.shape[1:].numel()
ny = len(train_set.classes)
layer_sizes = (nx, 20, 20, ny)

# Loss function, fully connected model with sigmoid activations, and an Adam
# optimizer left at its default hyperparameters
criterion = torch.nn.CrossEntropyLoss()
model = NN_FC_CrossEntropy(layer_sizes, torch.nn.Sigmoid).to(device)
optimizer = torch.optim.Adam(model.parameters())

In [33]:
def init_weights(layer):
    """Re-initialize a single ``torch.nn.Linear`` layer (for ``model.apply``).

    The scheme is chosen by the module-level variable ``kind``, which must be
    assigned before this function runs:

    - ``"zeros"`` / ``"ones"``: weights and bias filled with 0.0 / 1.0.
    - ``"uniform"``: weights drawn from U(0, 1), bias zeroed.
    - ``"normal"``: weights drawn from N(0, 1), bias zeroed.
    - ``"normal2"``: weights drawn from a zero-mean normal distribution with
      standard deviation ``1 / sqrt(layer.weight.shape[0])``, bias zeroed.
    - ``"xavier"``: ``torch.nn.init.xavier_uniform_`` on the weights; the bias
      is left untouched.
    - ``"kaiming"``: ``torch.nn.init.kaiming_normal_`` on the weights; the
      bias is left untouched.

    Any other value prints a notice and leaves the layer unchanged. Modules
    whose type is not exactly ``torch.nn.Linear`` are skipped silently.

    Args:
        layer: The module being visited.
    """
    # Exact type match (rather than isinstance) so Linear subclasses are skipped.
    if type(layer) is not torch.nn.Linear:
        return

    print("Initializing", layer)

    def fill_bias(value):
        # A layer built with bias=False has `layer.bias is None`; nothing to fill.
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, value)

    if kind == "zeros":
        torch.nn.init.zeros_(layer.weight)
        fill_bias(0.0)

    elif kind == "ones":
        torch.nn.init.ones_(layer.weight)
        fill_bias(1.0)

    elif kind == "uniform":
        torch.nn.init.uniform_(layer.weight)
        fill_bias(0.0)

    elif kind == "normal":
        torch.nn.init.normal_(layer.weight)
        fill_bias(0.0)

    elif kind == "normal2":
        # The previous code multiplied the sampled tensor by the scale but
        # discarded the product (and called torch.sqrt on a Python int, which
        # raises TypeError). Sampling directly with the scaled standard
        # deviation applies the scaling to the stored weights.
        # NOTE(review): shape[0] is the first weight dimension (out_features
        # for nn.Linear); fan-in scaling would use shape[1] -- confirm intent.
        scale = layer.weight.shape[0] ** -0.5
        torch.nn.init.normal_(layer.weight, mean=0.0, std=scale)
        fill_bias(0.0)

    elif kind == "xavier":
        torch.nn.init.xavier_uniform_(layer.weight)

    elif kind == "kaiming":
        torch.nn.init.kaiming_normal_(layer.weight)

    else:
        print(f"'{kind}' is not handled")


# `init_weights` reads this module-level `kind`, so it must be set before the
# function is applied
kind = "kaiming"
# Run `init_weights` over the model; as the last expression of the cell, the
# value returned by `apply` is displayed as the cell output
model.apply(init_weights)

Initializing Linear(in_features=784, out_features=20, bias=True)
Initializing Linear(in_features=20, out_features=20, bias=True)
Initializing Linear(in_features=20, out_features=10, bias=True)


NN_FC_CrossEntropy(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Sequential(
      (0): Linear(in_features=784, out_features=20, bias=True)
      (1): Sigmoid()
    )
    (2): Sequential(
      (0): Linear(in_features=20, out_features=20, bias=True)
      (1): Sigmoid()
    )
    (3): Linear(in_features=20, out_features=10, bias=True)
  )
)

In [34]:
# Training loop
# `mb` wraps the epoch range in a fastprogress master bar; the same object is
# passed to the training and validation helpers.
mb = master_bar(range(num_epochs))
# Validation pass before any training. The trailing 0 is presumably the epoch
# index used for reporting -- TODO confirm against utils.
compute_validation_accuracy_multi(valid_loader, model, criterion, device, mb, 0)
for epoch in mb:
    train_one_epoch(train_loader, model, criterion, optimizer, device, mb)
    # NOTE(review): `loss` and `accuracy` are overwritten every epoch and are
    # not read anywhere in this cell.
    loss, accuracy = compute_validation_accuracy_multi(
        valid_loader, model, criterion, device, mb, epoch + 1
    )

In [22]:
# Pull one batch from the training loader; `X` is reused by the later
# activation-statistics cell
X, Y = next(iter(train_loader))
# Display the shapes of the two batch tensors as the cell output
X.shape, Y.shape

(torch.Size([128, 1, 28, 28]), torch.Size([128]))

In [37]:
# Stack of linear layers (no activation functions) with one batch-norm layer,
# used to watch how the mean and standard deviation of the signal change from
# layer to layer under the selected initialization.
# NOTE: this rebinds `model`, replacing the network created earlier.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 100),
    torch.nn.BatchNorm1d(100),
    torch.nn.Linear(100, 100),
    torch.nn.Linear(100, 100),
    torch.nn.Linear(100, 10),
)

# `init_weights` picks its scheme from the module-level `kind`
kind = "kaiming"
model.apply(init_weights)

with torch.no_grad():
    # Statistics of the raw input batch come first ...
    A = X
    std, mean = torch.std_mean(A)
    print(f"*** Mean = {mean.item():.3f}, STD = {std.item():.3f}")

    # ... then each layer is announced, applied, and followed by the
    # statistics of its output.
    for layer in model:
        print(layer)
        A = layer(A)
        std, mean = torch.std_mean(A)
        print(f"*** Mean = {mean.item():.3f}, STD = {std.item():.3f}")

Initializing Linear(in_features=784, out_features=100, bias=True)
Initializing Linear(in_features=100, out_features=100, bias=True)
Initializing Linear(in_features=100, out_features=100, bias=True)
Initializing Linear(in_features=100, out_features=10, bias=True)
*** Mean = -0.013, STD = 0.986
Flatten(start_dim=1, end_dim=-1)
*** Mean = -0.013, STD = 0.986
Linear(in_features=784, out_features=100, bias=True)
*** Mean = -0.011, STD = 1.438
BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
*** Mean = -0.000, STD = 1.000
Linear(in_features=100, out_features=100, bias=True)
*** Mean = -0.016, STD = 1.441
Linear(in_features=100, out_features=100, bias=True)
*** Mean = 0.001, STD = 2.057
Linear(in_features=100, out_features=10, bias=True)
*** Mean = 0.050, STD = 3.143


In [68]:
# Display the shape of the batch tensor `X`
X.shape

torch.Size([128, 1, 28, 28])

In [None]:
# BatchNorm1d requires `num_features`; calling it with no arguments raises a
# TypeError. 100 matches the width used in the Sequential model earlier in
# this notebook.
torch.nn.BatchNorm1d(100)