# Learning Dynamics - Zero Initialization

To view the results of the experiments, run the following command from the project root directory: `tensorboard --logdir results`.

In [None]:
import random

import numpy as np
import torch
from torch import Generator
from torch.optim import SGD

from data import get_dataloader, seed_worker
from models import MNISTFNNModel, MNISTCNNModel, CIFARCNNModel, LogisticRegression, CIFARFNNModel
from train import Trainer, TrainerConfig, run_experiment

In [None]:
# Single seed shared by every random number source used in this notebook.
R_SEED = 4240

# Seed torch, the stdlib `random` module and NumPy, in that order.
for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(R_SEED)

# 1. MNIST dataset

In [None]:
# Value passed as the epoch count to every MNIST trainer / TrainerConfig in this section.
MNIST_EPOCHS = 1

### 1.1 Baseline

In [None]:
# Dedicated RNG for the training loader, seeded with the notebook-wide seed.
generator = Generator().manual_seed(R_SEED)

# Test split of MNIST, requested with flatten=True, in batches of 100.
test_loader = get_dataloader(
    dataset="mnist",
    train=False,
    batch_size=100,
    flatten=True,
)
# Train split with the same settings, plus the seeded generator and
# seed_worker as the worker init function.
train_loader = get_dataloader(
    dataset="mnist",
    train=True,
    batch_size=100,
    flatten=True,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

# Baseline model: 28 * 28 = 784 inputs, 10 outputs
# (presumably flattened pixels -> digit classes; confirm in models.py).
model = LogisticRegression(28 * 28, 10)

# Train for MNIST_EPOCHS, then evaluate on the test split.
trainer = Trainer(model, model_name="MNIST_LOGISTIC")
trainer.train(train_loader, MNIST_EPOCHS)
trainer.test(test_loader)

### 1.2 FNN Model

#### 1.2.1 Models without zero initialization

In [None]:
# Fresh generator so this run does not depend on RNG state consumed by
# earlier cells.
generator = Generator().manual_seed(R_SEED)

# Flattened MNIST loaders, batch size 100.
test_loader = get_dataloader(
    dataset="mnist",
    train=False,
    batch_size=100,
    flatten=True,
)
train_loader = get_dataloader(
    dataset="mnist",
    train=True,
    batch_size=100,
    flatten=True,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

# FNN left with whatever initialization MNISTFNNModel applies by default.
model = MNISTFNNModel()

trainer = Trainer(model, model_name="MNIST_FNN")
trainer.train(train_loader, MNIST_EPOCHS)
trainer.test(test_loader)

In [None]:
# Same experiment as the default-initialization FNN run, but with a batch
# size of 16 instead of 100.
generator = Generator().manual_seed(R_SEED)

test_loader = get_dataloader(
    dataset="mnist",
    train=False,
    batch_size=16,
    flatten=True,
)
train_loader = get_dataloader(
    dataset="mnist",
    train=True,
    batch_size=16,
    flatten=True,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

model = MNISTFNNModel()

trainer = Trainer(model, model_name="MNIST_FNN_MINI_BATCH")
trainer.train(train_loader, MNIST_EPOCHS)
trainer.test(test_loader)

#### 1.2.2 Experiments with zero initialization

In [None]:
# Experiment grid for the MNIST FNN: one all-zero run, then for each of the
# "normal" and "uniform" initialization modes a default run, a down-scaled
# (factor 0.5) run, an up-scaled (factor 2.0) run, an SGD run and a
# mini-batch (batch size 16) run.
#
# The epoch count comes from MNIST_EPOCHS, the constant shared by all MNIST
# experiments in this notebook; the previously referenced FNN_EPOCHS is not
# defined anywhere and raised a NameError.
#
# `optimizer=SGD` passes the optimizer *class*; the experiment loop binds it
# to the model parameters.
configs = [
    TrainerConfig(model_name="MNIST_FNN_ZERO", epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="zero"),
    TrainerConfig(model_name="MNIST_FNN_NORMAL", epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal"),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_DOWN_SCALED", epochs=MNIST_EPOCHS, batch_size=100,
                  initialization_mode="normal", initialization_factor=.5),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_UP_SCALED", epochs=MNIST_EPOCHS, batch_size=100,
                  initialization_mode="normal", initialization_factor=2.),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_SGD", epochs=MNIST_EPOCHS, batch_size=100,
                  initialization_mode="normal", optimizer=SGD),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_MINI_BATCH", epochs=MNIST_EPOCHS, batch_size=16,
                  initialization_mode="normal"),

    TrainerConfig(model_name="MNIST_FNN_UNIFORM", epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform"),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_DOWN_SCALED", epochs=MNIST_EPOCHS, batch_size=100,
                  initialization_mode="uniform", initialization_factor=.5),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_UP_SCALED", epochs=MNIST_EPOCHS, batch_size=100,
                  initialization_mode="uniform", initialization_factor=2.),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_SGD", epochs=MNIST_EPOCHS, batch_size=100,
                  initialization_mode="uniform", optimizer=SGD),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_MINI_BATCH", epochs=MNIST_EPOCHS, batch_size=16,
                  initialization_mode="uniform"),
]

In [None]:
# Run every FNN configuration on a freshly constructed model.
for config in configs:
    model = MNISTFNNModel()

    # Configs carry either None or an optimizer class; a class is
    # instantiated here against this model's parameters (lr=0.01).
    # NOTE(review): this overwrites config.optimizer in place, so the config
    # cell has to be re-run before running this loop a second time.
    if config.optimizer is not None:
        config.optimizer = config.optimizer(model.parameters(), lr=0.01)

    run_experiment(model=model, dataset="mnist", config=config, seed=R_SEED)

    # Drop the reference before the next iteration builds a new model.
    del model

### 1.3 CNN Model

#### 1.3.1 Models without zero initialization

In [None]:
# Seeded generator for the training loader.
generator = Generator().manual_seed(R_SEED)

# MNIST loaders for the CNN, batch size 100 (no flatten argument passed here).
test_loader = get_dataloader(
    dataset="mnist",
    train=False,
    batch_size=100,
)
train_loader = get_dataloader(
    dataset="mnist",
    train=True,
    batch_size=100,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

# CNN left with whatever initialization MNISTCNNModel applies by default.
model = MNISTCNNModel()

trainer = Trainer(model, model_name="MNIST_CNN")
trainer.train(train_loader, MNIST_EPOCHS)
trainer.test(test_loader)

In [None]:
# Same experiment as the default-initialization CNN run, but with a batch
# size of 16 instead of 100.
generator = Generator().manual_seed(R_SEED)

test_loader = get_dataloader(
    dataset="mnist",
    train=False,
    batch_size=16,
)
train_loader = get_dataloader(
    dataset="mnist",
    train=True,
    batch_size=16,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

model = MNISTCNNModel()

trainer = Trainer(model, model_name="MNIST_CNN_MINI_BATCH")
trainer.train(train_loader, MNIST_EPOCHS)
trainer.test(test_loader)

#### 1.3.2 Experiments with zero initialization

In [None]:
# Keyword arguments shared by the configurations below: every run uses
# MNIST_EPOCHS epochs; only the "MINI_BATCH" runs use a batch size of 16.
full_batch = dict(epochs=MNIST_EPOCHS, batch_size=100)
mini_batch = dict(epochs=MNIST_EPOCHS, batch_size=16)

# Experiment grid for the MNIST CNN. `optimizer=SGD` passes the optimizer
# class itself; the experiment loop instantiates it.
configs = [
    # All-zero initialization.
    TrainerConfig(model_name="MNIST_CNN_ZERO", initialization_mode="zero", **full_batch),

    # "normal" initialization mode and its variants.
    TrainerConfig(model_name="MNIST_CNN_NORMAL", initialization_mode="normal", **full_batch),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_DOWN_SCALED", initialization_mode="normal",
                  initialization_factor=.5, **full_batch),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_UP_SCALED", initialization_mode="normal",
                  initialization_factor=2., **full_batch),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_SGD", initialization_mode="normal",
                  optimizer=SGD, **full_batch),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_MINI_BATCH", initialization_mode="normal", **mini_batch),

    # "uniform" initialization mode and its variants.
    TrainerConfig(model_name="MNIST_CNN_UNIFORM", initialization_mode="uniform", **full_batch),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_DOWN_SCALED", initialization_mode="uniform",
                  initialization_factor=.5, **full_batch),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_UP_SCALED", initialization_mode="uniform",
                  initialization_factor=2., **full_batch),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_SGD", initialization_mode="uniform",
                  optimizer=SGD, **full_batch),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_MINI_BATCH", initialization_mode="uniform", **mini_batch),
]

In [None]:
# Run every CNN configuration on a freshly constructed model.
for config in configs:
    model = MNISTCNNModel()

    # Configs carry either None or an optimizer class; a class is
    # instantiated here against this model's parameters (lr=0.002).
    # NOTE(review): this overwrites config.optimizer in place, so the config
    # cell has to be re-run before running this loop a second time.
    if config.optimizer is not None:
        config.optimizer = config.optimizer(model.parameters(), lr=0.002)

    run_experiment(model=model, dataset="mnist", config=config, seed=R_SEED)

    # Drop the reference before the next iteration builds a new model.
    del model

# 2. CIFAR 10 dataset

In [None]:
# Value passed as the epoch count to every CIFAR-10 trainer below.
CIFAR_EPOCHS = 1
# Batch size used for the CIFAR-10 train and test loaders.
CIFAR_BATCH_SIZE = 16

### 2.1 Baseline

In [None]:
# Dedicated RNG for the training loader, seeded with the notebook-wide seed.
generator = Generator().manual_seed(R_SEED)

# CIFAR-10 loaders requested with flatten=True for the linear baseline.
test_loader = get_dataloader(
    dataset="cifar10",
    train=False,
    batch_size=CIFAR_BATCH_SIZE,
    flatten=True,
)
train_loader = get_dataloader(
    dataset="cifar10",
    train=True,
    batch_size=CIFAR_BATCH_SIZE,
    flatten=True,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

# Baseline model: 32 * 32 * 3 = 3072 inputs, 10 outputs
# (presumably flattened RGB pixels -> classes; confirm in models.py).
model = LogisticRegression(32 * 32 * 3, 10)

trainer = Trainer(model, model_name="CIFAR_LOGISTIC")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

### 2.2 FNN Model

#### 2.2.1 Models without zero initialization

In [None]:
# Fresh generator so this run does not depend on RNG state consumed by
# earlier cells.
generator = Generator().manual_seed(R_SEED)

# Flattened CIFAR-10 loaders for the fully connected model.
test_loader = get_dataloader(
    dataset="cifar10",
    train=False,
    flatten=True,
    batch_size=CIFAR_BATCH_SIZE,
)
train_loader = get_dataloader(
    dataset="cifar10",
    train=True,
    flatten=True,
    batch_size=CIFAR_BATCH_SIZE,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

# FNN left with whatever initialization CIFARFNNModel applies by default.
model = CIFARFNNModel()

trainer = Trainer(model, model_name="CIFAR_FNN")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

#### 2.2.2 Experiments with zero initialization

In [None]:
# Build a freshly seeded generator and flattened loaders for this run rather
# than reusing whatever `train_loader` / `test_loader` an earlier cell left
# behind: a reused loader keeps drawing from an already-advanced generator,
# and after the CNN cells have run the leftover loaders are not flattened.
generator = Generator()
generator.manual_seed(R_SEED)

test_loader = get_dataloader(dataset="cifar10", train=False, flatten=True, batch_size=CIFAR_BATCH_SIZE)
train_loader = get_dataloader(dataset="cifar10", train=True, flatten=True, batch_size=CIFAR_BATCH_SIZE, num_workers=1,
                              worker_init_fn=seed_worker, generator=generator)

# CIFAR FNN re-initialized in "zero" mode before training.
model = CIFARFNNModel()
model.zero_initialization("zero")

trainer = Trainer(model, model_name="CIFAR10_FNN_ZERO")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

In [None]:
# Build a freshly seeded generator and flattened loaders for this run rather
# than reusing whatever `train_loader` / `test_loader` an earlier cell left
# behind: a reused loader keeps drawing from an already-advanced generator,
# and after the CNN cells have run the leftover loaders are not flattened.
generator = Generator()
generator.manual_seed(R_SEED)

test_loader = get_dataloader(dataset="cifar10", train=False, flatten=True, batch_size=CIFAR_BATCH_SIZE)
train_loader = get_dataloader(dataset="cifar10", train=True, flatten=True, batch_size=CIFAR_BATCH_SIZE, num_workers=1,
                              worker_init_fn=seed_worker, generator=generator)

# CIFAR FNN re-initialized in "uniform" mode before training.
model = CIFARFNNModel()
model.zero_initialization("uniform")

trainer = Trainer(model, model_name="CIFAR10_FNN_UNIFORM")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

In [None]:
# Build a freshly seeded generator and flattened loaders for this run rather
# than reusing whatever `train_loader` / `test_loader` an earlier cell left
# behind: a reused loader keeps drawing from an already-advanced generator,
# and after the CNN cells have run the leftover loaders are not flattened.
generator = Generator()
generator.manual_seed(R_SEED)

test_loader = get_dataloader(dataset="cifar10", train=False, flatten=True, batch_size=CIFAR_BATCH_SIZE)
train_loader = get_dataloader(dataset="cifar10", train=True, flatten=True, batch_size=CIFAR_BATCH_SIZE, num_workers=1,
                              worker_init_fn=seed_worker, generator=generator)

# CIFAR FNN re-initialized in "normal" mode before training.
model = CIFARFNNModel()
model.zero_initialization("normal")

trainer = Trainer(model, model_name="CIFAR10_FNN_NORMAL")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

### 2.3 CNN Model

#### 2.3.1 Models without zero initialization

In [None]:
# Seeded generator for the training loader.
generator = Generator().manual_seed(R_SEED)

# CIFAR-10 loaders for the CNN (no flatten argument passed here).
test_loader = get_dataloader(
    dataset="cifar10",
    train=False,
    batch_size=CIFAR_BATCH_SIZE,
)
train_loader = get_dataloader(
    dataset="cifar10",
    train=True,
    batch_size=CIFAR_BATCH_SIZE,
    num_workers=1,
    worker_init_fn=seed_worker,
    generator=generator,
)

# CNN left with whatever initialization CIFARCNNModel applies by default.
model = CIFARCNNModel()

trainer = Trainer(model, model_name="CIFAR10_CNN")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

#### 2.3.2 Experiments with zero initialization

In [None]:
# Build a freshly seeded generator and loaders for this run rather than
# reusing whatever `train_loader` / `test_loader` an earlier cell left
# behind: a reused loader keeps drawing from an already-advanced generator,
# which makes the result depend on which cells ran before this one.
generator = Generator()
generator.manual_seed(R_SEED)

test_loader = get_dataloader(dataset="cifar10", train=False, batch_size=CIFAR_BATCH_SIZE)
train_loader = get_dataloader(dataset="cifar10", train=True, batch_size=CIFAR_BATCH_SIZE, num_workers=1,
                              worker_init_fn=seed_worker, generator=generator)

# CIFAR CNN re-initialized in "zero" mode before training.
model = CIFARCNNModel()
model.zero_initialization("zero")

trainer = Trainer(model, model_name="CIFAR10_CNN_ZERO")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

In [None]:
# Build a freshly seeded generator and loaders for this run rather than
# reusing whatever `train_loader` / `test_loader` an earlier cell left
# behind: a reused loader keeps drawing from an already-advanced generator,
# which makes the result depend on which cells ran before this one.
generator = Generator()
generator.manual_seed(R_SEED)

test_loader = get_dataloader(dataset="cifar10", train=False, batch_size=CIFAR_BATCH_SIZE)
train_loader = get_dataloader(dataset="cifar10", train=True, batch_size=CIFAR_BATCH_SIZE, num_workers=1,
                              worker_init_fn=seed_worker, generator=generator)

# CIFAR CNN re-initialized in "uniform" mode before training.
model = CIFARCNNModel()
model.zero_initialization("uniform")

trainer = Trainer(model, model_name="CIFAR10_CNN_UNIFORM")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

In [None]:
# Build a freshly seeded generator and loaders for this run rather than
# reusing whatever `train_loader` / `test_loader` an earlier cell left
# behind: a reused loader keeps drawing from an already-advanced generator,
# which makes the result depend on which cells ran before this one.
generator = Generator()
generator.manual_seed(R_SEED)

test_loader = get_dataloader(dataset="cifar10", train=False, batch_size=CIFAR_BATCH_SIZE)
train_loader = get_dataloader(dataset="cifar10", train=True, batch_size=CIFAR_BATCH_SIZE, num_workers=1,
                              worker_init_fn=seed_worker, generator=generator)

# CIFAR CNN re-initialized in "normal" mode before training.
model = CIFARCNNModel()
model.zero_initialization("normal")

trainer = Trainer(model, model_name="CIFAR10_CNN_NORMAL")
trainer.train(train_loader, CIFAR_EPOCHS)
trainer.test(test_loader)