# Training neural network with zero weight initialization

For a detailed explanation of the experiments in this file, read the corresponding paper in the repository.

To see the results of the experiments, run the following command from the project root directory: `tensorboard --logdir results`.

In [None]:
import os
import random
import warnings

import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
from torch import Generator
from torch.nn import Sigmoid
from torch.optim import SGD

from data import get_dataloader, seed_worker
from models import MNISTFNNModel, MNISTCNNModel, CIFARCNNModel, LogisticRegression, CIFARFNNModel, ResNet50
from train import Trainer, TrainerConfig, run_experiment
from utils import read_summary_files_to_df, read_gradient_summary_to_df, plot_summary, \
    plot_gradient_distribution_comparison

# Ignore all warnings for the rest of the session so they do not clutter the cell outputs.
warnings.filterwarnings('ignore')

# 1 MNIST dataset

In [None]:
# Seed and epoch budget used by the exploratory MNIST experiments that follow.
R_SEED = 4240
MNIST_EPOCHS = 5

# Put the Python, PyTorch and NumPy global RNGs into a known state.
random.seed(R_SEED)
torch.manual_seed(R_SEED)
np.random.seed(R_SEED)

### 1.1 Baseline Model

In [None]:
# Dedicated generator, seeded with R_SEED, that is handed to the training data loader.
generator = Generator()
generator.manual_seed(R_SEED)

# MNIST loaders requested with flatten=True and batches of 100; the train=True call
# is unpacked into a training loader and a validation loader.
test_loader = get_dataloader(dataset="mnist", train=False, batch_size=100, flatten=True)
train_loader, validation_loader = get_dataloader(dataset="mnist", train=True, batch_size=100, flatten=True,
                                                 num_workers=1, worker_init_fn=seed_worker, generator=generator)

# Baseline: logistic regression constructed with 28 * 28 and 10 as its two size arguments.
model = LogisticRegression(28 * 28, 10)

# Train for MNIST_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="MNIST_LOGISTIC")
trainer.train(train_loader, validation_loader, MNIST_EPOCHS)
trainer.test(test_loader)

### 1.2 FNN Model

#### 1.2.1 Models without zero initialization

In [None]:
# Fresh generator seeded with R_SEED for the training data loader.
generator = Generator()
generator.manual_seed(R_SEED)

# MNIST loaders requested with flatten=True and batches of 100.
test_loader = get_dataloader(dataset="mnist", train=False, batch_size=100, flatten=True)
train_loader, validation_loader = get_dataloader(dataset="mnist", train=True, batch_size=100, flatten=True,
                                                 num_workers=1,
                                                 worker_init_fn=seed_worker, generator=generator)

# FNN built with its constructor defaults (no custom initialization applied in this cell).
model = MNISTFNNModel()

# Train for MNIST_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="MNIST_FNN")
trainer.train(train_loader, validation_loader, MNIST_EPOCHS)
trainer.test(test_loader)

In [None]:
# Fresh generator seeded with R_SEED for the training data loader.
generator = Generator()
generator.manual_seed(R_SEED)

# Same setup as the batch-size-100 FNN run, but with mini batches of 16.
test_loader = get_dataloader(dataset="mnist", train=False, batch_size=16, flatten=True)
train_loader, validation_loader = get_dataloader(dataset="mnist", train=True, batch_size=16, flatten=True,
                                                 num_workers=1,
                                                 worker_init_fn=seed_worker, generator=generator)

# FNN built with its constructor defaults.
model = MNISTFNNModel()

# Train for MNIST_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="MNIST_FNN_MINI_BATCH")
trainer.train(train_loader, validation_loader, MNIST_EPOCHS)
trainer.test(test_loader)

#### 1.2.2 Experiments with zero initialization

In [None]:
# Experiment grid for the MNIST FNN: zero initialization (default activation and Sigmoid),
# followed by the same five variants for the "normal" and the "uniform" mode
# (plain, factor 0.01, factor 100, SGD optimizer, batch size 16).
configs = [
    # --- initialization_mode="zero" ---
    TrainerConfig(model_name="MNIST_FNN_ZERO",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="zero"),
    TrainerConfig(model_name="MNIST_FNN_ZERO_SIGMOID",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="zero", activation_fun=Sigmoid()),

    # --- initialization_mode="normal" ---
    TrainerConfig(model_name="MNIST_FNN_NORMAL",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal"),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_DOWN_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal", initialization_factor=0.01),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_UP_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal", initialization_factor=100),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_SGD",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal", optimizer=SGD),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_MINI_BATCH",
                  epochs=MNIST_EPOCHS, batch_size=16, initialization_mode="normal"),

    # --- initialization_mode="uniform" ---
    TrainerConfig(model_name="MNIST_FNN_UNIFORM",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform"),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_DOWN_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform", initialization_factor=0.01),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_UP_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform", initialization_factor=100),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_SGD",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform", optimizer=SGD),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM_MINI_BATCH",
                  epochs=MNIST_EPOCHS, batch_size=16, initialization_mode="uniform"),
]

In [None]:
# Run every configured MNIST FNN experiment with the fixed seed R_SEED.
for config in configs:
    model = MNISTFNNModel(activation_fun=config.activation_fun)
    # Configs that carry an optimizer class get it instantiated for this model's
    # parameters; configs without one keep None.
    if config.optimizer is not None:
        config.optimizer = config.optimizer(model.parameters(), lr=0.001)
    run_experiment(model=model, dataset="mnist", config=config, seed=R_SEED)
    # Drop the reference before the next model is built.
    del model

#### 1.2.3 Final Results

In [None]:
# Re-seed all three global RNGs non-deterministically so the repeated runs differ.
# torch.seed() is the PyTorch counterpart of the argument-less random.seed() and
# np.random.seed(); torch.initial_seed() only returns the current seed and would
# leave the PyTorch generator in its previously seeded state.
torch.seed()
random.seed()
np.random.seed()

# Settings for the final MNIST FNN comparison.
MNIST_EPOCHS = 10
BATCH_SIZE = 128
RUNS = 5

configs = [
    TrainerConfig(model_name="MNIST_FNN_BASELINE", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="default"),
    TrainerConfig(model_name="MNIST_FNN", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE, initialization_mode="default"),
    TrainerConfig(model_name="MNIST_FNN_NORMAL", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="normal"),
    TrainerConfig(model_name="MNIST_FNN_UNIFORM", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="uniform"),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_UP_SCALED", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="normal", initialization_factor=100),
    TrainerConfig(model_name="MNIST_FNN_NORMAL_DOWN_SCALED", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="normal", initialization_factor=.01),
]

for config in configs:
    # The "BASELINE" entry is the logistic regression; every other entry is the FNN.
    if "BASELINE" in config.model_name:
        model = LogisticRegression(28 * 28, 10)
    else:
        model = MNISTFNNModel(activation_fun=config.activation_fun)

    # RUNS repetitions per config; only per-epoch test evaluation is enabled.
    run_experiment(model=model, dataset="mnist", config=config, runs=RUNS, train_summary=False,
                   validation_summary=False, validate_after_epoch=False, test_after_epoch=True)
    del model

In [None]:
# Load the per-run summaries of the four MNIST FNN result directories, each tagged
# with the label passed as model_name.
mnist_baseline_df, mnist_fnn_df, mnist_fnn_normal_df, mnist_fnn_uniform_df = (
    read_summary_files_to_df(summary_path=os.path.join("results", run_dir), model_name=label,
                             n_runs=RUNS, n_epochs=MNIST_EPOCHS)
    for run_dir, label in (
        ("MNIST_FNN_BASELINE", "Baseline model"),
        ("MNIST_FNN", "LeCun initialization"),
        ("MNIST_FNN_NORMAL", "Normal initialization"),
        ("MNIST_FNN_UNIFORM", "Uniform initialization"),
    )
)

# Stack the frames and plot them together.
mnist_fnn_results = pd.concat([mnist_baseline_df, mnist_fnn_df, mnist_fnn_normal_df, mnist_fnn_uniform_df])

plot_summary(mnist_fnn_results, "MNIST FNN Test Accuracy")

In [None]:
# Compare the normal-mode FNN runs without a factor against the runs with
# initialization_factor .01 (down-scaled) and 100 (up-scaled).
mnist_fnn_normal_df, mnist_fnn_down_df, mnist_fnn_up_df = (
    read_summary_files_to_df(summary_path=os.path.join("results", run_dir), model_name=label,
                             n_runs=RUNS, n_epochs=MNIST_EPOCHS)
    for run_dir, label in (
        ("MNIST_FNN_NORMAL", "Unscaled bias"),
        ("MNIST_FNN_NORMAL_DOWN_SCALED", "Downscaled bias"),
        ("MNIST_FNN_NORMAL_UP_SCALED", "Upscaled bias"),
    )
)

mnist_fnn_results = pd.concat([mnist_fnn_normal_df, mnist_fnn_down_df, mnist_fnn_up_df])

plot_summary(mnist_fnn_results, "MNIST FNN Scaled Test Accuracy")

### 1.3 CNN Model

In [None]:
# Restore the fixed seed and the short epoch budget for the exploratory CNN experiments.
R_SEED = 4240
MNIST_EPOCHS = 5

# Put the Python, PyTorch and NumPy global RNGs into a known state.
random.seed(R_SEED)
torch.manual_seed(R_SEED)
np.random.seed(R_SEED)

#### 1.3.1 Models without zero initialization

In [None]:
# Fresh generator seeded with R_SEED for the training data loader.
generator = Generator()
generator.manual_seed(R_SEED)

# MNIST loaders with batches of 100; flatten is not passed here, unlike in the FNN cells.
test_loader = get_dataloader(dataset="mnist", train=False, batch_size=100)
train_loader, validation_loader = get_dataloader(dataset="mnist", train=True, batch_size=100, num_workers=1,
                                                 worker_init_fn=seed_worker,
                                                 generator=generator)

# CNN built with its constructor defaults.
model = MNISTCNNModel()

# Train for MNIST_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="MNIST_CNN")
trainer.train(train_loader, validation_loader, MNIST_EPOCHS)
trainer.test(test_loader)

In [None]:
# Fresh generator seeded with R_SEED for the training data loader.
generator = Generator()
generator.manual_seed(R_SEED)

# Same setup as the batch-size-100 CNN run, but with mini batches of 16.
test_loader = get_dataloader(dataset="mnist", train=False, batch_size=16)
train_loader, validation_loader = get_dataloader(dataset="mnist", train=True, batch_size=16, num_workers=1,
                                                 worker_init_fn=seed_worker,
                                                 generator=generator)

# CNN built with its constructor defaults.
model = MNISTCNNModel()

# Train for MNIST_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="MNIST_CNN_MINI_BATCH")
trainer.train(train_loader, validation_loader, MNIST_EPOCHS)
trainer.test(test_loader)

#### 1.3.2 Experiments with zero initialization

In [None]:
# Experiment grid for the MNIST CNN: zero initialization (default activation and Sigmoid),
# followed by the same five variants for the "normal" and the "uniform" mode
# (plain, factor 0.01, factor 100, SGD optimizer, batch size 16).
configs = [
    # --- initialization_mode="zero" ---
    TrainerConfig(model_name="MNIST_CNN_ZERO",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="zero"),
    TrainerConfig(model_name="MNIST_CNN_ZERO_SIGMOID",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="zero", activation_fun=Sigmoid()),

    # --- initialization_mode="normal" ---
    TrainerConfig(model_name="MNIST_CNN_NORMAL",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal"),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_DOWN_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal", initialization_factor=0.01),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_UP_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal", initialization_factor=100),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_SGD",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="normal", optimizer=SGD),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_MINI_BATCH",
                  epochs=MNIST_EPOCHS, batch_size=16, initialization_mode="normal"),

    # --- initialization_mode="uniform" ---
    TrainerConfig(model_name="MNIST_CNN_UNIFORM",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform"),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_DOWN_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform", initialization_factor=0.01),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_UP_SCALED",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform", initialization_factor=100),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_SGD",
                  epochs=MNIST_EPOCHS, batch_size=100, initialization_mode="uniform", optimizer=SGD),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM_MINI_BATCH",
                  epochs=MNIST_EPOCHS, batch_size=16, initialization_mode="uniform"),
]

In [None]:
# Run every configured MNIST CNN experiment with the fixed seed R_SEED.
for config in configs:
    # NOTE(review): config.activation_fun is not forwarded to the model here, unlike in
    # the FNN loop — confirm whether MNIST_CNN_ZERO_SIGMOID really uses Sigmoid.
    model = MNISTCNNModel()
    # Configs that carry an optimizer class get it instantiated for this model's
    # parameters; configs without one keep None.
    if config.optimizer is not None:
        config.optimizer = config.optimizer(model.parameters(), lr=0.001)
    run_experiment(model=model, dataset="mnist", config=config, seed=R_SEED)
    # Drop the reference before the next model is built.
    del model

#### 1.3.3 Final Results

In [None]:
# Re-seed all three global RNGs non-deterministically so the repeated runs differ.
# torch.seed() is the PyTorch counterpart of the argument-less random.seed() and
# np.random.seed(); torch.initial_seed() only returns the current seed and would
# leave the PyTorch generator in its previously seeded state.
torch.seed()
random.seed()
np.random.seed()

# Settings for the final MNIST CNN comparison.
MNIST_EPOCHS = 15
BATCH_SIZE = 128
RUNS = 5

configs = [
    TrainerConfig(model_name="MNIST_CNN", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE, initialization_mode="default"),
    TrainerConfig(model_name="MNIST_CNN_NORMAL", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="normal"),
    TrainerConfig(model_name="MNIST_CNN_UNIFORM", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="uniform"),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_UP_SCALED", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="normal", initialization_factor=100),
    TrainerConfig(model_name="MNIST_CNN_NORMAL_DOWN_SCALED", epochs=MNIST_EPOCHS, batch_size=BATCH_SIZE,
                  initialization_mode="normal", initialization_factor=.01),
]

for config in configs:
    model = MNISTCNNModel()

    # Use RUNS (not a literal 5) so the run count stays in sync with the summary
    # readers, which are called with n_runs=RUNS.
    run_experiment(model=model, dataset="mnist", config=config, runs=RUNS, train_summary=False,
                   validation_summary=False, validate_after_epoch=False, test_after_epoch=True)
    del model

In [None]:
# Load the baseline and the three MNIST CNN result directories. The baseline summaries
# are read with n_epochs=10, the CNN ones with the current MNIST_EPOCHS.
mnist_baseline_df, mnist_cnn_df, mnist_cnn_normal_df, mnist_cnn_uniform_df = (
    read_summary_files_to_df(summary_path=os.path.join("results", run_dir), model_name=label,
                             n_runs=RUNS, n_epochs=epochs)
    for run_dir, label, epochs in (
        ("MNIST_FNN_BASELINE", "Baseline model", 10),
        ("MNIST_CNN", "LeCun initialization", MNIST_EPOCHS),
        ("MNIST_CNN_NORMAL", "Normal initialization", MNIST_EPOCHS),
        ("MNIST_CNN_UNIFORM", "Uniform initialization", MNIST_EPOCHS),
    )
)

# Stack the frames and plot them together.
mnist_cnn_results = pd.concat([mnist_baseline_df, mnist_cnn_df, mnist_cnn_normal_df, mnist_cnn_uniform_df])

plot_summary(mnist_cnn_results, "MNIST CNN Test Accuracy")

In [None]:
# Compare the normal-mode CNN runs without a factor against the runs with
# initialization_factor .01 (down-scaled) and 100 (up-scaled).
mnist_cnn_normal_df = read_summary_files_to_df(summary_path=os.path.join("results", "MNIST_CNN_NORMAL"),
                                               model_name="Unscaled bias", n_runs=RUNS, n_epochs=MNIST_EPOCHS)
mnist_cnn_down_df = read_summary_files_to_df(summary_path=os.path.join("results", "MNIST_CNN_NORMAL_DOWN_SCALED"),
                                             model_name="Downscaled bias", n_runs=RUNS, n_epochs=MNIST_EPOCHS)
mnist_cnn_up_df = read_summary_files_to_df(summary_path=os.path.join("results", "MNIST_CNN_NORMAL_UP_SCALED"),
                                           model_name="Upscaled bias", n_runs=RUNS, n_epochs=MNIST_EPOCHS)

mnist_cnn_results = pd.concat([mnist_cnn_normal_df, mnist_cnn_down_df, mnist_cnn_up_df])

# Distinct title, matching "MNIST FNN Scaled Test Accuracy"; the previous title was
# identical to the one used for the unscaled CNN comparison plot.
plot_summary(mnist_cnn_results, "MNIST CNN Scaled Test Accuracy")

# 2 CIFAR 10 dataset

In [None]:
# Seed, epoch budget and batch size used by the exploratory CIFAR-10 experiments.
R_SEED = 4240
CIFAR_EPOCHS = 10
CIFAR_BATCH_SIZE = 100

# Put the Python, PyTorch and NumPy global RNGs into a known state.
random.seed(R_SEED)
torch.manual_seed(R_SEED)
np.random.seed(R_SEED)

In [None]:
# Generator seeded with R_SEED for the training data loader.
generator = Generator()
generator.manual_seed(R_SEED)

# CIFAR-10 loaders requested with flatten=True; they are reused by the baseline and
# FNN cells of section 2.1 and 2.2.
test_loader = get_dataloader(dataset="cifar10", train=False, flatten=True, batch_size=CIFAR_BATCH_SIZE)
train_loader, validation_loader = get_dataloader(dataset="cifar10", train=True, flatten=True,
                                                 batch_size=CIFAR_BATCH_SIZE, num_workers=1, worker_init_fn=seed_worker,
                                                 generator=generator)

### 2.1 Baseline Model

In [None]:
# Baseline: logistic regression constructed with 32 * 32 * 3 and 10 as its two size arguments.
model = LogisticRegression(32 * 32 * 3, 10)

# Train for CIFAR_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="CIFAR_LOGISTIC")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

### 2.2 FNN Model

#### 2.2.1 Models without zero initialization

In [None]:
# FNN built with its constructor defaults; train for CIFAR_EPOCHS epochs, then test.
model = CIFARFNNModel()
trainer = Trainer(model, model_name="CIFAR_FNN")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

#### 2.2.2 Experiments with zero initialization

In [None]:
# FNN re-initialized through zero_initialization with mode "zero" before training.
model = CIFARFNNModel()
model.zero_initialization("zero")

# Train for CIFAR_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="CIFAR10_FNN_ZERO")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

In [None]:
# FNN re-initialized through zero_initialization with mode "uniform" before training.
model = CIFARFNNModel()
model.zero_initialization("uniform")

# Train for CIFAR_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="CIFAR10_FNN_UNIFORM")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

In [None]:
# FNN re-initialized through zero_initialization with mode "normal" before training.
model = CIFARFNNModel()
model.zero_initialization("normal")

# Train for CIFAR_EPOCHS epochs, then evaluate on the test loader.
trainer = Trainer(model, model_name="CIFAR10_FNN_NORMAL")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)
trainer.test(test_loader)

#### 2.2.3 Final Results

In [None]:
# Re-seed all three global RNGs non-deterministically so the repeated runs differ.
# torch.seed() is the PyTorch counterpart of the argument-less random.seed() and
# np.random.seed(); torch.initial_seed() only returns the current seed and would
# leave the PyTorch generator in its previously seeded state.
torch.seed()
random.seed()
np.random.seed()

# Settings for the final CIFAR-10 FNN comparison.
CIFAR_EPOCHS = 15
CIFAR_BATCH_SIZE = 128
RUNS = 5

configs = [
    TrainerConfig(model_name="CIFAR10_FNN_BASELINE", epochs=CIFAR_EPOCHS, batch_size=CIFAR_BATCH_SIZE,
                  initialization_mode="default"),
    TrainerConfig(model_name="CIFAR10_FNN", epochs=CIFAR_EPOCHS, batch_size=CIFAR_BATCH_SIZE,
                  initialization_mode="default"),
    TrainerConfig(model_name="CIFAR10_FNN_NORMAL", epochs=CIFAR_EPOCHS, batch_size=CIFAR_BATCH_SIZE,
                  initialization_mode="normal"),
    TrainerConfig(model_name="CIFAR10_FNN_UNIFORM", epochs=CIFAR_EPOCHS, batch_size=CIFAR_BATCH_SIZE,
                  initialization_mode="uniform"),
]

for config in configs:
    # The "BASELINE" entry is the logistic regression; every other entry is the FNN.
    if "BASELINE" in config.model_name:
        model = LogisticRegression(32 * 32 * 3, 10)
    else:
        model = CIFARFNNModel()

    # RUNS repetitions per config; only per-epoch test evaluation is enabled.
    run_experiment(model=model, dataset="cifar10", config=config, runs=RUNS, train_summary=False,
                   validation_summary=False, validate_after_epoch=False, test_after_epoch=True)
    del model

In [None]:
# Load the per-run summaries of the four CIFAR-10 FNN result directories.
# n_epochs uses CIFAR_EPOCHS, the value these experiments were configured with;
# MNIST_EPOCHS only matched by coincidence when the MNIST CNN cell had run last.
cifar_baseline_df = read_summary_files_to_df(summary_path=os.path.join("results", "CIFAR10_FNN_BASELINE"),
                                             model_name="Baseline model", n_runs=RUNS, n_epochs=CIFAR_EPOCHS)
cifar_fnn_df = read_summary_files_to_df(summary_path=os.path.join("results", "CIFAR10_FNN"),
                                        model_name="LeCun initialization", n_runs=RUNS, n_epochs=CIFAR_EPOCHS)
cifar_fnn_normal_df = read_summary_files_to_df(summary_path=os.path.join("results", "CIFAR10_FNN_NORMAL"),
                                               model_name="Normal initialization", n_runs=RUNS, n_epochs=CIFAR_EPOCHS)
cifar_fnn_uniform_df = read_summary_files_to_df(summary_path=os.path.join("results", "CIFAR10_FNN_UNIFORM"),
                                                model_name="Uniform initialization", n_runs=RUNS, n_epochs=CIFAR_EPOCHS)

# Stack the frames and plot them together.
cifar_fnn_results = pd.concat([cifar_baseline_df, cifar_fnn_df, cifar_fnn_normal_df, cifar_fnn_uniform_df])

plot_summary(cifar_fnn_results, "CIFAR-10 FNN Test Accuracy")

In [None]:
# One-epoch runs with train_summary=True for the default and the "normal" mode;
# their result directories are read by the gradient comparison that follows.
configs = [
    TrainerConfig(
        model_name="CIFAR10_FNN_GRAD",
        epochs=1,
        batch_size=128,
        initialization_mode="default",
    ),
    TrainerConfig(
        model_name="CIFAR10_FNN_NORMAL_GRAD",
        epochs=1,
        batch_size=128,
        initialization_mode="normal",
    ),
]

for config in configs:
    model = CIFARFNNModel(activation_fun=config.activation_fun)

    run_experiment(
        model=model,
        dataset="cifar10",
        config=config,
        runs=1,
        train_summary=True,
        validation_summary=False,
        validate_after_epoch=False,
        test_after_epoch=True,
    )
    del model

In [None]:
# Read the gradient summaries of both one-epoch runs and plot them side by side.
fnn_df, fnn_normal_df = (
    read_gradient_summary_to_df(os.path.join("results", run_dir))
    for run_dir in ("CIFAR10_FNN_GRAD", "CIFAR10_FNN_NORMAL_GRAD")
)

plot_gradient_distribution_comparison(
    fnn_df, "CIFAR-10 FNN LeCun",
    fnn_normal_df, "CIFAR-10 FNN our approach",
)

### 2.3 CNN Model

In [None]:
# Restore the fixed seed for the exploratory CIFAR-10 CNN experiments.
R_SEED = 4240

# Put the Python, PyTorch and NumPy global RNGs into a known state.
random.seed(R_SEED)
torch.manual_seed(R_SEED)
np.random.seed(R_SEED)

#### 2.3.1 Models without zero initialization

In [None]:
# Generator seeded with R_SEED for the training data loader.
generator = Generator()
generator.manual_seed(R_SEED)

# CIFAR-10 loaders for the CNN cells; flatten is not passed here, unlike for the FNN loaders.
test_loader = get_dataloader(dataset="cifar10", train=False, batch_size=CIFAR_BATCH_SIZE)
train_loader, validation_loader = get_dataloader(dataset="cifar10", train=True, batch_size=CIFAR_BATCH_SIZE,
                                                 num_workers=1,
                                                 worker_init_fn=seed_worker, generator=generator)

In [None]:
# CNN built with its constructor defaults.
model = CIFARCNNModel()

# Train for CIFAR_EPOCHS epochs; no test evaluation is run in this cell.
trainer = Trainer(model, model_name="CIFAR10_CNN")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)

#### 2.3.2 Experiments with zero initialization

In [None]:
# CNN re-initialized through zero_initialization with mode "zero" before training.
model = CIFARCNNModel()
model.zero_initialization("zero")

# Train for CIFAR_EPOCHS epochs; no test evaluation is run in this cell.
trainer = Trainer(model, model_name="CIFAR10_CNN_ZERO")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)

In [None]:
# CNN re-initialized through zero_initialization with mode "uniform" before training.
model = CIFARCNNModel()
model.zero_initialization("uniform")

# Train for CIFAR_EPOCHS epochs; no test evaluation is run in this cell.
trainer = Trainer(model, model_name="CIFAR10_CNN_UNIFORM")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)

In [None]:
# CNN re-initialized through zero_initialization with mode "normal" before training.
model = CIFARCNNModel()
model.zero_initialization("normal")

# Train for CIFAR_EPOCHS epochs; no test evaluation is run in this cell.
trainer = Trainer(model, model_name="CIFAR10_CNN_NORMAL")
trainer.train(train_loader, validation_loader, CIFAR_EPOCHS)

#### 2.3.3 Final Results

In [None]:
# Re-seed all three global RNGs non-deterministically so the repeated runs differ.
# torch.seed() is the PyTorch counterpart of the argument-less random.seed() and
# np.random.seed(); torch.initial_seed() only returns the current seed and would
# leave the PyTorch generator in its previously seeded state.
torch.seed()
random.seed()
np.random.seed()

# Settings for the final CIFAR-10 CNN comparison.
CIFAR_EPOCHS = 20
CIFAR_BATCH_SIZE = 128
RUNS = 5

configs = [
    TrainerConfig(model_name="CIFAR10_CNN", epochs=CIFAR_EPOCHS, batch_size=CIFAR_BATCH_SIZE,
                  initialization_mode="default"),
    TrainerConfig(model_name="CIFAR10_CNN_NORMAL", epochs=CIFAR_EPOCHS, batch_size=CIFAR_BATCH_SIZE,
                  initialization_mode="normal"),
    TrainerConfig(model_name="CIFAR10_CNN_UNIFORM", epochs=CIFAR_EPOCHS, batch_size=CIFAR_BATCH_SIZE,
                  initialization_mode="uniform"),
]

for config in configs:
    model = CIFARCNNModel()

    # The experiment call and the cleanup belong inside the loop: when they sat at
    # top level, only the last config was ever run. RUNS replaces the literal 5 so the
    # run count matches the n_runs=RUNS used when the summaries are read.
    run_experiment(model=model, dataset="cifar10", config=config, runs=RUNS, train_summary=False,
                   validation_summary=False, validate_after_epoch=False, test_after_epoch=True,
                   test_final_model=False)
    del model

In [None]:
# Load the baseline and the three CIFAR-10 CNN result directories. The baseline
# summaries are read with n_epochs=15, the CNN ones with the current CIFAR_EPOCHS.
cifar_baseline_df, cifar_cnn_df, cifar_cnn_normal_df, cifar_cnn_uniform_df = (
    read_summary_files_to_df(summary_path=os.path.join("results", run_dir), model_name=label,
                             n_runs=RUNS, n_epochs=epochs)
    for run_dir, label, epochs in (
        ("CIFAR10_FNN_BASELINE", "Baseline model", 15),
        ("CIFAR10_CNN", "LeCun initialization", CIFAR_EPOCHS),
        ("CIFAR10_CNN_NORMAL", "Normal initialization", CIFAR_EPOCHS),
        ("CIFAR10_CNN_UNIFORM", "Uniform initialization", CIFAR_EPOCHS),
    )
)

# Stack the frames and plot them together.
cifar_results = pd.concat([cifar_baseline_df, cifar_cnn_df, cifar_cnn_normal_df, cifar_cnn_uniform_df])

plot_summary(cifar_results, "CIFAR-10 CNN Test Accuracy")

### 2.4 ResNet-50 model

To confirm our conclusions on a more complex task we run the CIFAR-10 experiments for a ResNet-50 model.

In [None]:
# Seed and epoch budget used by the exploratory ResNet-50 experiments.
R_SEED = 4240
RESNET_EPOCHS = 20

# Put the Python, PyTorch and NumPy global RNGs into a known state.
random.seed(R_SEED)
torch.manual_seed(R_SEED)
np.random.seed(R_SEED)

In [None]:
# Training pipeline: random 32x32 crop after 4 pixels of padding, random horizontal
# flip, tensor conversion and per-channel normalization.
# NOTE(review): the normalization constants are presumably CIFAR-10 channel
# means / standard deviations — confirm their origin.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Test pipeline: no augmentation, same normalization as for training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Generator seeded with R_SEED for the training data loader.
generator = Generator().manual_seed(R_SEED)

# Test batches of 100, training/validation batches of 128.
test_loader = get_dataloader(dataset="cifar10", train=False, batch_size=100, transform=transform_test)
train_loader, validation_loader = get_dataloader(dataset="cifar10", train=True, batch_size=128,
                                                 transform=transform_train, num_workers=1, worker_init_fn=seed_worker,
                                                 generator=generator)

In [None]:
# ResNet-50 built with its constructor defaults.
model = ResNet50()

# Train for RESNET_EPOCHS epochs with validation after each epoch and train summaries disabled.
trainer = Trainer(model, model_name="CIFAR10_RESNET")
trainer.train(train_loader, validation_loader, num_epochs=RESNET_EPOCHS, train_summary=False, validate_after_epoch=True)

In [None]:
# ResNet-50 re-initialized through zero_initialization with mode "uniform" before training.
model = ResNet50()
model.zero_initialization("uniform")

# Train for RESNET_EPOCHS epochs with validation after each epoch and train summaries disabled.
trainer = Trainer(model, model_name="CIFAR10_RESNET_UNIFORM")
trainer.train(train_loader, validation_loader, num_epochs=RESNET_EPOCHS, train_summary=False, validate_after_epoch=True)

In [None]:
# ResNet-50 re-initialized through zero_initialization with mode "normal" before training.
model = ResNet50()
model.zero_initialization("normal")

# Train for RESNET_EPOCHS epochs with validation after each epoch and train summaries disabled.
trainer = Trainer(model, model_name="CIFAR10_RESNET_NORMAL")
trainer.train(train_loader, validation_loader, num_epochs=RESNET_EPOCHS, train_summary=False, validate_after_epoch=True)

#### 2.4.1 Final Results

In [None]:
# Re-seed all three global RNGs non-deterministically so the repeated runs differ.
# torch.seed() is the PyTorch counterpart of the argument-less random.seed() and
# np.random.seed(); torch.initial_seed() only returns the current seed and would
# leave the PyTorch generator in its previously seeded state.
torch.seed()
random.seed()
np.random.seed()

# Settings for the final ResNet-50 comparison.
RESNET_EPOCHS = 30
RESNET_BATCH_SIZE = 128
RUNS = 5

# Training pipeline: random 32x32 crop after 4 pixels of padding, random horizontal
# flip, tensor conversion and per-channel normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Test pipeline: no augmentation, same normalization as for training.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# The transforms travel with each config so run_experiment receives them.
configs = [
    TrainerConfig(model_name="CIFAR10_RESNET", epochs=RESNET_EPOCHS, batch_size=RESNET_BATCH_SIZE,
                  initialization_mode="default", transform_test=transform_test, transform_train=transform_train),
    TrainerConfig(model_name="CIFAR10_RESNET_NORMAL", epochs=RESNET_EPOCHS, batch_size=RESNET_BATCH_SIZE,
                  initialization_mode="normal", transform_test=transform_test, transform_train=transform_train),
    TrainerConfig(model_name="CIFAR10_RESNET_UNIFORM", epochs=RESNET_EPOCHS, batch_size=RESNET_BATCH_SIZE,
                  initialization_mode="uniform", transform_test=transform_test, transform_train=transform_train),
]

for config in configs:
    model = ResNet50()
    # RUNS repetitions per config; only per-epoch test evaluation is enabled.
    run_experiment(model=model, dataset="cifar10", config=config, runs=RUNS, train_summary=False,
                   validation_summary=False,
                   validate_after_epoch=False, test_after_epoch=True)
    del model

In [None]:
# Load the per-run summaries of the three ResNet-50 result directories.
resnet_df, resnet_normal_df, resnet_uniform_df = (
    read_summary_files_to_df(summary_path=os.path.join("results", run_dir), model_name=label,
                             n_runs=RUNS, n_epochs=RESNET_EPOCHS)
    for run_dir, label in (
        ("CIFAR10_RESNET", "ResNet"),
        ("CIFAR10_RESNET_NORMAL", "ResNet normal"),
        ("CIFAR10_RESNET_UNIFORM", "ResNet uniform"),
    )
)

# Stack the frames and plot them together.
resnet_results = pd.concat([resnet_df, resnet_normal_df, resnet_uniform_df])

plot_summary(resnet_results, "ResNet-50 Test Accuracy")