Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- make automated settings' last linear layers non-differentiable to save time - update new test suite, as it assumes all model parameters are differentiable - update `ggn_vector_product` to work with models that contain non-differentiable parameters and fully-document `ggnvp.py` --- * [DOC] Fully-document GGNVP and ignore non-differentiable parameters * [TEST] Make automated settings' last linear layers non-differentiable The final layers can have large parameter counts that make the computation of second-order quantities expensive. Disabling their `requires_grad` speeds up the tests. * [TEST] Check non-differentiable parameters while collecting results * [DOC] Fully document automated test setting helpers
- Loading branch information
Showing
7 changed files
with
213 additions
and
167 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,126 +1,143 @@ | ||
"""Contains helpers to create CNN test cases.""" | ||
from test.core.derivatives.utils import classification_targets | ||
from typing import Any, Tuple, Type | ||
|
||
import torch | ||
from torch import Tensor, rand | ||
from torch.nn import Conv2d, CrossEntropyLoss, Flatten, Linear, Module, ReLU, Sequential | ||
|
||
### | ||
# Helpers | ||
### | ||
|
||
def set_requires_grad(model: Module, new_requires_grad: bool) -> None:
    """Set the ``requires_grad`` attribute of all parameters of ``model``.

    Args:
        model: Network or layer whose parameters are modified.
        new_requires_grad: New value for ``requires_grad``.
    """
    for param in model.parameters():
        param.requires_grad = new_requires_grad
def make_simple_act_setting(act_cls: Type[Module], bias: bool) -> dict:
    """Create a simple CNN with activation as test case dictionary.

    Make parameters of final linear layer non-differentiable to save run time.

    Args:
        act_cls: Class of the activation function.
        bias: Use bias in the convolution.

    Returns:
        Dictionary representation of the simple CNN test case.
    """

    def _build_net() -> Sequential:
        # Classifier head; frozen so second-order tests skip its parameters.
        classifier = Linear(72, 5)
        set_requires_grad(classifier, False)
        return Sequential(Conv2d(3, 2, 2, bias=bias), act_cls(), Flatten(), classifier)

    return {
        "input_fn": lambda: rand(3, 3, 7, 7),
        "module_fn": _build_net,
        "loss_function_fn": lambda: CrossEntropyLoss(),
        "target_fn": lambda: classification_targets((3,), 5),
        "id_prefix": "automated-simple-cnn-act",
    }
def make_simple_cnn_setting(
    input_size: Tuple[int], conv_cls: Type[Module], conv_params: Tuple[Any]
) -> dict:
    """Create ReLU CNN with convolution hyperparameters as test case dictionary.

    Make parameters of final linear layer non-differentiable to save run time.

    Args:
        input_size: Input shape ``[N, C_in, ...]``.
        conv_cls: Class of convolution layer.
        conv_params: Convolution hyperparameters.

    Returns:
        Dictionary representation of the test case.
    """

    def _build_net(num_features: int) -> Sequential:
        # Classifier head with 5 classes; frozen to speed up the tests.
        classifier = Linear(num_features, 5)
        set_requires_grad(classifier, False)
        return Sequential(conv_cls(*conv_params), ReLU(), Flatten(), classifier)

    # Infer the flattened feature dimension via a forward pass on dummy input.
    num_features = _get_output_dim(conv_cls(*conv_params), rand(input_size))

    return {
        "input_fn": lambda: rand(input_size),
        "module_fn": lambda: _build_net(num_features),
        "loss_function_fn": lambda: CrossEntropyLoss(reduction="sum"),
        "target_fn": lambda: classification_targets((3,), 5),
        "id_prefix": "automated-simple-cnn",
    }
def make_simple_pooling_setting(
    input_size: Tuple[int],
    conv_cls: Type[Module],
    pool_cls: Type[Module],
    pool_params: Tuple[Any],
) -> dict:
    """Create CNN with convolution and pooling layer as test case dictionary.

    Make parameters of final linear layer non-differentiable to save run time.

    Args:
        input_size: Input shape ``[N, C_in, ...]``.
        conv_cls: Class of convolution layer.
        pool_cls: Class of pooling layer.
        pool_params: Pooling hyperparameters.

    Returns:
        Dictionary representation of the test case.
    """
    # Convolution hyperparameters are fixed for this setting.
    conv_params = (3, 2, 2)

    def _build_net(num_features: int) -> Sequential:
        # Classifier head with 5 classes; frozen to speed up the tests.
        classifier = Linear(num_features, 5)
        set_requires_grad(classifier, False)
        return Sequential(
            conv_cls(*conv_params),
            ReLU(),
            pool_cls(*pool_params),
            Flatten(),
            classifier,
        )

    # Infer the flattened feature dimension after convolution + pooling
    # (ReLU does not change the shape, so it is omitted here).
    num_features = _get_output_dim(
        Sequential(conv_cls(*conv_params), pool_cls(*pool_params)), rand(input_size)
    )

    return {
        "input_fn": lambda: rand(input_size),
        "module_fn": lambda: _build_net(num_features),
        "loss_function_fn": lambda: CrossEntropyLoss(reduction="sum"),
        "target_fn": lambda: classification_targets((3,), 5),
        "id_prefix": "automated-simple-cnn",
    }
def _get_output_dim(module: Module, input: Tensor) -> int: | ||
output = module(input) | ||
return output.numel() // output.shape[0] |
Oops, something went wrong.