This repository has been archived by the owner on Dec 16, 2022. It is now read-only.
/
residual_with_layer_dropout.py
64 lines (54 loc) · 2.45 KB
/
residual_with_layer_dropout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from typing import Optional

import torch
class ResidualWithLayerDropout(torch.nn.Module):
    """
    A residual connection with the layer dropout technique [Deep Networks with Stochastic
    Depth](https://arxiv.org/pdf/1603.09382.pdf).

    This module accepts the input and output of a layer, decides whether this layer should
    be stochastically dropped, and returns either the input alone or output + input. When
    the module is not in training mode the layer is never dropped; instead its output is
    scaled by the probability of it being kept (`1 - dropout_prob`) before the input is
    added back.

    # Parameters

    undecayed_dropout_prob : `float`, optional (default = `0.5`)
        The base probability of dropping the layer. Must lie in `[0, 1]`. It is used as-is
        unless `forward` is given both `layer_index` and `total_layers`, in which case it
        is scaled by `layer_index / total_layers`.

    # Raises

    ValueError
        If `undecayed_dropout_prob` is NaN or lies outside `[0, 1]`.
    """

    def __init__(self, undecayed_dropout_prob: float = 0.5) -> None:
        super().__init__()
        # Negated chained comparison on purpose: every comparison against NaN is False,
        # so `p < 0 or p > 1` would let NaN through, whereas this form rejects it.
        if not 0 <= undecayed_dropout_prob <= 1:
            raise ValueError(
                "undecayed dropout probability has to be between 0 and 1, "
                f"but got {undecayed_dropout_prob}"
            )
        self.undecayed_dropout_prob = undecayed_dropout_prob

    def forward(
        self,  # type: ignore
        layer_input: torch.Tensor,
        layer_output: torch.Tensor,
        layer_index: Optional[int] = None,
        total_layers: Optional[int] = None,
    ) -> torch.Tensor:
        """
        Apply dropout to this layer, for this whole mini-batch (a single random draw is
        made per call).

        `dropout_prob = layer_index / total_layers * undecayed_dropout_prob` if both
        `layer_index` and `total_layers` are specified; otherwise `undecayed_dropout_prob`
        is used directly.

        # Parameters

        layer_input : `torch.FloatTensor`, required
            The input tensor of this layer.
        layer_output : `torch.FloatTensor`, required
            The output tensor of this layer, with the same shape as the layer_input.
        layer_index : `int`, optional (default = `None`)
            The layer index, starting from 1. This is used to calculate the dropout prob
            together with the `total_layers` parameter.
        total_layers : `int`, optional (default = `None`)
            The total number of layers.

        # Returns

        output : `torch.FloatTensor`
            A tensor with the same shape as `layer_input` and `layer_output`. In training
            mode this is either `layer_input` itself (layer dropped) or
            `layer_output + layer_input`; otherwise it is
            `(1 - dropout_prob) * layer_output + layer_input`.

        # Raises

        ValueError
            If `total_layers` is not positive, or if the decayed dropout probability
            falls outside `[0, 1]`.
        """
        if layer_index is not None and total_layers is not None:
            if total_layers <= 0:
                raise ValueError(f"total_layers has to be positive, but got {total_layers}")
            dropout_prob = 1.0 * self.undecayed_dropout_prob * layer_index / total_layers
            # A probability outside [0, 1] would make the evaluation-time scaling factor
            # (1 - dropout_prob) negative or greater than one, silently corrupting outputs.
            if not 0 <= dropout_prob <= 1:
                raise ValueError(
                    "decayed dropout probability has to be between 0 and 1, "
                    f"but got {dropout_prob} "
                    f"(layer_index={layer_index}, total_layers={total_layers})"
                )
        else:
            dropout_prob = 1.0 * self.undecayed_dropout_prob

        if not self.training:
            # Re-calibrate by the probability that the layer was kept during training.
            return (1 - dropout_prob) * layer_output + layer_input

        # One draw decides the fate of the layer for the entire mini-batch.
        if torch.rand(1) < dropout_prob:
            return layer_input
        return layer_output + layer_input