Skip to content
Permalink
 
 
Cannot retrieve contributors at this time
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import scipy.linalg
from . import thops
class _ActNorm(nn.Module):
"""
Activation Normalization
Initialize the bias and scale with a given minibatch,
so that the output per-channel have zero mean and unit variance for that.
After initialization, `bias` and `logs` will be trained as parameters.
"""
def __init__(self, num_features, scale=1.):
super().__init__()
# register mean and scale
size = [1, num_features, 1, 1]
self.register_parameter("bias", nn.Parameter(torch.zeros(*size)))
self.register_parameter("logs", nn.Parameter(torch.zeros(*size)))
self.num_features = num_features
self.scale = float(scale)
self.inited = False
def _check_input_dim(self, input):
return NotImplemented
def initialize_parameters(self, input):
self._check_input_dim(input)
if not self.training:
return
assert input.device == self.bias.device
with torch.no_grad():
bias = thops.mean(input.clone(), dim=[0, 2, 3], keepdim=True) * -1.0
vars = thops.mean((input.clone() + bias) ** 2, dim=[0, 2, 3], keepdim=True)
logs = torch.log(self.scale/(torch.sqrt(vars)+1e-6))
self.bias.data.copy_(bias.data)
self.logs.data.copy_(logs.data)
self.inited = True
def _center(self, input, reverse=False):
if not reverse:
return input + self.bias
else:
return input - self.bias
def _scale(self, input, logdet=None, reverse=False):
logs = self.logs
if not reverse:
input = input * torch.exp(logs)
else:
input = input * torch.exp(-logs)
if logdet is not None:
"""
logs is log_std of `mean of channels`
so we need to multiply pixels
"""
dlogdet = thops.sum(logs) * thops.pixels(input)
if reverse:
dlogdet *= -1
logdet = logdet + dlogdet
return input, logdet
def forward(self, input, logdet=None, reverse=False):
if not self.inited:
self.initialize_parameters(input)
self._check_input_dim(input)
# no need to permute dims as old version
if not reverse:
# center and scale
input = self._center(input, reverse)
input, logdet = self._scale(input, logdet, reverse)
else:
# scale and center
input, logdet = self._scale(input, logdet, reverse)
input = self._center(input, reverse)
return input, logdet
class ActNorm2d(_ActNorm):
def __init__(self, num_features, scale=1.):
super().__init__(num_features, scale)
def _check_input_dim(self, input):
assert len(input.size()) == 4
assert input.size(1) == self.num_features, (
"[ActNorm]: input should be in shape as `BCHW`,"
" channels should be {} rather than {}".format(
self.num_features, input.size()))
class LinearZeros(nn.Linear):
def __init__(self, in_channels, out_channels, logscale_factor=3):
super().__init__(in_channels, out_channels)
self.logscale_factor = logscale_factor
# set logs parameter
self.register_parameter("logs", nn.Parameter(torch.zeros(out_channels)))
# init
self.weight.data.zero_()
self.bias.data.zero_()
def forward(self, input):
output = super().forward(input)
return output * torch.exp(self.logs * self.logscale_factor)
class Conv2d(nn.Conv2d):
pad_dict = {
"same": lambda kernel, stride: [((k - 1) * s + 1) // 2 for k, s in zip(kernel, stride)],
"valid": lambda kernel, stride: [0 for _ in kernel]
}
@staticmethod
def get_padding(padding, kernel_size, stride):
# make paddding
if isinstance(padding, str):
if isinstance(kernel_size, int):
kernel_size = [kernel_size, kernel_size]
if isinstance(stride, int):
stride = [stride, stride]
padding = padding.lower()
try:
padding = Conv2d.pad_dict[padding](kernel_size, stride)
except KeyError:
raise ValueError("{} is not supported".format(padding))
return padding
def __init__(self, in_channels, out_channels,
kernel_size=[3, 3], stride=[1, 1],
padding="same", do_actnorm=True, weight_std=0.05):
padding = Conv2d.get_padding(padding, kernel_size, stride)
super().__init__(in_channels, out_channels, kernel_size, stride,
padding, bias=(not do_actnorm))
# init weight with std
self.weight.data.normal_(mean=0.0, std=weight_std)
if not do_actnorm:
self.bias.data.zero_()
else:
self.actnorm = ActNorm2d(out_channels)
self.do_actnorm = do_actnorm
def forward(self, input):
x = super().forward(input)
if self.do_actnorm:
x, _ = self.actnorm(x)
return x
class Conv2dZeros(nn.Conv2d):
def __init__(self, in_channels, out_channels,
kernel_size=[3, 3], stride=[1, 1],
padding="same", logscale_factor=3):
padding = Conv2d.get_padding(padding, kernel_size, stride)
super().__init__(in_channels, out_channels, kernel_size, stride, padding)
# logscale_factor
self.logscale_factor = logscale_factor
self.register_parameter("logs", nn.Parameter(torch.zeros(out_channels, 1, 1)))
# init
self.weight.data.zero_()
self.bias.data.zero_()
def forward(self, input):
output = super().forward(input)
return output * torch.exp(self.logs * self.logscale_factor)
class Permute2d(nn.Module):
def __init__(self, num_channels, shuffle):
super().__init__()
self.num_channels = num_channels
self.indices = np.arange(self.num_channels - 1, -1, -1).astype(np.long)
self.indices_inverse = np.zeros((self.num_channels), dtype=np.long)
for i in range(self.num_channels):
self.indices_inverse[self.indices[i]] = i
if shuffle:
self.reset_indices()
def reset_indices(self):
np.random.shuffle(self.indices)
for i in range(self.num_channels):
self.indices_inverse[self.indices[i]] = i
def forward(self, input, reverse=False):
assert len(input.size()) == 4
if not reverse:
return input[:, self.indices, :, :]
else:
return input[:, self.indices_inverse, :, :]
class InvertibleConv1x1(nn.Module):
def __init__(self, num_channels, LU_decomposed=False):
super().__init__()
w_shape = [num_channels, num_channels]
w_init = np.linalg.qr(np.random.randn(*w_shape))[0].astype(np.float32)
if not LU_decomposed:
# Sample a random orthogonal matrix:
self.register_parameter("weight", nn.Parameter(torch.Tensor(w_init)))
else:
np_p, np_l, np_u = scipy.linalg.lu(w_init)
np_s = np.diag(np_u)
np_sign_s = np.sign(np_s)
np_log_s = np.log(np.abs(np_s))
np_u = np.triu(np_u, k=1)
l_mask = np.tril(np.ones(w_shape, dtype=np.float32), -1)
eye = np.eye(*w_shape, dtype=np.float32)
self.register_buffer('p', torch.Tensor(np_p.astype(np.float32)))
self.register_buffer('sign_s', torch.Tensor(np_sign_s.astype(np.float32)))
self.l = nn.Parameter(torch.Tensor(np_l.astype(np.float32)))
self.log_s = nn.Parameter(torch.Tensor(np_log_s.astype(np.float32)))
self.u = nn.Parameter(torch.Tensor(np_u.astype(np.float32)))
self.l_mask = torch.Tensor(l_mask)
self.eye = torch.Tensor(eye)
self.w_shape = w_shape
self.LU = LU_decomposed
def get_weight(self, input, reverse):
w_shape = self.w_shape
if not self.LU:
pixels = thops.pixels(input)
dlogdet = torch.slogdet(self.weight)[1] * pixels
if not reverse:
weight = self.weight.view(w_shape[0], w_shape[1], 1, 1)
else:
weight = torch.inverse(self.weight.double()).float()\
.view(w_shape[0], w_shape[1], 1, 1)
return weight, dlogdet
else:
self.p = self.p.to(input.device)
self.sign_s = self.sign_s.to(input.device)
self.l_mask = self.l_mask.to(input.device)
self.eye = self.eye.to(input.device)
l = self.l * self.l_mask + self.eye
u = self.u * self.l_mask.transpose(0, 1).contiguous() + torch.diag(self.sign_s * torch.exp(self.log_s))
dlogdet = thops.sum(self.log_s) * thops.pixels(input)
if not reverse:
w = torch.matmul(self.p, torch.matmul(l, u))
else:
l = torch.inverse(l.double()).float()
u = torch.inverse(u.double()).float()
w = torch.matmul(u, torch.matmul(l, self.p.inverse()))
return w.view(w_shape[0], w_shape[1], 1, 1), dlogdet
def forward(self, input, logdet=None, reverse=False):
"""
log-det = log|abs(|W|)| * pixels
"""
weight, dlogdet = self.get_weight(input, reverse)
if not reverse:
z = F.conv2d(input, weight)
if logdet is not None:
logdet = logdet + dlogdet
return z, logdet
else:
z = F.conv2d(input, weight)
if logdet is not None:
logdet = logdet - dlogdet
return z, logdet
class GaussianDiag:
Log2PI = float(np.log(2 * np.pi))
@staticmethod
def likelihood(mean, logs, x):
"""
lnL = -1/2 * { ln|Var| + ((X - Mu)^T)(Var^-1)(X - Mu) + kln(2*PI) }
k = 1 (Independent)
Var = logs ** 2
"""
return -0.5 * (logs * 2. + ((x - mean) ** 2) / torch.exp(logs * 2.) + GaussianDiag.Log2PI)
@staticmethod
def logp(mean, logs, x):
likelihood = GaussianDiag.likelihood(mean, logs, x)
return thops.sum(likelihood, dim=[1, 2, 3])
@staticmethod
def sample(mean, logs, eps_std=None):
eps_std = eps_std or 1
eps = torch.normal(mean=torch.zeros_like(mean),
std=torch.ones_like(logs) * eps_std)
return mean + torch.exp(logs) * eps
class Split2d(nn.Module):
def __init__(self, num_channels):
super().__init__()
self.conv = Conv2dZeros(num_channels // 2, num_channels)
def split2d_prior(self, z):
h = self.conv(z)
return thops.split_feature(h, "cross")
def forward(self, input, logdet=0., reverse=False, eps_std=None):
if not reverse:
z1, z2 = thops.split_feature(input, "split")
mean, logs = self.split2d_prior(z1)
logdet = GaussianDiag.logp(mean, logs, z2) + logdet
return z1, logdet
else:
z1 = input
mean, logs = self.split2d_prior(z1)
z2 = GaussianDiag.sample(mean, logs, eps_std)
z = thops.cat_feature(z1, z2)
return z, logdet
def squeeze2d(input, factor=2):
assert factor >= 1 and isinstance(factor, int)
if factor == 1:
return input
size = input.size()
B = size[0]
C = size[1]
H = size[2]
W = size[3]
assert H % factor == 0 and W % factor == 0, "{}".format((H, W))
x = input.view(B, C, H // factor, factor, W // factor, factor)
x = x.permute(0, 1, 3, 5, 2, 4).contiguous()
x = x.view(B, C * factor * factor, H // factor, W // factor)
return x
def unsqueeze2d(input, factor=2):
assert factor >= 1 and isinstance(factor, int)
factor2 = factor ** 2
if factor == 1:
return input
size = input.size()
B = size[0]
C = size[1]
H = size[2]
W = size[3]
assert C % (factor2) == 0, "{}".format(C)
x = input.view(B, C // factor2, factor, factor, H, W)
x = x.permute(0, 1, 4, 2, 5, 3).contiguous()
x = x.view(B, C // (factor2), H * factor, W * factor)
return x
class SqueezeLayer(nn.Module):
def __init__(self, factor):
super().__init__()
self.factor = factor
def forward(self, input, logdet=None, reverse=False):
if not reverse:
output = squeeze2d(input, self.factor)
return output, logdet
else:
output = unsqueeze2d(input, self.factor)
return output, logdet