In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader

# Probe CUDA once up front; later cells read `use_gpu` to pick the device
# and to decide whether the GPU RNG needs seeding.
use_gpu = torch.cuda.is_available()

# Record the environment in the notebook output for reproducibility.
print('PyTorch version:', torch.__version__)
print('Is GPU available:', use_gpu)

PyTorch version: 1.0.0
Is GPU available: True


In [2]:
# Mini-batch size handed to the training DataLoader.
batchsize = 32

# Run on the GPU when one was detected, otherwise stay on the CPU.
device = torch.device('cuda') if use_gpu else torch.device('cpu')

# Fix the random seed so that initialisation and sampled noise are repeatable.
seed = 1
torch.manual_seed(seed)
if use_gpu:
    # Seed the CUDA generator as well.
    torch.cuda.manual_seed(seed)

In [13]:
# Build the dataset: a simple sine wave plus Gaussian noise.
ϵ = 0.1           # scale applied to the standard-normal noise
datalength = 100  # number of samples in each waveform
t = torch.linspace(0, 1, datalength)
y = torch.sin(2 * np.pi * t)  # one period of a clean sine over t in [0, 1]

datasize = 500
# One independent noise draw per waveform (same draw order as filling row by
# row), stacked into a (datasize, datalength) tensor.
data_tensor = torch.stack([y + ϵ * torch.randn(t.size()) for _ in range(datasize)])

# The tensor itself serves as the dataset: each item is one noisy waveform.
train_loader = DataLoader(data_tensor, batch_size=batchsize, shuffle=True, num_workers=4)

In [14]:
# Invertibility check -> ok
class Invertible1x1Conv(nn.Module):
    """Invertible 1x1 convolution over the channel dimension.

    The layer multiplies the channels of a ``(batch, channels, steps)`` tensor
    by a square ``num_features x num_features`` matrix ``W`` (stored as the
    weight of a bias-free ``nn.Conv1d`` with kernel size 1). ``W`` is
    initialised to a random orthogonal matrix with positive determinant, so
    it is invertible and its log-determinant is finite at initialisation.

    Args:
        num_features: number of input (and output) channels.
    """

    def __init__(self, num_features):
        super(Invertible1x1Conv, self).__init__()
        self.conv = nn.Conv1d(num_features, num_features, kernel_size=1, stride=1, padding=0, bias=False)

        # Orthogonal initialisation: take Q from the QR decomposition of a
        # Gaussian matrix. Prefer torch.linalg.qr where it exists (torch.qr is
        # deprecated in recent releases) and fall back to torch.qr otherwise.
        noise = torch.FloatTensor(num_features, num_features).normal_()
        if hasattr(torch, 'linalg') and hasattr(torch.linalg, 'qr'):
            W = torch.linalg.qr(noise)[0]
        else:
            W = torch.qr(noise)[0]

        # Flip one column if needed so that det(W) > 0; torch.logdet returns
        # NaN for matrices with a negative determinant.
        if torch.det(W) < 0:
            W[:, 0] = -1 * W[:, 0]

        # .contiguous() guards against a non-contiguous Q, which .view rejects.
        self.conv.weight.data = W.contiguous().view(num_features, num_features, 1)

    def _weight_matrix(self):
        """Return the 2-D mixing matrix W behind the convolution kernel."""
        # Drop only the trailing kernel dimension. A bare squeeze() would also
        # remove the channel dimensions when num_features == 1 and hand a
        # 0-dim tensor to logdet()/inverse().
        return self.conv.weight.squeeze(-1)

    def forward(self, x):
        """Apply the convolution.

        Args:
            x: tensor of shape ``(batch, channels, steps)``.

        Returns:
            ``(z, log_det_jacobian)`` where ``z`` has the same shape as ``x``
            and ``log_det_jacobian`` is the value computed by
            ``calculate_log_det_jacobian``.
        """
        z = self.conv(x)
        log_det_jacobian = self.calculate_log_det_jacobian(x)
        return z, log_det_jacobian

    def inverse(self, z, train_finished=False):
        """Undo ``forward`` by convolving with the inverse of W.

        Args:
            z: tensor of shape ``(batch, channels, steps)``.
            train_finished: when True, the inverse kernel is computed once and
                cached on ``self.W_inverse``. The cache is never invalidated,
                so only pass True once the weights no longer change.

        Returns:
            Tensor with the same shape as ``z``.
        """
        if train_finished:
            if not hasattr(self, 'W_inverse'):
                self.W_inverse = self._weight_matrix().inverse().unsqueeze(-1)
            kernel = self.W_inverse
        else:
            # Weights may still be changing: recompute the inverse every call.
            kernel = self._weight_matrix().inverse().unsqueeze(-1)
        return F.conv1d(z, kernel, bias=None, stride=1, padding=0)

    def calculate_log_det_jacobian(self, x):
        """Return ``batch_size * n_of_groups * logdet(W)`` for the input ``x``.

        The same matrix W is applied at every time step of every sample, so
        its log-determinant is counted once per (sample, step) pair.
        """
        batch_size, group_size, n_of_groups = x.size()
        log_det_jacobian = batch_size * n_of_groups * torch.logdet(self._weight_matrix())
        return log_det_jacobian

In [15]:
class NN(nn.Module):
    """Small 1-D convolutional network with a zero-initialised output layer.

    Three kernel-size-3 convolutions (stride 1, padding 1, so the number of
    steps is preserved) with batch normalisation and ReLU after the first two.
    The network maps ``n_features`` input channels to ``2 * n_features``
    output channels. Because the last convolution starts with all-zero weight
    and bias, the output is exactly zero right after construction.

    Args:
        n_features: number of input channels.
    """

    def __init__(self, n_features):
        super().__init__()
        n_hidden = 64
        # All three convolutions share the same length-preserving geometry.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.cv1 = nn.Conv1d(n_features, n_hidden, **conv_kwargs)
        self.bn1 = nn.BatchNorm1d(n_hidden)
        self.cv2 = nn.Conv1d(n_hidden, n_hidden, **conv_kwargs)
        self.bn2 = nn.BatchNorm1d(n_hidden)
        self.cv3 = nn.Conv1d(n_hidden, 2 * n_features, **conv_kwargs)

        # Start the output layer at zero.
        for param in (self.cv3.weight, self.cv3.bias):
            param.data.zero_()

    def forward(self, x):
        """Map ``(batch, n_features, steps)`` to ``(batch, 2 * n_features, steps)``."""
        hidden = self.cv1(x)
        hidden = F.relu(self.bn1(hidden))
        hidden = self.cv2(hidden)
        hidden = F.relu(self.bn2(hidden))
        return self.cv3(hidden)

In [16]:
class WaveGlow(nn.Module):
    """Stack of flows, each an invertible 1x1 convolution plus an affine coupling.

    A 2-D input ``(batch, samples)`` is cut into non-overlapping windows of
    ``n_group`` samples, giving a ``(batch, n_group, n_of_groups)`` tensor.
    Each flow mixes the channels with ``Invertible1x1Conv`` and then applies an
    affine coupling: the first half of the channels goes through ``NN`` to
    produce a log-scale and a shift for the second half. Every
    ``n_early_every`` flows, the first ``n_early_size`` channels are split off
    and emitted directly as part of the latent.

    Args:
        n_flows: number of flows.
        n_group: window length, i.e. number of channels after grouping; must
            be even.
        n_early_every: split off early outputs every this many flows.
        n_early_size: number of channels removed at each early output.
    """

    def __init__(self, n_flows, n_group, n_early_every, n_early_size):
        super(WaveGlow, self).__init__()

        assert n_group % 2 == 0, 'n_group must be even'
        self.n_flows = n_flows
        self.n_group = n_group
        self.n_early_every = n_early_every
        self.n_early_size = n_early_size

        self.NN = nn.ModuleList()
        self.convinv = nn.ModuleList()

        n_half = n_group // 2

        # Track how many channels are still flowing after each early output so
        # that every layer is built with the channel count it will receive.
        n_remaining_channels = n_group
        for k in range(n_flows):
            if k % self.n_early_every == 0 and k > 0:
                n_half = n_half - self.n_early_size // 2
                n_remaining_channels = n_remaining_channels - self.n_early_size
            self.convinv.append(Invertible1x1Conv(n_remaining_channels))
            self.NN.append(NN(n_half))
        self.n_remaining_channels = n_remaining_channels

        # Latent size seen by the most recent forward(); infer() reuses it.
        self.z_size = None

    def forward(self, x):
        """Map data to the latent space.

        Args:
            x: tensor of shape ``(batch, samples)``.

        Returns:
            ``(z, log_det_jacobian)``: the latent, with early outputs
            concatenated along the channel dimension, and the accumulated
            log-determinant of all flows summed over the whole batch.
        """
        x = x.unfold(1, self.n_group, self.n_group).permute(0, 2, 1)
        z = []
        log_det_jacobian = 0

        for k in range(self.n_flows):
            if k % self.n_early_every == 0 and k > 0:
                z.append(x[:, :self.n_early_size, :])
                x = x[:, self.n_early_size:, :]
            x, log_det_temp = self.convinv[k](x)
            log_det_jacobian = log_det_jacobian + log_det_temp

            n_half = x.size(1) // 2
            x_0 = x[:, :n_half, :]
            x_1 = x[:, n_half:, :]

            out = self.NN[k](x_0)
            log_s = out[:, n_half:, :]
            b = out[:, :n_half, :]
            x_1 = torch.exp(log_s) * x_1 + b
            # Write the transformed half back. Without this the coupling would
            # be discarded while its log-det is still counted.
            x = torch.cat([x_0, x_1], dim=1)

            log_det_jacobian = log_det_jacobian + torch.sum(log_s)

        z.append(x)
        z = torch.cat(z, dim=1)
        self.z_size = z.size()
        return z, log_det_jacobian

    def _sample_noise(self, batch_size, n_channels, n_of_groups):
        """Draw standard-normal noise on the model's own device and dtype."""
        reference = next(self.parameters(), None)
        if reference is None:
            # No parameters (n_flows == 0): nothing to match, use the defaults.
            return torch.randn(batch_size, n_channels, n_of_groups)
        return reference.new_empty(batch_size, n_channels, n_of_groups).normal_()

    def infer(self, sigma=1.0):
        """Sample from the model by running the flows in reverse.

        The batch size and number of groups are taken from the most recent
        ``forward`` call.

        NOTE(review): ``NN`` contains BatchNorm layers, so the module should
        presumably be in ``eval()`` mode here; otherwise sampling uses the
        statistics of the sampled batch and updates the running statistics.

        Args:
            sigma: standard deviation of the Gaussian latent.

        Returns:
            Tensor of shape ``(batch, n_group * n_of_groups)`` detached from
            the autograd graph.

        Raises:
            RuntimeError: if ``forward`` has not been called yet.
        """
        if getattr(self, 'z_size', None) is None:
            raise RuntimeError('WaveGlow.infer() needs the latent size recorded by forward(); '
                               'call forward() once before sampling')
        batch_size, n_of_groups = self.z_size[0], self.z_size[2]

        with torch.no_grad():
            z = sigma * self._sample_noise(batch_size, self.n_remaining_channels, n_of_groups)
            for k in reversed(range(self.n_flows)):
                n_half = z.size(1) // 2
                z_0 = z[:, :n_half, :]
                z_1 = z[:, n_half:, :]

                # Invert the affine coupling.
                out = self.NN[k](z_0)
                log_s = out[:, n_half:, :]
                b = out[:, :n_half, :]
                z_1 = (z_1 - b) / torch.exp(log_s)
                z = torch.cat([z_0, z_1], dim=1)

                z = self.convinv[k].inverse(z)

                # Re-attach fresh noise where forward() split off an early output.
                if k % self.n_early_every == 0 and k > 0:
                    add_z = sigma * self._sample_noise(batch_size, self.n_early_size, n_of_groups)
                    z = torch.cat([add_z, z], dim=1)

            # Undo the grouping: (batch, n_group, n_of_groups) -> (batch, samples).
            z = z.permute(0, 2, 1).contiguous().view(z.size(0), -1).data
        return z

In [None]:
class WaveGlowLoss(nn.Module):
    """Negative log-likelihood style loss for a flow with a Gaussian latent.

    Computes ``sum(z**2) / (2 * sigma**2) - log_det_jacobian`` and divides the
    result by the number of elements in ``z``.

    Args:
        sigma: standard deviation assumed for the latent.
    """

    def __init__(self, sigma=1.0):
        super().__init__()
        self.sigma = sigma

    def forward(self, model_output):
        """Return the per-element loss.

        Args:
            model_output: pair ``(z, log_det_jacobian)`` where ``z`` is a 3-D
                tensor.
        """
        latent, log_det = model_output
        n_batch, n_channels, n_steps = latent.size()
        n_elements = n_batch * n_channels * n_steps

        gaussian_term = torch.sum(latent * latent) / (2 * self.sigma * self.sigma)
        return (gaussian_term - log_det) / n_elements

In [10]:
# Demo of the grouping used in WaveGlow.forward: cut a length-64 sequence into
# non-overlapping windows of 8 and move the within-window index to dim 1.
# Note: this rebinds the notebook-level names `x` and `y`.
x = torch.arange(64).unsqueeze(0)
y = x.unfold(dimension=1, size=8, step=8).transpose(1, 2)
print(x)
print(y)

tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
         54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])
tensor([[[ 0,  8, 16, 24, 32, 40, 48, 56],
         [ 1,  9, 17, 25, 33, 41, 49, 57],
         [ 2, 10, 18, 26, 34, 42, 50, 58],
         [ 3, 11, 19, 27, 35, 43, 51, 59],
         [ 4, 12, 20, 28, 36, 44, 52, 60],
         [ 5, 13, 21, 29, 37, 45, 53, 61],
         [ 6, 14, 22, 30, 38, 46, 54, 62],
         [ 7, 15, 23, 31, 39, 47, 55, 63]]])


In [9]:
# Round-trip check: undoing the permute and flattening restores the original
# sample order. The grouped tensor is rebuilt here so the cell runs on a fresh
# kernel instead of relying on a variable (`a`) that no other cell defines.
a = torch.arange(64).view(1, -1).unfold(1, 8, 8).permute(0, 2, 1)
a.permute(0, 2, 1).view(1, -1)

tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
         36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
         54, 55, 56, 57, 58, 59, 60, 61, 62, 63]])

In [264]:
def check_flow_invertibity(net, inputs, epsilon=1e-5):
    """Print whether ``net.forward`` and ``net.inverse`` undo each other.

    The check runs twice: once on the network as given and once after 50 Adam
    steps (lr=0.1) that pull ``net(inputs)[0]`` towards ``inputs``. The second
    pass shows whether invertibility survives a weight update. Note that the
    training steps modify ``net`` in place.

    Args:
        net: module whose ``forward`` returns a tuple with the transformed
            tensor first, and which provides ``inverse(z)``.
        inputs: tensor fed to both directions.
        epsilon: largest absolute element-wise reconstruction error that still
            counts as invertible.

    Returns:
        None. Results are printed as four ``True``/``False`` lines.
    """

    def report(suffix=''):
        # Compare both composition orders against the untouched inputs.
        with torch.no_grad():
            recons1 = net.inverse(net.forward(inputs)[0])
            recons2 = net.forward(net.inverse(inputs))[0]

            diff1 = torch.abs(inputs - recons1)
            diff2 = torch.abs(inputs - recons2)

            # `<= epsilon` everywhere, rather than "nothing > epsilon", so a
            # NaN reconstruction is reported as a failure instead of a pass.
            print('forward -> inverse%s:' % suffix, bool((diff1 <= epsilon).all().item()))
            print('inverse -> forward%s:' % suffix, bool((diff2 <= epsilon).all().item()))

    report()

    # Build the optimizer and criterion once: re-creating Adam inside the loop
    # would throw away its moment estimates at every step.
    optimizer = optim.Adam(net.parameters(), lr=0.1)
    criterion = nn.MSELoss()
    for _ in range(50):
        optimizer.zero_grad()
        outputs = net(inputs)[0]
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()

    report('(trained)')

AttributeError: 'Invertible1x1Conv' object has no attribute 'param_group'

In [103]:
# Sanity check: `.any()` on an all-zero byte tensor yields a falsy scalar tensor.
torch.zeros(5, 5).to(torch.uint8).any()

tensor(0, dtype=torch.uint8)