In [231]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
from sklearn.model_selection import train_test_split

import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader

import torchvision
from torchvision import datasets, transforms
from torchvision.utils import make_grid

print('PyTorch version:', torch.__version__)
print('torchvision version:', torchvision.__version__)
use_gpu = torch.cuda.is_available()
print('Is GPU available:', use_gpu)

PyTorch version: 0.4.1
torchvision version: 0.2.1
Is GPU available: True


In [232]:
# general settings
device = torch.device('cuda' if use_gpu else 'cpu')
print('device:', device)

# batchsize
batchsize = 100

# random seed
seed = 1
torch.manual_seed(seed)

if use_gpu:
    torch.cuda.manual_seed(seed)

device: cuda


In [233]:
# random erasing transform
class RandomErasing(object):
    def __init__(self, erasing_prob, erasing_height = 4, erasing_width = 4):
        self.erasing_prob = erasing_prob
        self.erasing_height = erasing_height
        self.erasing_width = erasing_width
        
    def __call__(self, sample):
        if np.random.rand() < self.erasing_prob:
            return self.random_erase(sample) # anomaly samples have negative label (use abs if you wanna see number)
        else:
            return sample
            
    def random_erase(self, tensor):
        channel, height, width = tensor.size()
        erasing_h = np.random.randint(height - self.erasing_height)
        erasing_h = [erasing_h, erasing_h + self.erasing_height]
        erasing_w = np.random.randint(width - self.erasing_width)
        erasing_w = [erasing_w, erasing_w + self.erasing_width]
        tensor[:, \
               erasing_h[0]:erasing_h[1], \
               erasing_w[0]:erasing_w[1]] = torch.ones(self.erasing_height, self.erasing_width)   
        
        return tensor

In [234]:
# compose transforms (convert to PyTorch Tensor, Normalize, and random erasing(train:0.01,test:0.5 samples contaminated))
tf_train = transforms.Compose([transforms.ToTensor(), 
                               RandomErasing(erasing_prob = 0.01),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# anomaly for test data will be added after
tf_test = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# using SVHN dataset
training_data = datasets.SVHN(root = '../../data', split = 'train', transform = tf_train, download = True)
test_data = datasets.SVHN(root = '../../data', split = 'test', transform = tf_test, download = True)

Using downloaded and verified file: ../../data/train_32x32.mat
Using downloaded and verified file: ../../data/test_32x32.mat


In [235]:
# split to training data and validation data
train_data, validation_data = train_test_split(training_data, test_size = 0.2, random_state = seed)
print('The number of training data:', len(train_data))
print('The number of validation data', len(validation_data))

The number of training data: 58605
The number of validation data 14652


In [236]:
# prepare data loader
train_loader = DataLoader(train_data, batch_size = batchsize, shuffle = True)
validation_loader = DataLoader(validation_data, batch_size = batchsize, shuffle = False)

In [257]:
# define downsampling parts for convenience
class Downsampler(nn.Module):
    def __init__(self, in_channels, out_channels, ksize = 4, ssize = 2, psize = 1):
        super(Downsampler, self).__init__()
        self.cv = nn.Conv2d(in_channels, out_channels, kernel_size = ksize, stride = ssize, padding = psize)
        self.bn = nn.BatchNorm2d(out_channels)
        self.rl = nn.ReLU()
        
    def forward(self, x):
        out = self.cv(x)
        out = self.bn(out)
        out = self.rl(out)
        return out

In [258]:
# define upsampling parts for convenience
class Upsampler(nn.Module):
    def __init__(self, in_channels, out_channels, ksize = 4, ssize = 2, psize = 1):
        super(Upsampler, self).__init__()
        self.tc = nn.ConvTranspose2d(in_channels, out_channels, kernel_size = ksize, stride = ssize, padding = psize)
        self.bn = nn.BatchNorm2d(out_channels)
        self.rl = nn.ReLU()
        
    def forward(self, x):
        out = self.tc(x)
        out = self.bn(out)
        out = self.rl(out)
        return out

In [259]:
# define VAE Encoder
class Encoder(nn.Module):
    def __init__(self, n_z):
        super(Encoder, self).__init__()
        self.cv1 = Downsampler(  3,  32) # out tensor size : (batchsize,  32, 16, 16)
        self.cv2 = Downsampler( 32,  64) # out tensor size : (batchsize,  64,  8, 8)
        self.cv3 = Downsampler( 64, 128) # out tensor size : (batchsize, 128,  4, 4)
        self.cv4 = Downsampler(128, 256) # out tensor size : (batchsize, 256,  2, 2)
        
        self.fc5_mean   = nn.Linear(256*2*2, n_z)
        self.fc5_logvar = nn.Linear(256*2*2, n_z)
    
    def forward(self, x):
        out = self.cv1(x)
        out = self.cv2(out)
        out = self.cv3(out)
        out = self.cv4(out)
        
        out = out.view(out.size(0), -1)
        
        out_mean   = self.fc5_mean(out)
        out_logvar = self.fc5_logvar(out)
        
        return out_mean, out_logvar

In [264]:
# define VAE Decoder
class Decoder(nn.Module):
    def __init__(self, n_z):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(n_z, 256*2*2)
        self.tc2 = Upsampler(256, 128)   # out tensor size : (batchsize, 256,  4,  4)
        self.tc3 = Upsampler(128,  64)   # out tensor size : (batchsize, 128,  8,  8)
        self.tc4 = Upsampler( 64,  32)   # out tensor size : (batchsize,  64, 16, 16)
        self.tc5_mean    = nn.ConvTranspose2d(32, 3, kernel_size = 4, stride = 2, padding = 1) # (batchsize, 3, 32, 32)
        self.tc5_logvar  = nn.ConvTranspose2d(32, 3, kernel_size = 4, stride = 2, padding = 1) # (batchsize, 3, 32, 32)
    
    def forward(self, x):
        out = self.fc1(x)
        
        out = out.view(out.size(0), 256, 2, 2)
        
        out = self.tc2(out)
        out = self.tc3(out)
        out = self.tc4(out)
        out_mean = self.tc5_mean(out)
        out_logvar  = self.tc5_logvar(out)
        
        return out_mean, out_logvar

In [292]:
# define VAE
class VAE(nn.Module):
    def __init__(self, n_z):
        super(VAE, self).__init__()
        self.encoder = Encoder(n_z)
        self.decoder = Decoder(n_z)
    
    def forward(self, x):
        embed_mean, embed_logvar = self.encoder(x)
        
        eps = torch.randn(embed_mean.size()).to(device)
        z = (0.5 * embed_logvar).exp() * eps + embed_mean
        
        out_mean, out_logvar = self.decoder(z)
        
        return out_mean, out_logvar, embed_mean, embed_logvar

In [293]:
# prepare network and optimizer
n_z = 500
net = VAE(n_z)
net = net.to(device)

optimizer = optim.Adam(net.parameters(), lr = 0.001, weight_decay = 0.0001)
                       
# counting trainable parameters in model
num_trainable_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
                       
# モデルの構造、オプティマイザの表示
print('The number of trainable parameters:', num_trainable_params)
print('\nModel:\n', net)
print('\nOptimizer:\n', optimizer)

The number of trainable parameters: 2921006

Model:
 VAE(
  (encoder): Encoder(
    (cv1): Downsampler(
      (cv): Conv2d(3, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (rl): ReLU()
    )
    (cv2): Downsampler(
      (cv): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (rl): ReLU()
    )
    (cv3): Downsampler(
      (cv): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (rl): ReLU()
    )
    (cv4): Downsampler(
      (cv): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (rl): ReLU()
    )
    (fc5_mean): Linear(in_features=1024,

In [294]:
# define loss functions
def loss_D(embed_mean, embed_logvar):
    loss_d = 0.5 * torch.sum(-embed_logvar - 1 + embed_logvar.exp() + embed_mean.pow(2))
    return loss_d

def loss_A(out_logvar):
    log2pi = float(np.log(2*np.pi))
    loss_a = 0.5 * torch.sum(log2pi + out_logvar)
    return loss_a
    
def loss_M(out_mean, out_logvar, in_x):
    loss_m = 0.5 * torch.sum( (out_mean - in_x).pow(2) / out_logvar.exp() )
    return loss_m

In [295]:
# the function run training for 1 epoch
def train(train_loader):
    net.train()
    
    running_loss = 0
    
    for inputs, _ in train_loader:
        inputs = inputs.to(device)
        out_mean, out_logvar, embed_mean, embed_logvar = net(inputs)
        loss = loss_D(embed_mean, embed_logvar) + loss_A(out_logvar) + loss_M(out_mean, out_logvar, inputs)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
    average_loss = loss / len(train_loader.dataset)
    
    return average_loss

In [296]:
def validation(validation_loader):
    net.eval()
    
    running_loss = 0
    
    with torch.no_grad():
        for inputs, _ in validation_loader:
            inputs = inputs.to(device)
            out_mean, out_logvar, embed_mean, embed_logvar = net(inputs)
            loss = loss_D(embed_mean, embed_logvar) + loss_A(out_logvar) + loss_M(out_mean, out_logvar, inputs)
            
            running_loss += loss.item()
            
    average_loss = loss / len(validation_loader.dataset)
    
    return average_loss

In [None]:
# run training and save trained model
output_dir = '../../data/VAE_anomaly/'
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    
n_epochs = 30
train_loss_list = []
validation_loss_list = []
for epoch in range(n_epochs):
    train_loss = train(train_loader)
    validation_loss = validation(validation_loader)
    
    train_loss_list.append(train_loss)
    validation_loss_list.append(validation_loss)
    
    print('epoch[%d/%d] train_loss:%1.4f validation_loss:%1.4f' %(epoch+1, n_epochs, train_loss, validation_loss))
    
np.save(output_dir + 'train_loss_list.npy', np.array(train_loss_list))
np.save(output_dir + 'validation_loss_list.npy', np.array(validation_loss_list))

torch.save(net.state_dict(), output_dir + 'VAE_toy.pth')

epoch[1/30] train_loss:-0.1091 validation_loss:-4.6372
epoch[2/30] train_loss:0.4647 validation_loss:-5.2630
epoch[3/30] train_loss:0.3593 validation_loss:-8.2043
epoch[4/30] train_loss:-0.0276 validation_loss:-8.4736
epoch[5/30] train_loss:-0.0355 validation_loss:-10.6588
epoch[6/30] train_loss:0.3148 validation_loss:-10.3153
epoch[7/30] train_loss:0.7152 validation_loss:-11.0284
epoch[8/30] train_loss:0.7460 validation_loss:-12.2136
epoch[9/30] train_loss:-0.1768 validation_loss:-12.8444
epoch[10/30] train_loss:2.0114 validation_loss:-12.3208
epoch[11/30] train_loss:0.2368 validation_loss:-13.4992
epoch[12/30] train_loss:0.4864 validation_loss:-13.0263
epoch[13/30] train_loss:0.1297 validation_loss:-12.6830
epoch[14/30] train_loss:0.3276 validation_loss:-14.0903
epoch[15/30] train_loss:0.6828 validation_loss:-13.7336
epoch[16/30] train_loss:0.1891 validation_loss:-14.0375
epoch[17/30] train_loss:-0.2484 validation_loss:-14.4665
epoch[18/30] train_loss:0.3398 validation_loss:-14.8557
