In [1]:
from __future__ import division
from __future__ import print_function

In [2]:
import numpy as np

# Single seeded generator used for every random draw in this notebook
# (initial weights, test mini-batch, test directions) so results are reproducible.
random_gen = np.random.RandomState(seed=45)

## Параметры тестового автокодировщика

In [3]:
# The autoencoder reconstructs its input, so input and output widths coincide.
num_inputs = num_outputs = 10
# Width of the hidden layer.
num_hidden_units = 2

# Number of objects (columns) in the test mini-batch.
num_objects = 50

## Функция потерь и ее градиент

In [28]:
def loss_function(autoencoder, weights, inputs):
    """Return the reconstruction loss of the autoencoder at the given weights.

    The loss is half the squared reconstruction error, summed over axis 0 of
    ``inputs`` and averaged over the remaining entries.

    Side effect: the network is left holding ``weights`` (via ``set_weights``).

    :param autoencoder: object exposing ``net.set_weights`` and ``net.compute_outputs``.
    :param weights: weight vector passed to ``net.set_weights``.
    :param inputs: array fed to the network and used as the reconstruction target.
    :return: scalar loss value.
    """
    network = autoencoder.net
    network.set_weights(weights)

    reconstruction = network.compute_outputs(inputs)
    residuals = inputs - reconstruction
    squared_error_per_object = np.sum(residuals ** 2, axis=0)

    return 0.5 * squared_error_per_object.mean()

def loss_function_grad(autoencoder, weights, inputs):
    """Return the gradient reported by ``autoencoder.compute_loss`` at the given weights.

    Side effect: the network is left holding ``weights`` (via ``set_weights``).

    :param autoencoder: object exposing ``net.set_weights``, ``net.compute_outputs``
        and ``compute_loss``.
    :param weights: weight vector passed to ``net.set_weights``.
    :param inputs: array fed to the network.
    :return: second element of the pair returned by ``compute_loss``.
    """
    network = autoencoder.net
    network.set_weights(weights)

    # Forward pass whose result is not used here; kept before compute_loss
    # (presumably it refreshes cached activations -- TODO confirm).
    network.compute_outputs(inputs)

    _loss_value, gradient = autoencoder.compute_loss(inputs)
    return gradient

def GN_loss_function(autoencoder, weights, inputs, direction):
    """Return the network outputs contracted with ``direction``.

    The element-wise product ``outputs * direction`` is summed over axis 0 and
    averaged over the remaining entries.

    Side effect: the network is left holding ``weights`` (via ``set_weights``).

    :param autoencoder: object exposing ``net.set_weights`` and ``net.compute_outputs``.
    :param weights: weight vector passed to ``net.set_weights``.
    :param inputs: array fed to the network.
    :param direction: array multiplied element-wise with the network outputs.
    :return: scalar value.
    """
    network = autoencoder.net
    network.set_weights(weights)

    predictions = network.compute_outputs(inputs)
    weighted_per_object = np.sum(predictions * direction, axis=0)

    return weighted_per_object.mean()

## Создание автокодировщика

In [5]:
from layers import FCLayer
from activations import SigmoidActivationFunction, LinearActivationFunction
from autoencoder import Autoencoder

# Hidden layer: fully connected, sigmoid activation, with bias.
hidden_layer = FCLayer(
    shape=(num_inputs, num_hidden_units),
    afun=SigmoidActivationFunction(),
    use_bias=True,
)

# Output layer: fully connected, linear activation, with bias.
output_layer = FCLayer(
    shape=(num_hidden_units, num_outputs),
    afun=LinearActivationFunction(),
    use_bias=True,
)

# The autoencoder under test is the two layers stacked in order.
autoencoder = Autoencoder([hidden_layer, output_layer])

## Инициализация весов сети

In [6]:
# Hidden-layer block: uniform draws in [-1/sqrt(num_inputs), 1/sqrt(num_inputs)],
# one row per hidden unit, with a trailing zero column appended
# (presumably the bias -- the layout must match what set_weights expects).
weights_1 = np.concatenate(
    [random_gen.uniform(-1 / np.sqrt(num_inputs),
                        1 / np.sqrt(num_inputs),
                        (num_hidden_units, num_inputs)),
     np.zeros((num_hidden_units, 1))],
    axis=1)

# Output-layer block: same scheme, scaled by the number of hidden units.
# Drawn after weights_1 so the random stream is consumed in the same order.
weights_2 = np.concatenate(
    [random_gen.uniform(-1 / np.sqrt(num_hidden_units),
                        1 / np.sqrt(num_hidden_units),
                        (num_outputs, num_hidden_units)),
     np.zeros((num_outputs, 1))],
    axis=1)

# Flat parameter vector: row-major hidden-layer block followed by the output-layer block.
weights = np.concatenate([weights_1.ravel(), weights_2.ravel()])

autoencoder.net.set_weights(weights)

## Создаем тестовый мини-батч

In [7]:
# Standard-normal test mini-batch: one row per input unit, one column per object.
inputs = random_gen.normal(loc=0.0, scale=1.0, size=(num_inputs, num_objects))

## Проверка корректности вычисления градиента

In [15]:
from approx_gradient import compute_approx_grad

# Put the network into a known state before taking the analytic gradient.
# The finite-difference probes (here and in other cells) call set_weights with
# whatever vector compute_approx_grad passes them, so without this reset the
# "exact" gradient depends on which cell happened to run last.
# The call sequence mirrors loss_function_grad.
autoencoder.net.set_weights(weights)
autoencoder.net.compute_outputs(inputs)

# Analytic gradient at the reference weights: second element of the pair
# returned by compute_loss. Indexing avoids assigning to the global `_`,
# which IPython uses for the last cell output.
exact_grad = autoencoder.compute_loss(inputs)[1]

num_params = autoencoder.net.params_number

direction = np.zeros_like(weights)
approx_grad = np.zeros_like(weights)

# Probe one coordinate at a time: the directional derivative along the i-th
# unit vector is the i-th component of the gradient.
for i in range(num_params):
    direction[:] = 0
    direction[i] = 1

    approx_grad[i] = compute_approx_grad(
        lambda x: loss_function(autoencoder, x, inputs), weights, direction)

#### Порядок разницы между точным градиентом и его аппроксимацией

In [16]:
# Largest absolute component-wise deviation between the two gradients.
max_absolute_diff = np.abs(approx_grad - exact_grad).max()

# Order of magnitude (floor of log10) of that deviation; shown as the cell output.
np.floor(np.log10(max_absolute_diff))

-7.0

## Проверка корректности вычисления произведения гессиана на вектор

In [17]:
from approx_gradient import compute_approx_grad

num_test_direction = 5
num_params = autoencoder.net.params_number

# Random directions along which the Hessian-vector product is tested.
direction_pool = [random_gen.normal(size=num_params) for _ in range(num_test_direction)]

# Reset the network to the reference weights before the exact computation.
# Earlier finite-difference probes call set_weights with the vectors that
# compute_approx_grad passes them, so the network may still hold perturbed
# weights. The call sequence mirrors loss_function_grad.
autoencoder.net.set_weights(weights)
autoencoder.net.compute_outputs(inputs)
autoencoder.compute_loss(inputs)

# Exact Hessian-vector products. The loop variable is not called `direction`
# because under Python 2 comprehension variables leak into the enclosing scope
# and would overwrite the `direction` array used by the gradient checks.
exact_Hps = [autoencoder.compute_hessvec(test_direction)
             for test_direction in direction_pool]

# Finite-difference estimate: directional derivative of the gradient along
# each test direction.
approx_Hps = [compute_approx_grad(lambda x: loss_function_grad(autoencoder, x, inputs),
                                  weights, test_direction)
              for test_direction in direction_pool]

#### Порядки разницы между точным произведением гессиана на вектор и его аппроксимацией

In [18]:
# Largest absolute component-wise deviation for each tested direction.
max_absolute_diffs = [
    np.abs(approx_Hp - exact_Hp).max()
    for exact_Hp, approx_Hp in zip(exact_Hps, approx_Hps)
]
# Order of magnitude (floor of log10) of each deviation; shown as the cell output.
np.floor(np.log10(max_absolute_diffs))

array([-8., -8., -8., -8., -8.])

## Проверка корректности вычисления градиента функции потерь для гаусс-ньютоновской аппроксимации

In [29]:
from approx_gradient import compute_approx_grad

# Random matrix with the shape of the network outputs. It stands in for the
# Jacobian-times-vector term of the Gauss-Newton product; GN_loss_function
# contracts the outputs with it, and compute_loss_grad(qs) is checked against
# the gradient of that contraction.
qs = random_gen.normal(size=(num_outputs, num_objects))

# Reset the network to the reference weights before the exact computation.
# Earlier finite-difference probes call set_weights with the vectors that
# compute_approx_grad passes them, so the network may still hold perturbed
# weights. The call sequence mirrors loss_function_grad.
autoencoder.net.set_weights(weights)
autoencoder.net.compute_outputs(inputs)
autoencoder.compute_loss(inputs)

exact_grad = autoencoder.net.compute_loss_grad(qs)

# Defined here so the cell does not depend on an earlier cell having set it.
num_params = autoencoder.net.params_number

direction = np.zeros_like(weights)
approx_grad = np.zeros_like(weights)

# Probe one coordinate at a time: the directional derivative along the i-th
# unit vector is the i-th component of the gradient.
for i in range(num_params):
    direction[:] = 0
    direction[i] = 1

    approx_grad[i] = compute_approx_grad(
        lambda x: GN_loss_function(autoencoder, x, inputs, qs), weights, direction)

#### Порядок разницы между точным градиентом и его аппроксимацией

In [32]:
# Largest absolute component-wise deviation between the two gradients.
max_absolute_diff = np.abs(approx_grad - exact_grad).max()

# Order of magnitude (floor of log10) of that deviation; shown as the cell output.
np.floor(np.log10(max_absolute_diff))

-9.0