# Задание 5. Нейросетевой разреженный автокодировщик

__Выполнил Алескин Александр, 317 группа.__


Подгрузим все необходимые библиотеки:

In [1]:
import numpy as np
import scipy.optimize as opt
import sklearn as skl
import matplotlib.pyplot as plt
import sys

import importlib

%matplotlib inline

import sample_patches as patch
import display_layer as display
import gradient as grad
import autoencoder as coder

### Часть I. Вывод и анализ работы алгоритма

In [2]:
# Load the training file and build the sample matrix `dt` from its 'X' entry.
# NOTE(review): 'train.pk' looks like a pickle file; recent NumPy releases
# refuse to unpickle unless allow_pickle=True is passed — confirm before re-running.
train = np.load('train.pk')
dt = patch.sample_patches(train['X'])

In [38]:
# Small fixture shared by the gradient and loss checks in the cells below.
data = dt[:10]  # first 10 rows of the sampled data
ans = data  # the target is the input itself
alfa = 0.001  # passed as `alfa` / `lambda_` (weight-decay coefficient)
p = 0.01  # passed as `p` / `sparsity_param` (target mean hidden activation)
beta = 3  # weight of the sparsity penalty
# presumably the flat parameter vector of a 192-75-192 network — confirm against autoencoder.initialize
theta = coder.initialize(np.array([75]), 192)
shapes = np.array([192, 75, 192])  # layer sizes: input, hidden, output

In [89]:
def grad_f(theta, X, Y, shapes, alfa, beta, p):
    """Return the gradient of a one-hidden-layer sparse autoencoder cost.

    The gradient corresponds to the cost

        1 / (2n) * sum((X2 - Y) ** 2)
        + alfa / 2 * (sum(W1 ** 2) + sum(W2 ** 2))
        + beta * sum_j KL(p || mean activation of hidden unit j)

    where X2 is the sigmoid output of the network.

    Parameters
    ----------
    theta : 1-D ndarray
        Flat parameters laid out as W1 (size1 * size2), b1 (size2),
        W2 (size2 * size3), b2 (size3).
    X : ndarray of shape (n, size1)
        Network input.
    Y : ndarray of shape (n, size3)
        Reconstruction target.
    shapes : sequence of three numbers
        Layer sizes (size1, size2, size3); each must be a whole number.
    alfa : float
        Weight-decay coefficient.
    beta : float
        Weight of the sparsity penalty.
    p : float
        Target mean activation of the hidden units.

    Returns
    -------
    1-D ndarray
        Gradient with the same layout as ``theta``.

    Raises
    ------
    TypeError
        If any of the three layer sizes is not a whole number.
    """
    sizes = []
    for size in (shapes[0], shapes[1], shapes[2]):
        # Sizes may arrive as floats (e.g. from a float array); they are only
        # usable as slice bounds once confirmed whole and cast to int.
        if not np.isclose(round(size), size):
            raise TypeError('Problems with shapes of theta')
        sizes.append(int(round(size)))
    size1, size2, size3 = sizes
    n = Y.shape[0]

    # Unpack the flat parameter vector.
    W1 = theta[:size1 * size2].reshape(size1, size2)
    b1 = theta[size1 * size2 : (size1 + 1) * size2]
    W2 = theta[(size1 + 1) * size2 : theta.shape[0] - size3].reshape(size2, size3)
    b2 = theta[theta.shape[0] - size3:]

    # Forward pass; exp(-a) / (1 + exp(-a)) ** 2 is the sigmoid derivative.
    A1 = X.dot(W1) + b1
    X1_exp = np.exp(-A1)
    X1 = 1 / (1 + X1_exp)
    X1_del = X1_exp * X1 ** 2
    A2 = X1.dot(W2) + b2
    X2_exp = np.exp(-A2)
    X2 = 1 / (1 + X2_exp)

    # Backward pass for the reconstruction and weight-decay terms.
    S2 = -(Y - X2) * X2_exp * X2 ** 2 / n
    dW2 = X1.T.dot(S2) + alfa * W2
    db2 = np.sum(S2, axis=0)
    S1 = S2.dot(W2.T) * X1_del
    dW1 = X.T.dot(S1) + alfa * W1
    db1 = np.sum(S1, axis=0)

    # Sparsity penalty: coef is beta * d KL(p || p_real) / d p_real per hidden unit.
    p_real = np.sum(X1, axis=0) / n
    coef = beta * ((p - p_real) / (p_real - 1) / p_real)
    dW1 += X.T.dot(coef * X1_del) / n
    db1 += coef * np.sum(X1_del, axis=0) / n

    return np.concatenate((dW1.reshape(-1), db1.reshape(-1), dW2.reshape(-1), db2.reshape(-1)))

In [90]:
def grad_f2(theta, visible_size, hidden_size, lambda_, sparsity_param, beta, data):
    """Return the sparse-autoencoder cost and its gradient.

    The network has layer sizes ``[visible_size, *hidden_size, visible_size]``
    with sigmoid activations on every layer. The cost is

        1 / (2n) * sum((output - data) ** 2)
        + lambda_ / 2 * sum of squared weights
        + beta * sum over hidden units of KL(sparsity_param || mean activation)

    Parameters
    ----------
    theta : 1-D ndarray
        Flat parameters; for each layer in order, the weight matrix
        (fan_in * fan_out values) followed by the bias (fan_out values).
    visible_size : int
        Size of the input and output layers.
    hidden_size : int or 1-D array of int
        Sizes of the hidden layers.
    lambda_ : float
        Weight-decay coefficient.
    sparsity_param : float
        Target mean activation of the hidden units.
    beta : float
        Weight of the sparsity penalty.
    data : ndarray of shape (n, visible_size)
        Samples to reconstruct.

    Returns
    -------
    (J, dJ) : tuple
        ``J`` is the scalar cost; ``dJ`` is the gradient with the same layout
        as ``theta``.
    """
    sizes = np.concatenate(
        (np.array([visible_size]), np.atleast_1d(hidden_size), np.array([visible_size]))
    )
    n = data.shape[0]
    # Bind the target sparsity locally; it must come from the argument rather
    # than from whatever global `p` happens to exist.
    p = sparsity_param
    alfa = lambda_
    J = 0
    Z = [data]  # activations of every layer, input first
    offset = 0

    # Forward pass: store activations and accumulate the weight-decay term.
    for i in range(len(sizes) - 1):
        W = theta[offset : offset + sizes[i] * sizes[i + 1]]
        W = W.reshape(sizes[i], sizes[i + 1])
        b = theta[offset + sizes[i] * sizes[i + 1] : offset + (sizes[i] + 1) * sizes[i + 1]]
        offset += (sizes[i] + 1) * sizes[i + 1]
        A = Z[-1].dot(W) + b
        Z.append(1 / (1 + np.exp(-A)))
        J += alfa / 2 * (np.sum(W ** 2))

    S = data - Z[-1]
    J += np.sum(S ** 2) / (n * 2)
    S = -S / n  # derivative of the reconstruction term w.r.t. the output
    dJ = np.array([])
    offset = theta.size

    # Backward pass: walk the layers from the last to the first, prepending
    # each layer's gradient so dJ ends up in the same order as theta.
    for i in range(2, len(sizes) + 1):
        W = theta[offset - (sizes[-i] + 1) * sizes[-i + 1] : offset - sizes[-i + 1]]
        W = W.reshape(sizes[-i], sizes[-i + 1])
        offset -= (sizes[-i] + 1) * sizes[-i + 1]

        Z_del = (1 - Z[-i + 1]) * Z[-i + 1]  # sigmoid derivative
        S = S * Z_del
        dW = Z[-i].T.dot(S) + alfa * W
        db = np.sum(S, axis=0)

        if i < len(sizes):
            # Z[-i] is a hidden layer: add its sparsity penalty to the cost
            # and the penalty's derivative to the back-propagated signal.
            p_real = np.sum(Z[-i], axis=0) / n
            J += beta * np.sum(p * np.log(p / p_real) + (1 - p) * np.log((1 - p) / (1 - p_real)))
            coefs = beta * ((1 - p) / (1 - p_real) - p / p_real) / n
            S = S.dot(W.T) + coefs

        dJ = np.concatenate((dW.reshape(-1), db.reshape(-1), dJ))

    return (J, dJ)

In [84]:
# Gradient from the single-hidden-layer implementation on the small fixture.
dJ = grad_f(theta, data, ans, shapes, alfa, beta, p)

In [91]:

# Cost and gradient from the layer-generic implementation on the same inputs.
J2, dJ3 = grad_f2(theta, 192, np.array([75]), alfa, p, beta, data)

In [92]:
# Number of gradient components on which the two implementations differ by more than atol=1e-5.
np.sum(np.isclose(dJ, dJ3, atol = 1e-5) == False)

0

In [93]:
# Indices of the components that still differ at the tighter tolerance atol=1e-8.
np.argwhere(np.isclose(dJ, dJ3, atol = 1e-8) == False)

array([], shape=(0, 1), dtype=int32)

In [88]:
# Show the first ten components of both gradients side by side.
print(dJ[:10], dJ3[:10])

[ 0.61321968  0.6411928   0.60153998  0.89390635  0.99805422  0.64741748
  0.6286063   0.94655024  0.6150312   0.91611202] [ 6.14045413  6.3513312   5.98599065  8.895319    9.91991556  6.44899886
  6.22378198  9.42182293  6.095314    9.13868502]


In [50]:
def loss_func2(theta, visible_size, hidden_size, lambda_, sparsity_param, beta, data):
    """Evaluate the sparse-autoencoder cost for a stack of sigmoid layers.

    The layer sizes are ``[visible_size, *hidden_size, visible_size]``.
    ``theta`` stores, for each layer in order, the flattened weight matrix
    followed by the bias vector. The returned scalar is the sum of:

    * the reconstruction error ``sum((output - data) ** 2) / (2 * n)``;
    * the weight decay ``lambda_ / 2 * sum(W ** 2)`` over all layers;
    * ``beta`` times the KL divergence between ``sparsity_param`` and the
      mean activation of every hidden unit.

    ``hidden_size`` must be a 1-D array, and ``data`` has one sample per row.
    """
    layer_sizes = np.concatenate((np.array([visible_size]), hidden_size, np.array([visible_size])))
    n_samples = data.shape[0]
    rho = sparsity_param
    decay = lambda_ / 2
    last_layer = len(layer_sizes) - 2  # index of the output layer's weights

    cost = 0
    activation = data
    start = 0
    for layer, (fan_in, fan_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        # Slice this layer's weights and bias out of the flat vector.
        weights_end = start + fan_in * fan_out
        bias_end = start + (fan_in + 1) * fan_out
        weights = theta[start:weights_end].reshape(fan_in, fan_out)
        bias = theta[weights_end:bias_end]
        start = bias_end

        pre_activation = activation.dot(weights) + bias
        activation = 1 / (1 + np.exp(-pre_activation))

        cost += decay * (np.sum(weights ** 2))
        if layer < last_layer:
            # Hidden layer: penalise deviation of the mean activation from rho.
            rho_hat = np.sum(activation, axis = 0) / n_samples
            kl_terms = rho * np.log(rho / rho_hat) + (1 - rho) * np.log((1 - rho) / (1 - rho_hat))
            cost += beta * np.sum(kl_terms)

    per_sample_error = np.sum((activation - data) ** 2, axis = 1)
    cost += np.sum(per_sample_error) / (n_samples * 2)

    return cost

In [51]:
# NOTE(review): loss_func is not defined in the visible part of the notebook —
# presumably the single-hidden-layer cost that pairs with grad_f; confirm it is still in scope.
loss_func(theta, data, ans, shapes, alfa, beta, p)

168.3938033813709

In [52]:
# Same cost evaluated by the layer-generic loss_func2 on the same fixture.
loss_func2(theta, 192, np.array([75]), alfa, p, beta, data)

168.39380338137087

In [None]:
# Overwrites dJ3 with the result of grad.compute_gradient applied to loss_func2 at theta.
# presumably a finite-difference gradient estimate — confirm against gradient.compute_gradient
dJ3 = grad.compute_gradient(lambda theta: loss_func2(theta, 192, np.array([75]), alfa, p, beta, data) , theta)