In [1]:
import numpy as np
from scipy import optimize
import itertools

In [2]:
def initialize_weights(L_in, L_out):
    e = 0.12 # sigma
    t = np.random.random((L_out, L_in + 1)) * 2 * e - e
    return t


def recode_label(y,num_labels):
    '''theres a chance this needs to be the other way'''
    rows = len(y)
    out = np.zeros((rows, num_labels))
    for i in range(0, rows):
        row_answer = int(y[i])
        out[i, row_answer] = 1
        
    return out

def param_unroll( nn_params, input_layer_size, hidden_layer_size, num_labels ):
    '''
    theta1 shape: (30, 785) (hidden_size, input_size + 1)
    theta2 shape: (26, 31)  (num_labels, hidden_size + 1)
    '''
    theta1_elems = ( input_layer_size + 1 ) * hidden_layer_size
    theta1_size  = ( input_layer_size + 1, hidden_layer_size  )
    theta1 = nn_params[:theta1_elems].T.reshape( theta1_size ).T
    print(f'number of elements in theta1: {theta1_elems}')

    

    theta2_size  = ( hidden_layer_size + 1, num_labels )
    theta2 =  None# nn_params[theta1_elems:].T.reshape( theta2_size ).T

    return (theta1, theta2)

In [3]:
def feed_forward(theta1, theta2, X):
    '''
    a1 = (m, input_layer_size + 1), a2 = (m, hidden_layer_size + 1), a3= (m, num_labels)
    theta1 = (hidden_layer_size, input_layer_size + 1)
    theta2 = (num_labels, hidden_layer_size)
    '''
    # Input layer
    m, _ = np.shape(X)
    one_rows = np.ones((1, np.shape(X)[0] ))
    a1 = r_[one_rows, X.T]  if X_bias is None else X_bias
    # Hidden layer
    z2 = a1.dot(theta1.T)
    a2 = sigmoid(z2)
    a2 = np.c_[np.ones((np.shape(a2)[0], 1)), a2] # bias for hidden layer
    # Output layer
    z3 = a2.dot(theta2.T)
    a3 = sigmoid(z3) #a3 = h(x)
    return (a1, a2, a3, z2, z3)

In [4]:
def compute_cost( nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lamda, yk = None, X_bias = None ):
    theta1, theta2 = param_unroll( nn_params, input_layer_size, hidden_layer_size, num_labels )
    a1,a2,a3,z2,z3 = feed_forward(theta1, theta2, X, X_bias)
    
    if yk is None:
        yk = recode_label(y, num_labels)
        assert shape(yk) == shape(a3), "Error, shape of recoded y is different from a3"
    
    # J(theta) function: cross-entropy
    term1 = (-y_k * np.log(a3))
    term2 = (1 - y_k) * np.log(1 - a3)
    cost = np.sum(term1 + term2)/m
    # Regularization sum
    reg_term = np.sum(theta1 ** 2) + np.sum(theta2[:,1:] ** 2)
    reg_term = (_lambda/2/m) * reg_term
    return(cost + reg_term)

In [5]:
# loading input data
data = np.genfromtxt('./data/5.csv', delimiter=',')
y = data[:,0].reshape(-1,1)
X = data[:, 1:] #(m, input_layer_size)
m = len(y)

# Network architecture 
input_layer_size = 784
hidden_layer_size = 30
num_labels = 26
lam = 1.0

# Params
theta1 = initialize_weights( 784, 30 ) # (input_size, hidden layer 1 size)
theta2 = initialize_weights( 30, 26 )  # (hidden layer 1 size, # labels)
unrolled = np.r_[theta1.T.flatten(), theta2.T.flatten()] # 1 dimension (24356,)

# matrix holding correct values
y_k = recode_label(y,num_labels)

X_bias = np.r_[ np.ones((1, np.shape(X)[0] )), X.T].T #(input_size + 1, m)
print(np.shape(X_bias))


(5, 785)


In [10]:
# Debugging the parameters before
print(f'rolled params: {np.shape(unrolled)}')
print(f'theta1 shape: {np.shape(theta1)}')
print(f'theta2 shape: {np.shape(theta2)}')
print(f'theta1 size: {np.size(theta1)}')
print(f'theta2 size: {np.size(theta2)}')
print('='*20)

t1,t2 = param_unroll(unrolled, input_layer_size, hidden_layer_size, num_labels)

# Debugging after:
print(f'unrolled theta1 shape: {np.shape(t1)}')


rolled params: (24356,)
theta1 shape: (30, 785)
theta2 shape: (26, 31)
theta1 size: 23550
theta2 size: 806
number of elements in theta1: 23550
unrolled theta1 shape: (30, 785)


In [7]:
# print(compute_cost(unrolled,input_layer_size, hidden_layer_size, num_labels, X, y, lam, y_k, X_bias))

# result = scipy.optimize.fmin_cg(compute_cost, fprime=compute_gradient, x0=unraveled,
#     args=(input_layer_size, hidden_layer_size, num_labels, X, y, lamda, y_k, X_bias),
#     maxiter=50, disp=True, full_output=True )
