In [1]:
import numpy as np
import datasets
from scipy.optimize import minimize
from softmax import softmax, log_softmax

In [4]:
def crossentropy_loss(X, Y, decode):
    """Build loss, gradient and Hessian closures for softmax regression.

    Parameters
    ----------
    X : array of shape (N, D)
        Input rows.
    Y : array of shape (N, C)
        Target rows (one-hot in this notebook).
    decode : callable
        Maps the flat parameter vector to ``(W, b)`` with ``W`` of shape
        (D, C) and ``b`` of shape (C,).

    Returns
    -------
    (loss, grad, hess) : tuple of callables, each taking the flat parameters.

    Notes
    -----
    ``grad`` and ``hess`` are expressed in the coordinates of the flat vector,
    so they must agree with the layout ``decode`` reads.  They assume the flat
    vector is ``W`` flattened row-major followed by ``b`` -- equivalently the
    row-major flattening of the stacked (D + 1, C) matrix ``[W; b]`` -- which
    is the layout used by ``fit`` in this notebook.  The previous version
    emitted only C * D entries in class-major order and had no bias entries,
    so it did not match the (D + 1) * C vector being optimised.

    ``softmax`` / ``log_softmax`` are the imported helpers; they are assumed
    to normalise each row of the (N, C) score matrix -- TODO confirm.
    """
    # A constant-1 feature turns the bias into the last row of [W; b].
    X_aug = np.hstack([X, np.ones((X.shape[0], 1))])

    def scores(params):
        W, b = decode(params)
        return X.dot(W) + b

    def loss(params):
        # Negative sum over samples of <target row, log-probability row>.
        return -np.sum(Y * np.asarray(log_softmax(scores(params))))

    def grad(params):
        residual = np.asarray(softmax(scores(params))) - Y   # (N, C)
        # Entry (d, c) of X_aug^T . residual is d loss / d [W; b][d, c];
        # ravel() puts it in the same row-major order as the flat vector.
        return X_aug.T.dot(residual).ravel()

    def hess(params):
        o = lambda v: np.outer(v, v)
        # Feature factor first, class factor second, so that index
        # d * C + c matches the row-major (D + 1, C) layout.
        return sum(np.kron(o(xa), np.diag(mu) - o(mu))
                   for xa, mu in zip(X_aug, np.asarray(softmax(scores(params)))))

    return loss, grad, hess

def fit(X, y):
    """Fit a softmax-regression model by minimising the cross-entropy loss.

    ``y`` is expanded with ``datasets.one_hot``; the optimiser works on one
    flat vector of (D + 1) * C zeros-initialised parameters, where the first
    D * C entries are reshaped to ``W`` (D, C) and the last C entries are
    ``b``.

    Returns
    -------
    (W, b) : the decoded optimum reported by ``scipy.optimize.minimize``.
    """
    Y = datasets.one_hot(y)
    n_features = X.shape[1]
    n_classes = Y.shape[1]

    def decode(params):
        # Flat vector -> (weight matrix, bias vector).
        weights = params[:-n_classes].reshape(n_features, n_classes)
        bias = params[-n_classes:]
        return weights, bias

    objective, _grad, _hess = crossentropy_loss(X, Y, decode)

    # Only the objective is handed to the optimiser: no jac/hess are passed,
    # so minimize uses its default method and its own gradient estimate.
    # The Newton-CG variant (method = 'Newton-CG', jac = grad, hess = hess)
    # remains disabled, as in the original cell.
    start = np.zeros((n_features + 1) * n_classes)
    solution = minimize(objective, start)
    return decode(solution.x)

def predict(model, X):
    """Return the predicted class index for every row of ``X``.

    ``model`` is the ``(W, b)`` pair returned by ``fit``.  Each row is scored
    as ``X.dot(W) + b``, passed through ``softmax``, and the column with the
    largest value is returned.
    """
    weights, bias = model
    class_scores = softmax(X.dot(weights) + bias)
    return np.argmax(class_scores, axis = 1)

# Fit the softmax-regression model on the iris data returned by
# datasets.iris() and report the fraction of rows whose predicted class
# differs from the label.  The same X is used for fitting and for scoring,
# so the printed number is an error rate on the training data.
X, y = datasets.iris()

model = fit(X, y)
print('%0.3f' % np.mean(predict(model, X) != y))

0.013


In [13]:
import tensorflow as tf

# ---- data ---------------------------------------------------------------
X, y = datasets.htwt()

# The "test" arrays are built from the same X / y as the training arrays,
# so the accuracy reported at the end of this cell is a training accuracy.
trX = X
trY = datasets.one_hot(y)
teX = X
teY = datasets.one_hot(y)

D = trX.shape[1]      # number of input features
N, C = trY.shape      # number of samples, number of classes

# ---- hyper-parameters ---------------------------------------------------
epochs = 1000
batch_size = 2
learning_rate = 0.1   # NOTE(review): not referenced by the visible code below
H = 100               # hidden-layer width
# maybe preprocess X


def R(Ws):
    """Regularization penalty; currently a constant 0 (no regularization)."""
    return 0


def init_weights(fan_in, fan_out):
    """Draw a (fan_in, fan_out) matrix from a standard normal scaled by 0.05.

    NOTE(review): not referenced by the visible code below.
    """
    # init_weights - try glorot
    return np.random.randn(fan_in, fan_out) * 0.05


def accuracy(sm, y):
    """Fraction of rows where the argmax of ``sm`` equals the argmax of ``y``."""
    predicted = np.argmax(sm, axis = 1)
    expected = np.argmax(y, axis = 1)
    return np.mean(predicted == expected)

def dense(output_dim, x):
    """Create a fully connected layer and return ``matmul(x, w) + b``.

    Parameters
    ----------
    output_dim : number of output columns of the layer.
    x : tensor whose second static dimension gives the layer's input width
        (read via ``x.get_shape()[1].value``).

    Two new ``tf.Variable``s are created on every call: a weight matrix of
    shape (input width, output_dim) initialised from a normal distribution
    with mean 0.0 and standard deviation 0.05, and a float32 zero bias of
    length ``output_dim``.
    """
    input_dim = x.get_shape()[1].value
    make_weights = tf.random_normal_initializer(0.0, 0.05)
    weights = tf.Variable(make_weights((input_dim, output_dim)))
    bias = tf.Variable(tf.zeros(output_dim, dtype = tf.float32))
    return tf.matmul(x, weights) + bias

# ---- graph --------------------------------------------------------------
# Placeholder for a mini-batch of inputs: any number of rows, D columns.
x = tf.placeholder(tf.float32, (None, D))
# Placeholder for the matching target rows.
# NOTE: this rebinds `y`, which held the raw labels from datasets.htwt()
# earlier in this cell; teY / trY were already computed from it.
y = tf.placeholder(tf.float32)

# One hidden ReLU layer of width H, then a linear layer producing one
# score (logit) per class.
H1 = tf.nn.relu(dense(H, x))
H2 = dense(trY.shape[1], H1)

# Pass logits/labels by keyword: newer TensorFlow releases reject positional
# arguments for this op, while the keyword names are accepted by both the
# older (logits, labels) signature and the newer one.
loss = tf.nn.softmax_cross_entropy_with_logits(logits = H2, labels = y)

# NOTE(review): `learning_rate` defined above is not passed here, so Adam
# runs with its library-default step size -- confirm this is intended.
optimizer = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())

    # Sweep the training rows `epochs` times, in their stored order, in
    # consecutive slices of `batch_size` rows.  The range stops at
    # N - batch_size, so a trailing partial batch (when N is not a multiple
    # of batch_size) is never fed to the optimizer.
    for _ in xrange(epochs):
        for start in xrange(0, N - batch_size + 1, batch_size):
            rows = slice(start, start + batch_size)
            sess.run(optimizer, feed_dict = {x: trX[rows], y: trY[rows]})

    # `sm` holds the raw outputs of the last layer (H2), not normalised
    # probabilities; accuracy() only compares argmax positions.
    sm = sess.run(H2, feed_dict = {x: teX})
    print(accuracy(sm, teY))

0.819047619048
