In [1]:
import numpy as np
import datasets
from scipy.optimize import minimize
from softmax import softmax, log_softmax

In [2]:
X, y = datasets.htwt()

In [3]:
def crossentropy_loss(X, Y, decode):
    def loss(params):
        W, b = decode(params)
        return -sum([Y[i].dot(ll) for i, ll in enumerate(log_softmax(X.dot(W) + b))])

    def grad(params):
        W, b = decode(params)
        return sum([np.kron(mu - Y[i], X[i]) for i, mu in enumerate(softmax(X.dot(W) + b))])

    def hess(params):
        W, b = decode(params)
        o = lambda x: np.outer(x, x)
        return sum([np.kron(np.diag(mu) - o(mu), o(X[i])) for i, mu in enumerate(softmax(X.dot(W) + b))])
    
    return loss, grad, hess

def fit(X, y):
    Y = datasets.one_hot(y)
    N, D = X.shape
    N, C = Y.shape
    loss = crossentropy_loss

    params = [0] * (D + 1) * C
    decode = lambda params: (params[:-C].reshape(D, C), params[-C:])

    loss, grad, hess = loss(X, Y, decode)

    # params = minimize(loss, params, method = 'Newton-CG', jac = grad, hess = hess).x
    params = minimize(loss, params).x
    return decode(params)

def predict(model, X):
    W, b = model
    return np.argmax(softmax(X.dot(W) + b), axis = 1)

X, y = datasets.iris()

model = fit(X, y)
print '%0.3f' % np.mean(predict(model, X) != y)

0.013


In [44]:
import tensorflow as tf

def fit(X, y):
    X = X.astype(np.float32)
    Y = datasets.one_hot(y).astype(np.float32)
    N, D = X.shape
    N, C = Y.shape
    
    x = tf.constant(X)
    y = tf.constant(Y)

    W = tf.Variable(tf.zeros((D, C)))
    b = tf.Variable(tf.zeros(C))

    loss = tf.nn.softmax_cross_entropy_with_logits(tf.matmul(x, W) + b, y)
    optimizer = tf.train.AdamOptimizer().minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        sess.run(optimizer)
        return W.eval(), b.eval()

def predict(model, X):
    X = X.astype(np.float32)
    W, b = model
    N, D = X.shape

    with tf.Session() as sess:
        sm = (tf.matmul(X, W) + b).eval()
    
    return np.argmax(sm, axis = 1)
                    
model = fit(X, y)
print '%0.3f' % np.mean(predict(model, X) != y)

0.667
