In [1]:
import bokeh
import bokeh.io
import numpy as np
import tensorflow as tf

import toy_data

In [2]:
# Configure Bokeh to display plots inline in notebook cells.
# This relies on the explicit `import bokeh.io` in the imports cell: a bare
# `import bokeh` does not by itself guarantee that the `io` submodule is loaded.
bokeh.io.output_notebook()

### Multi-class Logistic Regression

* Assumption:

$$
Y \mid X \sim \text{Categorical}(p_1, \ldots, p_n), \qquad p_k = \frac{e^{w_k X}}{\sum_{j=1}^{n}{e^{w_j X}}}
$$

* Log likelihood:

$$
\begin{align*}
    \ell(w_1, \ldots, w_n) =& \log{\prod_{i=1}^{n} {P(Y_i \mid X_i, w_1, \ldots, w_n)}} \\
    =& \sum_{i=1}^{n}{\log{P(Y_i \mid X_i, w_1, \ldots, w_n)}} \\
    & (\text{Assume \( Y_i \) is categorically distributed with } p_1, \ldots, p_n) \\
    =& \sum_{i=1}^{n}{\log{\prod_{k=1}^{n}{P(Y_i=k \mid X_i, w_1, \ldots, w_n)^{I_{Y_i=k}}}}} \\
    =& \sum_{i=1}^{n}{\sum_{k=1}^n {\log{p_k^{I_{Y_i=k}}}}} \\
    =& \sum_{i=1}^{n}{\sum_{k=1}^n {I_{Y_i=k}\log{p_k}}} \\
    =& \sum_{i=1}^{n}{\sum_{k=1}^n {I_{Y_i=k}\log{\frac{e^{w_k X_i}}{\sum_{j=1}^{n}{e^{w_j X_i}}}}}} \\
    =& \sum_{i=1}^{n}{\sum_{k=1}^n {I_{Y_i=k}\left(w_k X_i - \log{\sum_{j=1}^{n}{e^{w_j X_i}}}\right)}} \\
\end{align*}
$$

* Gradient of log likelihood

$$
\begin{align*}
    &\nabla_{w_k}{\sum_{i=1}^{n}{\sum_{c=1}^n {I_{Y_i=c}\left(w_c X_i - \log{\sum_{j=1}^{n}{e^{w_j X_i}}}\right)}}}\\
    =& \sum_{i=1}^{n}{\left(I_{Y_i=k} - \hat{Y}_{ik}\right) X_i} \\
    &(\text{where } \hat{Y}_{ik} = \frac{e^{w_k X_i}}{\sum_{j=1}^{n}{e^{w_j X_i}}})
\end{align*}
$$


In [3]:
# Number of classes. The same value sizes W, b and the label placeholder y_
# in the model cell, so it must be changed here only.
Nclass = 4
# Toy dataset object; later cells read gm.tr, gm.tst, gm.Classes and
# gm.class_colors from it.
# NOTE(review): no random seed is set; if toy_data samples randomly, the plots
# and the reported accuracy will differ between runs — confirm and seed if so.
gm = toy_data.GaussianMixture(n_class=Nclass)
# Plot the classes on their own (no classifier is passed at this point).
toy_data.visualize_2D(gm.Classes, gm.class_colors)

In [4]:
# Input placeholder: any number of rows, two features per row.
x = tf.placeholder(dtype=tf.float32, shape=[None, 2])

In [5]:
# Softmax-regression parameters, all initialised to zero: W maps the two input
# features to one score per class, b adds one bias per class.
W = tf.Variable(tf.zeros([2, Nclass]))
b = tf.Variable(tf.zeros([Nclass]))

# Unnormalised class scores; kept as a separate tensor so the loss can be
# computed from them directly.
logits = tf.matmul(x, W) + b
# Predicted class probabilities (used for evaluation and plotting).
y = tf.nn.softmax(logits)

# Target labels, one row of Nclass values per example
# (presumably one-hot, since accuracy is computed with argmax — confirm).
y_ = tf.placeholder(tf.float32, [None, Nclass])

# Mean cross-entropy over the batch. The fused op works on the logits and is
# numerically stable; taking tf.log of the softmax output instead yields -inf
# (and NaN gradients) as soon as a predicted probability underflows to 0.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))

# RMSProp with learning rate 0.1. Plain gradient descent
# (tf.train.GradientDescentOptimizer(0.5)) is a drop-in alternative.
train_step = tf.train.RMSPropOptimizer(0.1).minimize(cross_entropy)

In [6]:
# Open the session that the evaluation and plotting cells reuse, then give the
# variables their initial values before the first optimisation step.
sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)

# Full-batch training: every step feeds gm.tr.X / gm.tr.y in one piece.
n_steps = 100
for i in range(n_steps):
    sess.run(train_step, feed_dict={x: gm.tr.X, y_: gm.tr.y})

In [7]:
# Compare the most probable predicted class with the labelled class, row by row.
predicted_class = tf.argmax(y, 1)
labelled_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)

# Fraction of rows classified correctly (booleans cast to 0.0 / 1.0, then averaged).
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Evaluate on gm.tst.X / gm.tst.y.
test_accuracy = sess.run(accuracy, feed_dict={x: gm.tst.X, y_: gm.tst.y})
print(test_accuracy)

0.625


In [8]:
def classifier(X):
    """Return the model's softmax output for the points in X.

    X is fed to the `x` placeholder, which accepts rows of two features, and
    the result has one row per input row with one probability per class.
    """
    class_probabilities = sess.run(y, feed_dict={x: X})
    return class_probabilities

# Plot the classes again, this time handing the trained model to the plotting
# helper through classifyF.
toy_data.visualize_2D(
    gm.Classes,
    gm.class_colors,
    classifyF=classifier,
    res=100,
)