## Logistic regression example
- Compare three ways to optimize parameters
    * Hand-crafted gradient descent
    * Tensorflow (GradientDescentOptimizer)
    * Keras (Adagrad)

In [None]:
import os, sys
import numpy as np
from random import shuffle
from math import log, floor
import pandas as pd

In [None]:
def load_data(train_data_path, train_label_path, test_data_path):
    """Read the training features, training labels and test features.

    Each path is parsed with pandas as a comma-separated file whose first
    row is a header; the remaining rows are returned as a NumPy array.

    Returns:
        A tuple ``(X_train, Y_train, X_test)`` of NumPy arrays.
    """
    def _read_matrix(csv_path):
        # header=0: the first line holds column names, not data.
        frame = pd.read_csv(csv_path, sep=',', header=0)
        return np.array(frame.values)

    all_paths = (train_data_path, train_label_path, test_data_path)
    return tuple(_read_matrix(csv_path) for csv_path in all_paths)

In [None]:
def _shuffle(X, Y):
    """Return X and Y reordered by one shared random permutation.

    The permutation covers ``len(X)`` indices and is drawn from NumPy's
    global random state; applying the same index array to both inputs keeps
    row ``i`` of X paired with row ``i`` of Y.
    """
    order = np.arange(len(X))
    np.random.shuffle(order)  # shuffles the index array in place
    shuffled_X = X[order]
    shuffled_Y = Y[order]
    return (shuffled_X, shuffled_Y)

In [None]:
def split_valid_set(X_all, Y_all, percentage):
    """Randomly split (X_all, Y_all) into a training and a validation part.

    Args:
        X_all: array of samples, indexed along its first axis.
        Y_all: array of labels with the same first-axis length as X_all.
        percentage: fraction of the rows (e.g. 0.1) to hold out for
            validation; the row count is rounded down.

    Returns:
        ``(X_train, Y_train, X_valid, Y_valid)`` where the validation arrays
        hold ``floor(len(X_all) * percentage)`` rows and the training arrays
        hold the rest.
    """
    all_data_size = len(X_all)
    valid_data_size = int(floor(all_data_size * percentage))

    # Apply one shared permutation to both arrays so samples stay aligned
    # with their labels (same RNG calls as the `_shuffle` helper).
    order = np.arange(all_data_size)
    np.random.shuffle(order)
    X_all, Y_all = X_all[order], Y_all[order]

    # The held-out `percentage` goes to validation. Previously the two slices
    # were swapped, so a 0.1 split trained on 10% and validated on 90%.
    X_valid, Y_valid = X_all[:valid_data_size], Y_all[:valid_data_size]
    X_train, Y_train = X_all[valid_data_size:], Y_all[valid_data_size:]

    return X_train, Y_train, X_valid, Y_valid

In [None]:
def normalize(X_all, X_test):
    """Standardize each feature column of X_all and X_test together.

    The two matrices are stacked row-wise, and every column is shifted by
    its mean and divided by its standard deviation, both computed over the
    stacked data. Columns that are constant across all rows (standard
    deviation 0) are mapped to 0 instead of NaN.

    Args:
        X_all: 2-D array of training features.
        X_test: 2-D array of test features with the same number of columns.

    Returns:
        ``(X_all_normed, X_test_normed)`` with the same shapes as the inputs.
    """
    n_train = X_all.shape[0]

    # Feature normalization with train and test X
    X_train_test = np.concatenate((X_all, X_test))
    mu = np.mean(X_train_test, axis=0)
    sigma = np.std(X_train_test, axis=0)

    # A zero-variance column would give 0/0 = NaN; its centered values are
    # already 0, so dividing by 1 leaves them at 0.
    sigma = np.where(sigma == 0, 1.0, sigma)

    # Broadcasting applies the per-column statistics to every row.
    X_train_test_normed = (X_train_test - mu) / sigma

    # Split to train, test again
    return X_train_test_normed[:n_train], X_train_test_normed[n_train:]

In [None]:
def sigmoid(z):
    """Return the logistic function of z, clipped to [1e-8, 1 - 1e-8].

    The clipping keeps ``log(y)`` and ``log(1 - y)`` finite for callers that
    compute a cross-entropy from the result.
    """
    # np.exp(-z) overflows for very negative z. Bounding z to [-30, 30] first
    # avoids that and does not change the result: the sigmoid at +/-30 is
    # already beyond the output clip limits below.
    z = np.clip(z, -30.0, 30.0)
    res = 1 / (1.0 + np.exp(-z))
    return np.clip(res, 1e-8, 1-(1e-8))

In [None]:
# File IO: read the training features, training labels and test features,
# then normalize the two feature matrices with `normalize`.
X_all, Y_all, X_test = load_data(
    train_data_path='./X_train.dms',
    train_label_path='./Y_train.dms',
    test_data_path='./X_test.dms'
)
X_all, X_test = normalize(X_all=X_all, X_test=X_test)

In [None]:
# Divide the loaded data into a training part and a validation part;
# the 0.1 fraction below is forwarded to split_valid_set.
valid_set_percentage = 0.1
(X_train, Y_train,
 X_valid, Y_valid) = split_valid_set(X_all, Y_all, valid_set_percentage)

## Hyperparameters

In [None]:
# Number of passes over the training set.
hm_epochs = 200
# Number of samples per mini-batch.
batch_size = 32
# Whole mini-batches per epoch; the training loops iterate range(step_num),
# so any trailing partial batch is not visited.
step_num = len(X_train) // batch_size
# Print loss/accuracy every `display_step` epochs.
display_step = 50

## Method 1: Hand-crafted gradient descent

In [None]:
%%time
w_t = np.zeros((106,))
b_t = np.zeros((1,))

# Training Cycle
for epoch in range(hm_epochs):
    
    # Random shuffle
    X_train, Y_train = _shuffle(X_train, Y_train)
    
    total_loss = 0
    
    # Loop over all batches
    for i in range(step_num):
        X = X_train[i*batch_size:(i+1)*batch_size]
        Y = Y_train[i*batch_size:(i+1)*batch_size]
        z_t = np.dot(X, np.transpose(w_t)) + b_t
        y_t = sigmoid(z_t)

        cross_entropy = -1 * (np.dot(np.squeeze(Y), np.log(y_t)) + np.dot((1 - np.squeeze(Y)), np.log(1 - y_t)))
        total_loss += cross_entropy/batch_size/step_num

        w_grad = np.sum(-1 * X * (np.squeeze(Y) - y_t).reshape((batch_size,1)), axis=0)
        b_grad = np.sum(-1 * (np.squeeze(Y) - y_t))
        w_t -= 0.01 * w_grad
        b_t -= 0.01 * b_grad
    
    # Display logs per epoch step
    if (epoch+1) % display_step == 0:
        # Accuracy
        z = (np.dot(X_valid, np.transpose(w_t)) + b_t)
        y = sigmoid(z)
        y_ = np.around(y)
        result = (np.squeeze(Y_valid) == y_)
        
        print('Epoch', '%04d' %(epoch+1), 'loss: ', "{:.9f}".format(total_loss), 'acc: ', "{:.9f}".format(float(result.sum()) / len(X_valid)),'(Hand crafted)')

## Method 2: Tensorflow

In [None]:
%%time
import tensorflow as tf
# Set model weights
W = tf.Variable(tf.zeros([106,1]))
b = tf.Variable([0.0])

# tf Graph Input
x = tf.placeholder('float', [None, 106])
y = tf.placeholder('float')


# Prediction and cost function
prediction = tf.matmul(x, W) + b
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=prediction))
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

# Run the initializer
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Accuracy
predict_op  = tf.greater_equal(prediction, tf.zeros_like(prediction))
correct_op  = tf.equal(tf.cast(predict_op, tf.float32), y)
accuracy_op = tf.reduce_mean(tf.cast(correct_op, tf.float32))

# Training Cycle
for epoch in range(hm_epochs):
    
    # Random shuffle
    X_train, Y_train = _shuffle(X_train, Y_train)
    
    epoch_loss = 0
    
    # Loop over all batches
    for i in range(step_num):
        epoch_x = X_train[i*batch_size:(i+1)*batch_size]
        epoch_y = Y_train[i*batch_size:(i+1)*batch_size]
        _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
        epoch_loss += c/step_num
    
    # Display logs per epoch step
    if (epoch+1) % display_step == 0:
        accuracy = sess.run(accuracy_op, {x:X_valid, y:Y_valid})
        print('Epoch', '%04d' %(epoch+1),'loss: ', "{:.9f}".format(epoch_loss),'acc: ', "{:.9f}".format(accuracy))

sess.close()

## Method 3: Keras

In [None]:
%%time
from keras.models import Sequential 
from keras.layers import Dense, Activation

output_dim = nb_classes = 1;

model = Sequential() 
model.add(Dense(output_dim, input_dim=X_train.shape[1], activation='sigmoid')) 
model.compile(optimizer='adagrad', loss='binary_crossentropy', metrics=['accuracy']) 
history = model.fit(X_train, Y_train, batch_size=batch_size,
                    epochs=hm_epochs,verbose=1, validation_data=(X_valid, Y_valid)) 
score = model.evaluate(X_valid, Y_valid, verbose=0) 
print('Valid score:', score[0]) 
print('Valid accuracy:', score[1])

In [64]:
%%time
# Micro-benchmark: time a 3x3 matrix product computed with NumPy.
# %%time measures the whole cell, so the two random draws are included.
A = np.random.random((3, 3))
B = np.random.random((3, 3))
np.dot(A,B)

CPU times: user 172 µs, sys: 180 µs, total: 352 µs
Wall time: 251 µs


In [66]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    A_ = tf.placeholder('float',[3,3])
    B_ = tf.placeholder('float',[3,3])
    %%time sess.run(tf.tensordot(A_,B_,axes=1), feed_dict={A_: A, B_: B})

CPU times: user 75.3 ms, sys: 1.97 ms, total: 77.3 ms
Wall time: 77.2 ms
