In [265]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from time import time

### Load IMDB tfidf data. 

In [235]:
df_train = pd.read_csv("data/imdb/train.csv")
df_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,sentiment
0,0.776831,0.0,0.0,0.0,0.0155,0.0,0.01595,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.789451,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.76342,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.829025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.81718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [236]:
X_train = df_train.iloc[:, :-1].values
y_train = df_train.sentiment.values.astype(np.float32)

In [237]:
df_test = pd.read_csv("data/imdb/test.csv")
df_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,sentiment
0,0.808599,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.788513,0.0,0.008864,0.0,0.006284,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.76839,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.816795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.77171,0.0,0.014865,0.0,0.010539,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [238]:
X_test = df_test.iloc[:, :-1].values
y_test = df_test.sentiment.values.astype(np.float32)

In [239]:
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

### Run Logistic Regression classifier

In [240]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

lr = LogisticRegression(C=1, random_state=1)
lr.fit(X_train_std, y_train)
y_test_pred = lr.predict(X_test_std)
print("Accuracy: ", accuracy_score(y_test, y_test_pred))
confusion_matrix(y_test, y_test_pred)

Accuracy:  0.61476


array([[7941, 4559],
       [5072, 7428]])

In [241]:
lr.coef_

array([[-0.71279154, -0.03989314, -0.00945179, -0.05862788, -0.12234314,
        -0.04317324, -0.0152893 ,  0.02652596,  0.08176166,  0.08939853,
         0.05202108, -0.03179782, -0.25265378, -0.04101148, -0.44233956,
         0.09978523, -0.06799937, -0.17086883,  0.10906907, -0.15115334,
         0.00384509, -0.19009448, -0.21940036,  0.03598855, -0.05612759,
        -0.19516636, -0.03242399, -0.08557588,  0.0203025 ,  0.06574934,
         0.0483843 , -0.05347545, -0.03038236, -0.04955422, -0.00404738,
         0.01646328, -0.02550116, -0.01609564,  0.10755131,  0.00847538,
        -0.00293419,  0.10394609,  0.01345728,  0.02753872, -0.07293686,
         0.03101284, -0.0165269 , -0.01719957,  0.04025995, -0.00439755,
         0.01630796, -0.0869459 , -0.00077606,  0.08809349, -0.01037629,
         0.01744801, -0.0220454 ,  0.03226595,  0.00429942, -0.14439276,
        -0.01580498, -0.17110365, -0.02108478,  0.11651601,  0.04969867,
        -0.05768173, -0.05024891,  0.05170648]])

In [242]:
y_train.dtype

dtype('float32')

# Building Binary Classifier

In [243]:
class Batchable:

    def __init__(self, X, y, batch_size = 256, seed = None):
        import math
        import numpy as np
        if seed:
            np.random.seed(seed)
        idx = np.arange(X.shape[1])
        np.random.shuffle(idx)
        self.X = X[:, idx]
        self.y = y[:, idx]
        self.start = 0
        self.batch_size = batch_size
        self.num_batches = math.ceil(X.shape[0] / batch_size)
    
    def next(self):
        end = self.start + self.batch_size
        if end > self.X.shape[1]:
            end = self.X.shape[1] - 1
        return self.X[:, self.start: (end + 1)], self.y[:, self.start: (end + 1)]

In [244]:
tf.reset_default_graph()

    
def binary_classification(X_train, y_train, X_test, y_test):
    learning_rate = 0.01
    epochs = 1000
    n_x, m = X_train.shape

    X = tf.placeholder(dtype=tf.float32, shape = [n_x, None], name="X")
    y = tf.placeholder(dtype=tf.float32, shape = [1, None], name="y")
    
    threshold = tf.constant(0.5)

    n_h = 5

    W1 = tf.get_variable("W1", [1, n_x], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b1 = tf.get_variable("b1", [1,1], initializer = tf.zeros_initializer())

    Z1 = tf.matmul(W1, X) + b1
    #A1 = tf.nn.tanh(Z1)

    #W2 = tf.get_variable("W2", [1, n_h], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    #b2 = tf.get_variable("b2", [1,1], initializer = tf.zeros_initializer())

    #Z2 = tf.matmul(W2, A1) + b2
    AL = tf.nn.sigmoid(Z1)


    y_pred = tf.where(AL > threshold, tf.ones_like(AL), tf.zeros_like(AL))

    correct_prediction = tf.equal(y, y_pred)

    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    regularizer = tf.nn.l2_loss(W1)

    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=Z1))

    training = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    init = tf.global_variables_initializer()
    costs = []

    with tf.Session() as session:
        session.run(init)
        for epoch in range(epochs):
            batchable = Batchable(X_train, y_train)
            for i in range(batchable.num_batches):
                X_data, y_data = batchable.next()
                _, training_cost_, accuracy_ = session.run([training, cost, accuracy], feed_dict = {X: X_data, y: y_data})
                costs.append(training_cost_)
            if epoch % 100 == 0 or epoch == epochs-1:
                print({"cost": training_cost_, "training accuracy": accuracy_})
                
        test_accuracy = session.run(accuracy, feed_dict = {X: X_test, y: y_test})
        print("Test accuracy", test_accuracy)
        
binary_classification(X_train_std.T, y_train.reshape(1, -1), X_test_std.T, y_test.reshape(1, -1)) 

{'cost': 0.87126076, 'training accuracy': 0.50583655}
{'cost': 0.65061682, 'training accuracy': 0.66147858}
{'cost': 0.66520262, 'training accuracy': 0.61089492}
{'cost': 0.67054105, 'training accuracy': 0.59143966}
{'cost': 0.63096231, 'training accuracy': 0.63035017}
{'cost': 0.63412017, 'training accuracy': 0.62256807}
{'cost': 0.6975162, 'training accuracy': 0.56031126}
{'cost': 0.64336205, 'training accuracy': 0.64202332}
{'cost': 0.65301967, 'training accuracy': 0.61867702}
{'cost': 0.67494446, 'training accuracy': 0.53696495}
{'cost': 0.68239218, 'training accuracy': 0.54863811}
Test accuracy 0.61256


# Multi Class Classifier

In [225]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()

In [247]:
df_train = pd.read_csv("data/MNIST/mnist_train.csv", header= None)
y_train = df_train.iloc[:, 0]
X_train = df_train.iloc[:, 1:].values/255
X_train_std = scaler.fit_transform(X_train)
y_train = ohe.fit_transform(y_train.values.reshape(-1, 1)).toarray()

In [248]:
df_test = pd.read_csv("data/MNIST/mnist_test.csv", header=None)
y_test = df_test.iloc[:, 0]
X_test = df_test.iloc[:, 1:].values/255
X_test_std = scaler.fit_transform(X_test)
y_test = ohe.fit_transform(y_test.values.reshape(-1, 1)).toarray()

In [231]:
y_train.shape, y_test.shape

((10, 60000), (10, 10000))

In [269]:
tf.reset_default_graph()

def multi_layer(X_train, y_train, X_test, y_test, learning_rate= 0.001, epochs = 1000):
    
    n_x, m = X_train.shape
    n_y = 10
    X = tf.placeholder(dtype=tf.float32, shape=[n_x, None], name = "X")
    Y = tf.placeholder(dtype=tf.float32, shape=[n_y, None], name = "Y")
    tf.set_random_seed(1)
    
    layers = [n_x, 100, 20, n_y]
    num_layers = len(layers) - 1
    
    parameters = {}
    
    for i in range(num_layers):
        W = tf.get_variable("W"+str(i), shape = [layers[i+1], layers[i]], 
                            initializer = tf.contrib.layers.xavier_initializer(seed = 1))
        b = tf.get_variable("b" + str(i), [layers[i+1], 1], initializer = tf.zeros_initializer())
        parameters["W"+str(i)] = W
        parameters["b"+str(i)] = b
    
    A = X
    for i in range(num_layers):
        W = parameters["W"+str(i)]
        b = parameters["b"+str(i)]
        Z = tf.matmul(W, A) + b
        A = tf.nn.tanh(Z) if i < num_layers - 1 else tf.nn.softmax(Z)
    
    logits = tf.transpose(Z)
    labels = tf.transpose(Y)
    
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = labels))
    
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    init = tf.global_variables_initializer()
    
    costs = []
    with tf.Session() as session:
        session.run(init)
        for epoch in range(epochs):
            batchable = Batchable(X_train, y_train)
            for i in range(batchable.num_batches):
                X_data, y_data = batchable.next()
                _, cost_ = session.run([optimizer, cost], feed_dict={X: X_data, Y: y_data})
                costs.append(cost_)
            
            if epoch % 100 == 0 or epoch == epochs-1:
                print({"epoch": epoch, "cost": cost_})
                
        correct_prediction = tf.equal(tf.argmax(Z), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: y_train}))
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: y_test}))

start_time = time()
multi_layer(X_train_std.T, y_train.T, X_test.T, y_test.T, learning_rate=0.1, epochs=500)
print("Total time taken: ", time() - start_time)

{'epoch': 0, 'cost': 0.65078586}
{'epoch': 100, 'cost': 0.57446724}
{'epoch': 200, 'cost': 0.55498886}
{'epoch': 300, 'cost': 0.58173239}
{'epoch': 400, 'cost': 0.53572977}
{'epoch': 499, 'cost': 0.50085109}
Train Accuracy: 0.8055
Test Accuracy: 0.5365
Total time taken:  1717.75910115242
