In [1]:
import os
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split

import tensorflow as tf



In [2]:
# Switch into the exercise directory so the relative CSV path below resolves.
# NOTE(review): this is a machine-specific absolute path; it has to be edited
# before the notebook can run anywhere else.
project_dir = '/home/abhishek/Desktop/Projects/tf/yet_another_ML_tutorial/coding_exercise/'
os.chdir(project_dir)

# header=None: no row is treated as a header, so columns get integer labels.
raw_data = pd.read_csv("./CreditDataset.csv", header=None)
print("Shape of original data frame: %s" % (raw_data.shape,))

Shape of original data frame: (1000, 21)


In [3]:
# Show the dtype of every column, then split the frame by dtype:
# object columns hold the categorical variables, int64 columns the numeric ones.
print(raw_data.dtypes)
obj_df = raw_data.select_dtypes(include=['object']).copy()
int_df = raw_data.select_dtypes(include=['int64']).copy()
print("Shape of object data frame: %s" % (obj_df.shape,))
print("Shape of int64 data frame: %s" % (int_df.shape,))
print("Type of int data frame: %s" % (type(int_df),))

# List every row of the categorical frame that has at least one null value
# (an empty frame means there is nothing to impute).
rows_with_nulls = obj_df.isnull().any(axis=1)
print(obj_df[rows_with_nulls])

0     object
1      int64
2     object
3     object
4      int64
5     object
6     object
7      int64
8     object
9     object
10     int64
11    object
12     int64
13    object
14    object
15     int64
16    object
17     int64
18    object
19    object
20     int64
dtype: object
Shape of object data frame: (1000, 13)
Shape of int64 data frame: (1000, 8)
Type of int data frame: <class 'pandas.core.frame.DataFrame'>
Empty DataFrame
Columns: [0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19]
Index: []


In [4]:
# --- Categorical features: label-encode, then one-hot encode ----------------
# LabelEncoder maps each column's values to integers in [0, n_classes-1].
# DataFrame.apply re-fits the same encoder on every column in turn.
le = preprocessing.LabelEncoder()
le_obj_df = obj_df.apply(le.fit_transform)

# Expand the integer codes into one indicator column per category.
encode_object = preprocessing.OneHotEncoder()
encode_object.fit(le_obj_df)
onehotlabels = encode_object.transform(le_obj_df).toarray()
print(onehotlabels.shape)
print(type(onehotlabels))

# --- Merge the int64 columns with the one-hot features -----------------------
# .values is used instead of the deprecated DataFrame.as_matrix().
np_int_df = int_df.values
print(np_int_df.shape)
processed_data = np.concatenate([onehotlabels, np_int_df], axis=1)
print(processed_data.shape)
print(processed_data.dtype)

# --- One-hot encode the class labels -----------------------------------------
# The last column of the merged array is the last int64 column of the raw
# data and is treated as the class label.
# NOTE(review): `raw_labels - 1` is used as a column index into a 2-column
# array, which assumes the labels take the values 1 and 2 -- confirm.
raw_labels = processed_data[:, -1].astype(int)
num_samples = raw_labels.shape[0]
encoded_labels = np.zeros((num_samples, 2))
encoded_labels[np.arange(num_samples), raw_labels - 1] = 1

# Swap the raw label column for its two-column one-hot encoding. The label
# column is dropped by position (last column) instead of a hard-coded width.
processed_data = np.concatenate([processed_data[:, :-1], encoded_labels], axis=1)
print(processed_data.shape)


(1000, 54)
<type 'numpy.ndarray'>
(1000, 8)
(1000, 62)
float64
(1000, 63)


In [5]:
# Split into train and test sets: 30% of the rows are held out for testing,
# and the fixed random_state keeps the split the same on every run.
# Columns 0-60 are the features, columns 61-62 the one-hot labels.
feature_cols = processed_data[:, :61]
label_cols = processed_data[:, 61:63]
X_train, X_test, y_train, y_test = train_test_split(
    feature_cols, label_cols, test_size=0.3, random_state=42)


In [6]:
# Shapes of the train and test feature matrices.
print("%s %s" % (X_train.shape, X_test.shape))

(700, 61) (300, 61)


In [7]:
# Shapes of the train and test one-hot label matrices.
print("%s %s" % (y_train.shape, y_test.shape))

(700, 2) (300, 2)


In [8]:
def random_batch(dataset, batch_size, num_label_cols=2):
    """Draw a random mini-batch of rows and split it into features and labels.

    Rows are sampled without replacement using numpy's global random state,
    so seeding ``np.random`` makes the batches reproducible.

    Args:
        dataset: 2-D array whose trailing ``num_label_cols`` columns are the
            labels and whose remaining leading columns are the features.
        batch_size: number of rows to draw; must not exceed the number of
            rows in ``dataset``.
        num_label_cols: how many trailing columns hold the labels. Defaults
            to 2, the width that was previously hard-coded.

    Returns:
        Tuple ``(x, y)``: the feature columns and the label columns of the
        sampled rows, in the same row order.

    Raises:
        ValueError: raised by ``np.random.choice`` when ``batch_size`` is
            larger than the number of rows.
    """
    row_ids = np.random.choice(dataset.shape[0], batch_size, replace=False)
    sample = dataset[row_ids, :]
    # Everything before this column is a feature, everything after is a label.
    split_at = dataset.shape[1] - num_label_cols
    return (sample[:, :split_at], sample[:, split_at:])

In [9]:
# Training hyper-parameters
training_epochs = 25     # passes of the outer training loop
learning_rate = 0.001    # step size handed to the optimizer
num_steps = 500000
batch_size = 100         # rows drawn per mini-batch
display_step = 100       # logging interval

# Network parameters
n_hidden_1 = n_hidden_2 = 256  # neurons in the 1st and 2nd hidden layers
# Input and output widths follow the prepared data rather than being fixed.
num_input, num_classes = X_train.shape[1], y_train.shape[1]

print("Number of input: %s" % (num_input,))
print("Number of classes: %s" % (num_classes,))

# Graph inputs: the leading None leaves the batch dimension unconstrained.
X = tf.placeholder(tf.float32, shape=[None, num_input])
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

Number of input: 61
Number of classes: 2


In [10]:
# Model parameters, both initialised to zero: W maps the num_input features
# to num_classes scores and b is the per-class bias.
weight_shape = [num_input, num_classes]
bias_shape = [num_classes]
W = tf.Variable(tf.zeros(weight_shape))
b = tf.Variable(tf.zeros(bias_shape))

In [None]:
# Construct model: one linear layer followed by softmax.
# X and Y are the input/label placeholders; the lowercase names x and y used
# previously were never defined and raised a NameError.
logits = tf.matmul(X, W) + b
pred = tf.nn.softmax(logits)  # class probabilities, used for evaluation

# Minimize the mean cross entropy over the batch. It is computed from the
# logits rather than as -sum(Y * log(pred)), which turns into NaN as soon as
# a predicted probability underflows to 0.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()

In [None]:
# Start training
with tf.Session() as sess:
    sess.run(init)

    # Features and one-hot labels side by side; random_batch splits them
    # apart again. Built once because it does not change between batches.
    train_data = np.concatenate([X_train, y_train], axis=1)
    # Batches per epoch: the number of training rows (not the array itself)
    # integer-divided by the batch size.
    total_batch = X_train.shape[0] // batch_size

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Each batch is an independent random draw, so an "epoch" here is
        # total_batch draws rather than one exact pass over the data.
        for _step in range(total_batch):
            batch_xs, batch_ys = random_batch(train_data, batch_size)
            # Run one optimization step and fetch the batch loss.
            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs,
                                                          Y: batch_ys})
            # Accumulate the mean loss over the epoch.
            avg_cost += c / total_batch
        # Display logs every display_step epochs.
        # NOTE(review): the parameters cell sets display_step = 100 and
        # training_epochs = 25, so this condition is never true and nothing
        # is logged -- lower display_step if per-epoch output is wanted.
        if (epoch + 1) % display_step == 0:
            print("Epoch: %04d cost= %s" % (epoch + 1, "{:.9f}".format(avg_cost)))

    print("Optimization Finished!")

    # Test model: a prediction is correct when the most probable class
    # matches the position of the 1 in the one-hot label.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    # Accuracy is the fraction of correct predictions on the held-out test set.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy: %s" % (accuracy.eval({X: X_test, Y: y_test}),))

Step 1, Minibatch Loss= 780731.0625, Training Accuracy= 0.210
Step 100, Minibatch Loss= 12819.0850, Training Accuracy= 0.700
Step 200, Minibatch Loss= 9483.3389, Training Accuracy= 0.650
Step 300, Minibatch Loss= 29499.9434, Training Accuracy= 0.300
Step 400, Minibatch Loss= 4385.1675, Training Accuracy= 0.480
Step 500, Minibatch Loss= 5533.4785, Training Accuracy= 0.470
Step 600, Minibatch Loss= 35744.6406, Training Accuracy= 0.290
Step 700, Minibatch Loss= 21940.8945, Training Accuracy= 0.350
Step 800, Minibatch Loss= 3054.1096, Training Accuracy= 0.710
Step 900, Minibatch Loss= 8765.0000, Training Accuracy= 0.770
Step 1000, Minibatch Loss= 11961.2773, Training Accuracy= 0.460
Step 1100, Minibatch Loss= 11436.5273, Training Accuracy= 0.710
Step 1200, Minibatch Loss= 16345.3037, Training Accuracy= 0.670
Step 1300, Minibatch Loss= 7451.9771, Training Accuracy= 0.720
Step 1400, Minibatch Loss= 8149.7036, Training Accuracy= 0.680
Step 1500, Minibatch Loss= 5334.7031, Training Accuracy= 0

Step 13000, Minibatch Loss= 2619.3875, Training Accuracy= 0.710
Step 13100, Minibatch Loss= 2483.3774, Training Accuracy= 0.710
Step 13200, Minibatch Loss= 4710.4956, Training Accuracy= 0.770
Step 13300, Minibatch Loss= 8376.9590, Training Accuracy= 0.540
Step 13400, Minibatch Loss= 782.0744, Training Accuracy= 0.900
Step 13500, Minibatch Loss= 3133.8503, Training Accuracy= 0.780
Step 13600, Minibatch Loss= 3582.4111, Training Accuracy= 0.700
Step 13700, Minibatch Loss= 1626.2415, Training Accuracy= 0.770
Step 13800, Minibatch Loss= 2309.3552, Training Accuracy= 0.800
Step 13900, Minibatch Loss= 1377.4655, Training Accuracy= 0.770
Step 14000, Minibatch Loss= 46863.2812, Training Accuracy= 0.320
Step 14100, Minibatch Loss= 1754.2423, Training Accuracy= 0.710
Step 14200, Minibatch Loss= 4439.7363, Training Accuracy= 0.650
Step 14300, Minibatch Loss= 4306.2949, Training Accuracy= 0.720
Step 14400, Minibatch Loss= 7048.6660, Training Accuracy= 0.580
Step 14500, Minibatch Loss= 4788.7246, T

Step 25800, Minibatch Loss= 1755.3243, Training Accuracy= 0.720
Step 25900, Minibatch Loss= 1564.6893, Training Accuracy= 0.720
Step 26000, Minibatch Loss= 2916.7175, Training Accuracy= 0.690
Step 26100, Minibatch Loss= 4729.8745, Training Accuracy= 0.760
Step 26200, Minibatch Loss= 863.5269, Training Accuracy= 0.840
Step 26300, Minibatch Loss= 1383.2690, Training Accuracy= 0.810
Step 26400, Minibatch Loss= 6285.6689, Training Accuracy= 0.700
Step 26500, Minibatch Loss= 4307.7651, Training Accuracy= 0.670
Step 26600, Minibatch Loss= 4202.7676, Training Accuracy= 0.670
Step 26700, Minibatch Loss= 2027.2784, Training Accuracy= 0.700
Step 26800, Minibatch Loss= 1388.1200, Training Accuracy= 0.790
Step 26900, Minibatch Loss= 8741.0010, Training Accuracy= 0.530
Step 27000, Minibatch Loss= 3790.5457, Training Accuracy= 0.620
Step 27100, Minibatch Loss= 7353.0845, Training Accuracy= 0.560
Step 27200, Minibatch Loss= 1695.6428, Training Accuracy= 0.770
Step 27300, Minibatch Loss= 5342.2983, Tr

Step 38600, Minibatch Loss= 5026.7100, Training Accuracy= 0.700
Step 38700, Minibatch Loss= 4404.1914, Training Accuracy= 0.730
Step 38800, Minibatch Loss= 3941.0947, Training Accuracy= 0.710
Step 38900, Minibatch Loss= 1771.4971, Training Accuracy= 0.730
Step 39000, Minibatch Loss= 11896.6680, Training Accuracy= 0.480
Step 39100, Minibatch Loss= 1302.8524, Training Accuracy= 0.780
Step 39200, Minibatch Loss= 4065.0706, Training Accuracy= 0.620
Step 39300, Minibatch Loss= 1490.2064, Training Accuracy= 0.700
Step 39400, Minibatch Loss= 3127.0461, Training Accuracy= 0.610
Step 39500, Minibatch Loss= 5597.4888, Training Accuracy= 0.580
Step 39600, Minibatch Loss= 4656.1445, Training Accuracy= 0.580
Step 39700, Minibatch Loss= 5073.2349, Training Accuracy= 0.740
Step 39800, Minibatch Loss= 3785.4355, Training Accuracy= 0.650
Step 39900, Minibatch Loss= 3540.4272, Training Accuracy= 0.780
Step 40000, Minibatch Loss= 8083.3813, Training Accuracy= 0.780
Step 40100, Minibatch Loss= 2012.8964, 

Step 51400, Minibatch Loss= 16327.9873, Training Accuracy= 0.410
Step 51500, Minibatch Loss= 2739.8730, Training Accuracy= 0.740
Step 51600, Minibatch Loss= 1569.3744, Training Accuracy= 0.820
Step 51700, Minibatch Loss= 1066.8926, Training Accuracy= 0.760
Step 51800, Minibatch Loss= 1112.8453, Training Accuracy= 0.830
Step 51900, Minibatch Loss= 1040.6414, Training Accuracy= 0.790
Step 52000, Minibatch Loss= 2502.0645, Training Accuracy= 0.730
Step 52100, Minibatch Loss= 985.3452, Training Accuracy= 0.720
Step 52200, Minibatch Loss= 9413.3564, Training Accuracy= 0.700
Step 52300, Minibatch Loss= 1731.7366, Training Accuracy= 0.790
Step 52400, Minibatch Loss= 5318.5215, Training Accuracy= 0.510
Step 52500, Minibatch Loss= 7568.2524, Training Accuracy= 0.480
Step 52600, Minibatch Loss= 2399.4111, Training Accuracy= 0.730
Step 52700, Minibatch Loss= 7767.5444, Training Accuracy= 0.530
Step 52800, Minibatch Loss= 2259.7246, Training Accuracy= 0.780
Step 52900, Minibatch Loss= 4936.8398, T

Step 64300, Minibatch Loss= 2399.2773, Training Accuracy= 0.760
Step 64400, Minibatch Loss= 1057.5408, Training Accuracy= 0.740
Step 64500, Minibatch Loss= 1089.7703, Training Accuracy= 0.770
Step 64600, Minibatch Loss= 2029.8258, Training Accuracy= 0.710
Step 64700, Minibatch Loss= 1026.1776, Training Accuracy= 0.730
Step 64800, Minibatch Loss= 3266.8181, Training Accuracy= 0.740
Step 64900, Minibatch Loss= 11139.9922, Training Accuracy= 0.380
Step 65000, Minibatch Loss= 2873.1619, Training Accuracy= 0.650
Step 65100, Minibatch Loss= 6514.4048, Training Accuracy= 0.710
Step 65200, Minibatch Loss= 3242.8586, Training Accuracy= 0.680
Step 65300, Minibatch Loss= 674.9144, Training Accuracy= 0.790
Step 65400, Minibatch Loss= 1274.9735, Training Accuracy= 0.830
Step 65500, Minibatch Loss= 5388.4082, Training Accuracy= 0.750
Step 65600, Minibatch Loss= 4362.9170, Training Accuracy= 0.710
Step 65700, Minibatch Loss= 2029.8275, Training Accuracy= 0.660
Step 65800, Minibatch Loss= 1449.0406, T

Step 77200, Minibatch Loss= 1285.5526, Training Accuracy= 0.760
Step 77300, Minibatch Loss= 1534.4321, Training Accuracy= 0.740
Step 77400, Minibatch Loss= 584.8220, Training Accuracy= 0.800
Step 77500, Minibatch Loss= 2916.5554, Training Accuracy= 0.740
Step 77600, Minibatch Loss= 6965.4043, Training Accuracy= 0.670
Step 77700, Minibatch Loss= 4973.1567, Training Accuracy= 0.750
Step 77800, Minibatch Loss= 2401.1760, Training Accuracy= 0.670
Step 77900, Minibatch Loss= 5856.0288, Training Accuracy= 0.520
Step 78000, Minibatch Loss= 6320.7358, Training Accuracy= 0.760
Step 78100, Minibatch Loss= 1197.7183, Training Accuracy= 0.830
Step 78200, Minibatch Loss= 1986.2089, Training Accuracy= 0.700
Step 78300, Minibatch Loss= 786.0383, Training Accuracy= 0.860
Step 78400, Minibatch Loss= 2882.7312, Training Accuracy= 0.570
Step 78500, Minibatch Loss= 6479.7417, Training Accuracy= 0.680
Step 78600, Minibatch Loss= 2055.1689, Training Accuracy= 0.730
Step 78700, Minibatch Loss= 7776.8730, Tra

Step 90100, Minibatch Loss= 10037.2783, Training Accuracy= 0.450
Step 90200, Minibatch Loss= 441.2373, Training Accuracy= 0.840
Step 90300, Minibatch Loss= 4077.4062, Training Accuracy= 0.710
Step 90400, Minibatch Loss= 2211.0488, Training Accuracy= 0.680
Step 90500, Minibatch Loss= 2475.9900, Training Accuracy= 0.790
Step 90600, Minibatch Loss= 2522.4805, Training Accuracy= 0.670
Step 90700, Minibatch Loss= 3208.7732, Training Accuracy= 0.720
Step 90800, Minibatch Loss= 2391.6201, Training Accuracy= 0.650
Step 90900, Minibatch Loss= 972.2650, Training Accuracy= 0.700
Step 91000, Minibatch Loss= 1069.6006, Training Accuracy= 0.720
Step 91100, Minibatch Loss= 6022.4150, Training Accuracy= 0.450
Step 91200, Minibatch Loss= 785.8714, Training Accuracy= 0.830
Step 91300, Minibatch Loss= 977.6635, Training Accuracy= 0.740
Step 91400, Minibatch Loss= 810.0675, Training Accuracy= 0.790
Step 91500, Minibatch Loss= 2310.2231, Training Accuracy= 0.580
Step 91600, Minibatch Loss= 4195.2358, Train