## Initialization

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

## Importing the Data

In [3]:
# Load the balance-scale data file. header = None because the file has no header row,
# so pandas labels the five columns 0..4.
dataset = pd.read_csv('balance-scale.data', header = None)
dataset.shape

(625, 5)

In [4]:
dataset.head()

Unnamed: 0,0,1,2,3,4
0,B,1,1,1,1
1,R,1,1,1,2
2,R,1,1,1,3
3,R,1,1,1,4
4,R,1,1,1,5


In [5]:
# Columns 1-4 are the numeric inputs; column 0 holds the class letter used as the target.
X = dataset.iloc[:, 1:5].values
Y = dataset.iloc[:, 0].values

In [6]:
X[0]

array([1, 1, 1, 1], dtype=int64)

In [7]:
Y[0]

'B'

## Preprocess the Data

In [8]:
# Encoder that maps the class letters in Y to integer codes.
le_Y = LabelEncoder()

In [9]:
# Replace the class letters with their integer codes. Note that this rebinds Y.
Y = le_Y.fit_transform(Y)

In [10]:
Y[0]

0

In [11]:
# OneHotEncoder expects a 2-D array, so turn the label vector into a single column.
Y = Y.reshape(len(Y), 1)
# The `categorical_features` argument was deprecated in scikit-learn 0.20 and removed
# in 0.22. Y has exactly one column, so the default encoder (which encodes every
# input column) produces the same result on old and new versions alike.
ohe_Y = OneHotEncoder()

In [12]:
# One-hot encode the integer labels; .toarray() turns the encoder's result into a dense array.
Y = ohe_Y.fit_transform(Y).toarray()

In [13]:
Y[0]

array([ 1.,  0.,  0.])

## Create Train and Test Data

In [14]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the split is repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 4)

In [15]:
X_train.shape

(500, 4)

In [16]:
Y_train.shape

(500, 3)

In [17]:
X_test.shape

(125, 4)

In [18]:
Y_test.shape

(125, 3)

## Create TF Classifier

In [19]:
# Number of input columns; used to size the x placeholder and the rows of W.
num_features = X.shape[1]
num_features

4

In [20]:
# Number of one-hot columns in Y, i.e. the number of classes; sizes y_, B and the columns of W.
num_classes = Y.shape[1]
num_classes

3

In [21]:
# Linear model: logits = x . W + B, with one column of W and one entry of B per class.
# Predicted class probabilities are softmax(logits).
# Weights and biases are TensorFlow variables that both start at zero.

W = tf.Variable(tf.zeros([num_features, num_classes]))
B = tf.Variable(tf.zeros([num_classes]))

In [22]:
# Placeholders fed at run time: x receives feature rows, y_ the matching one-hot labels.
# The leading None dimension lets any number of rows be fed at once.
x = tf.placeholder(tf.float32, [None, num_features])
y_ = tf.placeholder(tf.float32, [None, num_classes])

In [23]:
# y holds the logits (un-normalised class scores) for each row of x.
# No softmax is applied here; the loss below takes logits directly.
Wx = tf.matmul(x, W)
y = Wx + B

In [24]:
# Loss to minimise: softmax cross-entropy between the one-hot labels y_ and the logits y,
# averaged over the rows that are fed.
cost_cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_, logits = y))

In [25]:
# Training op: running it performs one gradient-descent update of the loss (learning rate 0.5).
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.5).minimize(cost_cross_entropy)

In [26]:
def trainTheData(num_steps, optimizer_to_use, batch_size, log_every = 50):
    """Train the softmax-regression graph and return predicted classes for X_test.

    Relies on objects built in earlier cells: the placeholders `x` and `y_`,
    the logits tensor `y`, the variables `W` and `B`, the loss
    `cost_cross_entropy`, and the arrays `X_train`, `Y_train` and `X_test`.
    Variables are re-initialised on every call, so each call trains from scratch.

    Parameters
    ----------
    num_steps : int
        Number of optimizer steps to run.
    optimizer_to_use : TensorFlow operation
        Training op that is run once per step (e.g. the result of `.minimize(...)`).
    batch_size : int
        Rows of X_train / Y_train fed per step; must be between 1 and len(X_train).
    log_every : int, optional
        Print the cost, W and B every `log_every` steps. 0 or None disables logging.

    Returns
    -------
    numpy.ndarray
        For each row of X_test, the index of the class with the largest logit.

    Raises
    ------
    ValueError
        If batch_size is smaller than 1 or larger than len(X_train).
    """
    num_samples = len(X_train)

    # Validate once, up front, rather than on every iteration. The message is built
    # as a single string so it is not rendered as a tuple of arguments.
    if batch_size > num_samples:
        raise ValueError("Batch Size : " + str(batch_size) + " cannot be greater than Data Size : " + str(num_samples))
    if batch_size < 1:
        raise ValueError("Batch Size : " + str(batch_size) + " must be at least 1")

    # Number of valid start positions for a full window of batch_size rows.
    # Taking the modulo by this count (rather than by num_samples - batch_size) lets
    # the window reach the last rows of the training data, and yields start index 0
    # when batch_size == num_samples.
    num_start_positions = num_samples - batch_size + 1

    # Built here (not at notebook level) so it also covers any variables that the
    # optimizer passed in may have created.
    init = tf.global_variables_initializer()

    # The context manager closes the session on exit; no explicit close() is needed.
    with tf.Session() as sess:
        sess.run(init)

        for i in range(num_steps):
            batch_start_index = (i * batch_size) % num_start_positions
            batch_end_index = batch_start_index + batch_size

            feed = {x : X_train[batch_start_index : batch_end_index],
                    y_ : Y_train[batch_start_index : batch_end_index]}

            sess.run(optimizer_to_use, feed_dict = feed)

            if log_every and (i + 1) % log_every == 0:
                # Fetch everything that is reported in a single run call.
                cost_value, W_value, B_value = sess.run([cost_cross_entropy, W, B], feed_dict = feed)
                print("After " + str(i + 1) + " iterations, Cost : ", cost_value)
                print("W : ", W_value)
                print("B : ", B_value)
                print("")

        # Logits for the held-out rows.
        Y_pred = sess.run(y, feed_dict = {x : X_test})

    # Softmax preserves the ordering of the logits, so the predicted class is simply
    # the argmax of the logits. This avoids adding new ops to the graph on every call.
    return np.argmax(Y_pred, axis = 1)

In [27]:
# Full-batch training: batch_size = len(X_train), so every step feeds the whole training set.
Y_pred_classes = trainTheData(num_steps = 1000, optimizer_to_use = optimizer, batch_size = len(X_train))

After 50 iterations, Cost :  0.265523
W :  [[-0.00202909  1.48565614 -1.48362541]
 [-0.04389681  1.48146069 -1.43756318]
 [-0.02556163 -1.45656133  1.48212361]
 [-0.02702699 -1.46516693  1.49219465]]
B :  [ 0.02376175 -0.01387769 -0.00988228]

After 100 iterations, Cost :  0.258572
W :  [[  5.20939939e-05   1.63357520e+00  -1.63362610e+00]
 [ -3.91791798e-02   1.62179041e+00  -1.58261085e+00]
 [ -2.63055004e-02  -1.60017478e+00   1.62648058e+00]
 [ -2.87524723e-02  -1.61041760e+00   1.63917136e+00]]
B :  [ 0.07519477 -0.04313596 -0.03205702]

After 150 iterations, Cost :  0.254074
W :  [[  7.56199472e-04   1.75146985e+00  -1.75222433e+00]
 [ -3.73302661e-02   1.73567653e+00  -1.69834566e+00]
 [ -2.73512341e-02  -1.71510005e+00   1.74245155e+00]
 [ -3.06268409e-02  -1.72648251e+00   1.75711060e+00]]
B :  [ 0.12625389 -0.07200551 -0.05424658]

After 200 iterations, Cost :  0.250933
W :  [[  4.83443495e-04   1.84982550e+00  -1.85030723e+00]
 [ -3.67493890e-02   1.83083534e+00  -1.79408503

In [28]:
# Turn the one-hot test labels back into class indices so they can be compared with the predictions.
Y_test_classes = np.argmax(Y_test, axis = 1)

## Check the Accuracy

In [29]:
# Fraction of test rows whose predicted class equals the true class.
accuracy_score(Y_test_classes, Y_pred_classes)

0.88800000000000001

In [30]:
# Rows are true classes and columns are predicted classes (true labels are passed first).
confusion_matrix(Y_test_classes, Y_pred_classes)

array([[ 3,  0,  6],
       [ 5, 52,  1],
       [ 0,  2, 56]], dtype=int64)