## Using tensorflow for linear Classification

Here I will use tensorflow to do a linear classfication of a batch of the CIFAR images. Of course using tensorflow for this task can be an overkill. However, the purpose of this notebook is to personally became acquainted with tf's syntax.

In [124]:
import numpy as np
import pickle
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

In [119]:
## Get One batch of CFAR 10 data set

def load_cfar10_batch(cifar10_dataset_folder_path, batch_id):
    """
    Load a batch of the dataset
    """
    with open(cifar10_dataset_folder_path + '/data_batch_' + str(batch_id), mode='rb') as file:
        batch = pickle.load(file, encoding='latin1')

    features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = batch['labels']

    return features, labels

In [125]:
#Readin in the data
data = load_cfar10_batch('../Udacity/deep_learning/projects/image-classification/cifar-10-batches-py',
                 1)
features = data[0]
labels = data[1]

In [126]:
def image_standarization(x):
    """
    Normalize a list of sample image data in the range of 0 to 1
    : x: List of image data.  The image shape is (32, 32, 3)
    : return: Numpy array of normalize data
    """
    x_demean = x - np.mean(x)
    adjusted_sd = np.maximum(np.std(x), 1.0/np.sqrt(np.prod(x.shape)))
    return  x_demean / adjusted_sd


In [127]:
features = np.array([image_standarization(i) for i in features])
features = features.reshape(10000, 32*32*3)


In [128]:
# Updating weights after one gradient descent for first image
#One hot encoding labels
image_cat = list(range(0,10))
encoder = LabelBinarizer()
encoder.fit(image_cat)
labels = encoder.transform(labels)

In [147]:
num_per_feature = features.shape[1]
num_classes = 10

3072

In [130]:
## Generating minibatches
## Taken from udacity helper function
def get_batch(batch_size, features, labels):
    for start in range(0, len(features), batch_size):
        end = min(start + batch_size, len(features))
        yield features[start:end], labels[start:end]

In [131]:
#Splitting train / validation / test
image, image_test, y, y_test = train_test_split(features,labels,test_size=0.2,train_size=0.8)
image_train, image_val, y_train, y_val = train_test_split(image, y,test_size = 0.25,train_size =0.75)

In [146]:
image_train.shape

(6000, 3072)

In [160]:
#Creating Inputs and Labels Placeholders
tf.reset_default_graph()


inputs_ = tf.placeholder(tf.float32, [None, num_per_feature], name="input")
labels_ = tf.placeholder(tf.int32, [None, num_classes], name="labels")

#Creating Weights and Biases
W = tf.Variable(tf.random_normal([num_per_feature, num_outputs], stddev=0.1))
b = tf.Variable(tf.random_normal([num_classes], stddev=0.1))
fc = tf.add(tf.matmul(inputs_, W), b)

logits = tf.nn.softmax(fc)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc, labels=labels_))
optimizer = tf.train.AdamOptimizer().minimize(cost)

#Accuracy calculation
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

In [166]:
epochs = 100
batch_size = 128

In [172]:
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())

    for e in range(epochs):
        for feature_batch, label_batch in get_batch(batch_size, image_train, y_train):
            loss, _ = sess.run([cost, optimizer], feed_dict = {
                inputs_: feature_batch, 
                labels_: label_batch})
        print("Loss: {}\n".format(loss))
        
    #Each 4 iterations, check validation accuracy
        if e%4 == 0:
            val_acc = sess.run(accuracy, feed_dict = {
                    inputs_: image_val,
                    labels_: y_val
                })
            print("Validation Accuracy: {}\n".format(val_acc))
            
    #Finally get testing accuracy - This is after you have selected the appropriate
    #parameters using your validation set.
    
    test_accuracy = sess.run(accuracy, feed_dict={
        inputs_: image_test,
        labels_: y_test
    })
    
    print("\nTest accuracy: {}\n".format(test_accuracy))
                   

Loss: 3.4463002681732178

Validation Accuracy: 0.25

Loss: 2.815884828567505

Loss: 2.492341995239258

Loss: 2.273406744003296

Loss: 2.1115572452545166

Validation Accuracy: 0.28600001335144043

Loss: 1.9849728345870972

Loss: 1.8836100101470947

Loss: 1.8011993169784546

Loss: 1.7332041263580322

Validation Accuracy: 0.29600000381469727

Loss: 1.6763395071029663

Loss: 1.6282089948654175

Loss: 1.5869934558868408

Loss: 1.5512721538543701

Validation Accuracy: 0.30300000309944153

Loss: 1.519923448562622

Loss: 1.4920724630355835

Loss: 1.4670449495315552

Loss: 1.4443247318267822

Validation Accuracy: 0.3034999966621399

Loss: 1.4235126972198486

Loss: 1.4042967557907104

Loss: 1.3864305019378662

Loss: 1.3697155714035034

Validation Accuracy: 0.30649998784065247

Loss: 1.353994607925415

Loss: 1.3391377925872803

Loss: 1.3250408172607422

Loss: 1.3116158246994019

Validation Accuracy: 0.30399999022483826

Loss: 1.298789143562317

Loss: 1.2864996194839478

Loss: 1.2746943235397339

