In [1]:
# Machine Learning with TensorFlow
# Get the CIFAR data (python version) here: http://www.cs.toronto.edu/~kriz/cifar.html
# This notebook runs TensorFlow 1.x-style Python code through tf.compat.v1.
# Book code repository: https://github.com/BinRoot/TensorFlow-Book.git
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()



Instructions for updating:
non-resource variables are not supported in the long term


In [2]:

import pickle

def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object (decoded with ``encoding='latin1'``).

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the contents are not a valid pickle
            (other exception types are possible for corrupt data).
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only call this on files from a trusted source.
    # The context manager guarantees the handle is closed even when
    # pickle.load raises, which the manual open()/close() pair did not.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='latin1')

In [3]:
# cleaning the data
import numpy as np

def clean(data):
    """Convert flat 3x32x32 image rows into normalized 24x24 single-plane rows.

    Each row of ``data`` is reshaped to three 32x32 planes, the planes are
    averaged into one, the central 24x24 window is kept, and the result is
    flattened. Every row is then shifted to zero mean and divided by its
    standard deviation, with the divisor floored at ``1 / sqrt(pixel count)``
    so near-constant images do not blow up.

    Args:
        data: 2-D array whose rows each hold ``3 * 32 * 32`` values.

    Returns:
        2-D array of shape ``(data.shape[0], 24 * 24)``.
    """
    n_images = data.shape[0]

    # Average the three planes, then crop 4 pixels off every border.
    gray = data.reshape(n_images, 3, 32, 32).mean(1)
    flat = gray[:, 4:28, 4:28].reshape(n_images, -1)

    # Per-image statistics, kept as column vectors so they broadcast per row.
    row_mean = np.mean(flat, axis=1, keepdims=True)
    row_std = np.std(flat, axis=1, keepdims=True)

    # Lower bound on the divisor guards against (near-)zero variance.
    n_pixels = np.shape(flat)[1]
    divisor = np.maximum(row_std, 1.0 / np.sqrt(n_pixels))
    return (flat - row_mean) / divisor

In [4]:
def read_data(directory):
    """Load the five CIFAR training batches from ``directory`` and clean them.

    Reads ``batches.meta`` for the label names and ``data_batch_1`` through
    ``data_batch_5`` for images and labels, stacks the batches, and passes the
    images through ``clean``.

    Args:
        directory: Folder containing ``batches.meta`` and the ``data_batch_*``
            pickle files.

    Returns:
        A tuple ``(names, data, labels)``: the list of label names, the
        cleaned images as a float32 array with one row per image, and a 1-D
        array with one label per image.
    """
    names = unpickle('{}/batches.meta'.format(directory))['label_names']
    print('names', names)

    # Collect the per-batch pieces and stack once at the end. Stacking inside
    # the loop re-copies every previously loaded batch on each iteration and
    # needs a special case for the first batch.
    data_chunks, label_chunks = [], []
    for i in range(1, 6):
        filename = '{}/data_batch_{}'.format(directory, i)
        batch_data = unpickle(filename)
        data_chunks.append(batch_data['data'])
        label_chunks.append(batch_data['labels'])
    data = np.vstack(data_chunks)
    labels = np.hstack(label_chunks)

    print(np.shape(data), np.shape(labels))
    data = clean(data).astype(np.float32)
    return names, data, labels


In [5]:
import numpy as np
import matplotlib.pyplot as plt

# Step size handed to the optimizer when the training op is built.
learning_rate = 0.001

# Load the label names, the cleaned images and their labels.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# local copy of the extracted cifar-10-batches-py folder before running.
names, data, labels = read_data(
    r'D:\Documents\1_Projects\CRSIP_ML\data\cifar-10-batches-py'
)

names ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
(50000, 3072) (50000,)


In [6]:


# Graph inputs: one flattened 24x24 image per row, and one target row per
# image with a column for every entry in `names`.
x = tf.placeholder(tf.float32, [None, 24 * 24])
y = tf.placeholder(tf.float32, [None, len(names)])
# First convolution: 5x5 filters, 1 input channel, 64 output channels.
W1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
b1 = tf.Variable(tf.random_normal([64]))
# Second convolution: 5x5 filters, 64 input channels, 64 output channels.
W2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))
b2 = tf.Variable(tf.random_normal([64]))
# Fully connected layer. Its input width 6*6*64 is the flattened feature map
# after two 2x2 max-pools (24 -> 12 -> 6 per side) with 64 channels.
W3 = tf.Variable(tf.random_normal([6*6*64, 1024]))
b3 = tf.Variable(tf.random_normal([1024]))
# Output layer: one score per entry in `names`.
W_out = tf.Variable(tf.random_normal([1024, len(names)]))
b_out = tf.Variable(tf.random_normal([len(names)]))



In [7]:
def conv_layer(x, W, b):
    """Convolve ``x`` with ``W``, add bias ``b`` and apply ReLU.

    The convolution uses stride 1 in every dimension and 'SAME' padding.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Filter tensor for the convolution.
        b: Bias tensor added to the convolution output.

    Returns:
        The rectified, biased convolution output tensor.
    """
    pre_activation = tf.nn.bias_add(
        tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME'),
        b,
    )
    return tf.nn.relu(pre_activation)


def maxpool_layer(conv, k=2):
    """Max-pool ``conv`` over k-by-k windows with stride ``k`` and 'SAME' padding.

    Args:
        conv: Input tensor passed to ``tf.nn.max_pool``.
        k: Window size and stride along the two middle dimensions.

    Returns:
        The pooled tensor.
    """
    # The same [1, k, k, 1] spec serves as both window size and stride.
    window = [1, k, k, 1]
    return tf.nn.max_pool(conv, ksize=window, strides=window, padding='SAME')

In [8]:
def model():
    """Build the network on the global placeholder ``x`` and return its output.

    Uses the module-level variables ``W1``/``b1``, ``W2``/``b2``, ``W3``/``b3``
    and ``W_out``/``b_out``.

    Returns:
        The output tensor of the final affine layer (no softmax applied).
    """
    images = tf.reshape(x, shape=[-1, 24, 24, 1])
    lrn_kwargs = dict(bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # Stage 1: convolution -> max-pool -> local response normalization.
    stage1 = maxpool_layer(conv_layer(images, W1, b1))
    stage1 = tf.nn.lrn(stage1, 4, **lrn_kwargs)

    # Stage 2: convolution -> local response normalization -> max-pool.
    stage2 = tf.nn.lrn(conv_layer(stage1, W2, b2), 4, **lrn_kwargs)
    stage2 = maxpool_layer(stage2)

    # Flatten to the input width expected by W3, then the hidden ReLU layer.
    flat_width = W3.get_shape().as_list()[0]
    flattened = tf.reshape(stage2, [-1, flat_width])
    hidden = tf.nn.relu(tf.add(tf.matmul(flattened, W3), b3))

    return tf.add(tf.matmul(hidden, W_out), b_out)

In [9]:
# Build the network once; `model_op` holds the unnormalized class scores.
model_op = model()

# Mean softmax cross-entropy between the scores and the targets fed into `y`.
# The `_v2` op replaces the deprecated `softmax_cross_entropy_with_logits`,
# which emits an "Instructions for updating" warning. The only behavioral
# difference is that `_v2` lets gradients flow into `labels`; `y` is a
# placeholder, so the gradients used for training are unaffected.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=model_op, labels=y)
)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Fraction of rows in the batch whose highest score matches the target's
# highest entry.
correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [10]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # One-hot encode all labels up front so batches can be sliced from the
    # resulting array.
    onehot_labels = tf.one_hot(labels, len(names), on_value=1., off_value=0., axis=-1)
    onehot_vals = sess.run(onehot_labels)

    # The dataset is split into 200 batches per pass.
    batch_size = len(data) // 200
    print('batch size', batch_size)

    for epoch in range(1000):
        accuracy_sum = 0.
        batches_seen = 0.
        for start in range(0, len(data), batch_size):
            stop = start + batch_size
            feed = {x: data[start:stop, :], y: onehot_vals[start:stop, :]}
            # One optimizer step; accuracy is measured on the same batch.
            _, batch_accuracy = sess.run([train_op, accuracy], feed_dict=feed)
            accuracy_sum += batch_accuracy
            batches_seen += 1.
        accuracy_sum /= batches_seen
        print('Epoch {}. Avg accuracy {}'.format(epoch, accuracy_sum))

batch size 250
Epoch 0. Avg accuracy 0.19425999995321036
Epoch 1. Avg accuracy 0.2516200008243322
Epoch 2. Avg accuracy 0.2709800013899803
Epoch 3. Avg accuracy 0.2845400020480156
Epoch 4. Avg accuracy 0.2975799997150898
Epoch 5. Avg accuracy 0.30834000058472155
Epoch 6. Avg accuracy 0.3160800018906593
Epoch 7. Avg accuracy 0.32366000026464464
Epoch 8. Avg accuracy 0.33454000018537045
Epoch 9. Avg accuracy 0.3391800002753735
Epoch 10. Avg accuracy 0.34534000128507614
Epoch 11. Avg accuracy 0.34866000041365625
Epoch 12. Avg accuracy 0.3531800006330013
Epoch 13. Avg accuracy 0.3599000005424023
Epoch 14. Avg accuracy 0.3616600015759468
Epoch 15. Avg accuracy 0.3661600014567375
Epoch 16. Avg accuracy 0.36822000056505205
Epoch 17. Avg accuracy 0.3799200016260147
Epoch 18. Avg accuracy 0.37990000054240225
Epoch 19. Avg accuracy 0.38828000113368033
Epoch 20. Avg accuracy 0.3903400012850761
Epoch 21. Avg accuracy 0.391440000385046
Epoch 22. Avg accuracy 0.38884000085294246
Epoch 23. Avg accura

KeyboardInterrupt: 