# Computer Vision and Convolutional Neural Networks

In [5]:
# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# display multiple outputs within a cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all";

import warnings
warnings.filterwarnings('ignore');

# TensorFlow and tf.keras
from tensorflow import keras
import tensorflow as tf

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

1.12.0


# Convolutional Neural Net: MNIST

In [6]:
from tensorflow.keras import layers, models

# Convolutional base of the network.
#   * input_shape is (image height, image width, image channels).
#   * The first argument of each Conv2D sets the number of output channels (depth).
#   * Height and width shrink after every layer, while the depth is set by the
#     Conv2D layers and left unchanged by pooling.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
])

In [7]:
# Classifier head: flatten the final feature maps into a vector, then apply a
# hidden dense layer followed by a 10-way softmax output.
# NOTE: this cell mutates `model` in place, so running it twice appends the
# head twice -- re-run the model-building cell first.
for head_layer in (
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
):
    model.add(head_layer)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten (Flatten)            (None, 576)               0         
_________________________________________________________________
dense (Dense)                (None, 64)                36928     
__________

In [8]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# load_data() returns a (train, test) pair, each an (images, labels) pair.
train_split, test_split = mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [7]:
# Prepare the MNIST arrays, then compile, train and evaluate the convnet.
#
# The preparation helpers below are safe to call on data that has already been
# prepared, so re-running this cell does not rescale the pixels or one-hot
# encode the labels a second time.

NUM_CLASSES = 10  # must match the width of the model's softmax output layer


def prepare_images(images):
    """Return `images` with shape (N, 28, 28, 1), scaled for the network.

    Integer pixel arrays are converted to float32 and divided by 255.
    Non-integer arrays are assumed to have been scaled already and are only
    reshaped.
    """
    images = images.reshape((-1, 28, 28, 1))  # -1 infers the number of samples
    if np.issubdtype(images.dtype, np.integer):
        images = images.astype('float32') / 255
    return images


def prepare_labels(labels):
    """One-hot encode 1-D class labels; return other (already encoded) labels as-is."""
    if labels.ndim == 1:
        labels = to_categorical(labels, num_classes=NUM_CLASSES)
    return labels


train_images = prepare_images(train_images)
test_images = prepare_images(test_images)

train_labels = prepare_labels(train_labels)
test_labels = prepare_labels(test_labels)

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Keep the History object instead of letting its repr be echoed as cell output.
history = model.fit(train_images, train_labels, epochs=5, batch_size=64)

# Evaluate the trained model on the held-out test data.
test_loss, test_acc = model.evaluate(test_images, test_labels)
test_acc

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x2761a73f7f0>



0.9925

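CNNs are well suited to image classification: they learn translation-invariant features and the hierarchical structure of images (small local patterns in early layers, larger structures built from them in later layers).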

Output = (height, width, output depth), where the output depth is the number of filters: each filter is slid over the input image as a window and produces one response map.

The stride (how far the window moves at each step) and padding (extra border added around the input) parameters control how the filters are applied, and therefore the height and width of the output.

Convolution = applies a learned linear transformation to each input window <br>
Max Pooling = takes the maximum of each input window (shrinks the feature maps, which helps later layers learn higher-level structure) <br>
Average Pooling = takes the average of each input window