Load/Get data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import linear_model, preprocessing

#-------------
import matplotlib.image as mpimg
import skimage.io
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb
from skimage.transform import resize
import matplotlib.patches as mpatches
from skimage import data
import skimage

In [2]:
# Taken from Lab 6
import os.path

class ImgException(Exception):
    """Raised when a character image cannot be loaded or processed.

    Attributes:
        msg: Human-readable description of the failure.
    """

    def __init__(self, msg='No msg'):
        # Forward the message to Exception so that str(exc), exc.args and
        # tracebacks show it instead of an empty string.
        super(ImgException, self).__init__(msg)
        self.msg = msg


def load_img(char_ind, samp_ind):
    """
    Returns the image from the dataset given a character and sample index.

    The file that is read is

        Hnd/img/Sample<SSS>/img<SSS>-<CCC>.png

    where <SSS> is samp_ind and <CCC> is char_ind, each zero-padded to three
    digits.  Note that samp_ind therefore selects the "Sample" directory and
    char_ind selects the file inside that directory.

    Parameters:
    char_ind:  Integer used for the trailing number of the file name.
    samp_ind:  Integer used for the "Sample" directory and the leading
               number of the file name.

    Returns:
    img:  The image as read by skimage.io.imread.

    Raises:
    ImgException: if the file doesn't exist; the message contains the filename.
    """
    # Fixed-width zero padding.  Unlike prepending '0'/'00' by hand, this also
    # yields three characters for indices >= 100.
    sample_tag = "{:03d}".format(samp_ind)
    char_tag = "{:03d}".format(char_ind)

    fname = "Hnd/img/Sample{0}/img{0}-{1}.png".format(sample_tag, char_tag)

    if not os.path.isfile(fname):
        raise ImgException("No file found: " + fname)

    return skimage.io.imread(fname)



Resize Image

In [3]:
# Taken from Lab 6
def mnist_resize(img):
    """
    Extracts a character from the image, and places in a 28x28 image to match the MNIST format.

    Parameters:
    img:   Image array (e.g. as returned by load_img).  It is converted to
           gray scale with skimage.color.rgb2gray before processing.

    Returns:
    img1:  MNIST formatted 28 x 28 size image with the character from img
    box:   A bounding box [minr, minc, maxr, maxc] indicating the locations
           where the character was found in img.

    Raises:
    ImgException: if no foreground region is found, or if the largest region
                  covers fewer than 100 pixels.
    """
    # Image sizes (fixed for now).  To match the MNIST data, the image
    # will be first resized to 20 x 20.  Then, the image will be placed in center of 28 x 28 box
    # offset by 4 on each side.
    nx_img = 20
    ny_img = 20
    nx_box = 28
    ny_box = 28
    offx = 4
    offy = 4

    # Smallest region area (in pixels) that is accepted as a character
    min_area = 100

    # Convert the image to gray scale using the skimage.color.rgb2gray method.
    bw = skimage.color.rgb2gray(img)

    # Threshold the image using OTSU threshold.  Pixels darker than the
    # threshold become foreground (1), everything else background (0).
    thresh = threshold_otsu(bw)
    bw = closing(bw < thresh, square(3)).astype(int)

    # Get the regions in the image.  bw only holds the values 0 and 1 and is
    # passed directly as the label image.
    regions = regionprops(bw)

    # An image without any foreground yields no regions at all.  Report it
    # through ImgException (like the too-small case below) so that callers
    # which skip bad images on ImgException do not crash with an IndexError.
    if not regions:
        raise ImgException("No image found")

    # Find region with the largest area (the first one wins on ties).
    region_max = max(regions, key=lambda region: region.area)

    # Raise an ImgException if no region with area >= min_area was found
    if region_max.area < min_area:
        raise ImgException("No image found")

    # Get the bounding box of the character from region_max.bbox
    minr, minc, maxr, maxc = region_max.bbox
    box = [minr, minc, maxr, maxc]

    # Crop the image in bw to the bounding box
    bw_crop = bw[minr:maxr, minc:maxc]

    # Resize the cropped image to ny_img x nx_img
    bw_resize = resize(bw_crop, [ny_img, nx_img], mode='constant')

    # Threshold back to a 0-1 image
    bw = (bw_resize > np.mean(bw_resize))

    # Place the extracted character in the larger ny_box x nx_box image
    img1 = np.zeros([ny_box, nx_box])
    img1[offy:offy + ny_img, offx:offx + nx_img] = bw
    return img1, box

In [6]:
# Load random character images and label each one as 'a' (1) or not 'a' (0)

# Dimensions
nlet = 1000
nrow = 28
ncol = 28
npix = nrow*ncol

# Index of the character that is the positive class (the letter 'a')
a_ind = 37

# Upper bound on load attempts.  Without it the loop below never terminates
# when the image files are missing, because every failed load is skipped.
max_attempts = 20*nlet

Xlet = np.zeros((nlet, npix))
Y = []
i = 0
n_attempts = 0
while i < nlet:
    if n_attempts >= max_attempts:
        raise RuntimeError(
            'only {0:d} of {1:d} images captured after {2:d} attempts; '
            'check that the image files are available'.format(i, nlet, n_attempts))
    n_attempts += 1

    # Generate a random character and sample.
    # np.random.randint excludes the upper bound: char_ind is in 37..61
    # and samp_ind is in 0..48.
    char_ind = np.random.randint(37,62)
    samp_ind = np.random.randint(0,49)

    try:
        # load_img builds the "Sample" directory name from its second
        # argument and the file number from its first, so the character
        # index is passed second here.
        img = load_img(samp_ind, char_ind)

        # Resize the image with mnist_resize function
        img1, box = mnist_resize(img)
    except ImgException:
        # Skip if image loading or resizing failed
        continue

    # Store the image in row i of Xlet together with its label.  Every image
    # gets exactly one label so that Y[k] always belongs to Xlet[k].
    Xlet[i] = img1.ravel()
    Y.append([1] if char_ind == a_ind else [0])
    i += 1

    # Print progress
    if (i % 50 == 0):
        print ('images captured = {0:d}'.format(i))

images captured = 50
images captured = 100
images captured = 150
images captured = 200
images captured = 250
images captured = 300
images captured = 350
images captured = 400
images captured = 450
images captured = 500
images captured = 550
images captured = 600
images captured = 650
images captured = 700
images captured = 750
images captured = 800
images captured = 850
images captured = 900
images captured = 950
images captured = 1000


In [None]:
# -*- coding: utf-8 -*-

#Train to detect letter A

"""
Based on the tflearn example located here:
https://github.com/tflearn/tflearn/blob/master/examples/images/convnet_cifar10.py
"""
from __future__ import division, print_function, absolute_import

# Import tflearn and some helpers
import tflearn
from tflearn.data_utils import shuffle
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
import pickle

# Load the data set
# Xlet holds one flattened nrow*ncol image per row.  The convolutional network
# below works on 4-D input, so restore the image shape and add a single
# (gray-scale) channel axis.
images = np.asarray(Xlet).reshape(-1, nrow, ncol, 1)

# Y is expected to hold one 0/1 label per row of Xlet (1 = letter 'a').
# Fail loudly if that is not the case instead of training on misaligned data.
n_classes = 2
labels = np.asarray(Y, dtype=int).ravel()
if len(labels) != len(images):
    raise ValueError(
        "Y has {0:d} labels but Xlet has {1:d} images; "
        "every image needs exactly one label".format(len(labels), len(images)))

# One-hot encode the labels to match the two-unit softmax output and the
# categorical cross-entropy loss used below.
labels_onehot = np.eye(n_classes)[labels]

# First half for training, second half for validation.  Xlet and Y are left
# untouched so that this cell can be re-run without changing its inputs.
n_train = len(images) // 2
X_train, Y_train = images[:n_train], labels_onehot[:n_train]
X_test, Y_test = images[n_train:], labels_onehot[n_train:]

# Make sure the data is normalized
img_prep = ImagePreprocessing()
img_prep.add_featurewise_zero_center()
img_prep.add_featurewise_stdnorm()

# Create extra synthetic training data by flipping, rotating and blurring the
# images on our data set.
# NOTE(review): a left-right flip turns a letter into its mirror image, which
# is usually not the same letter - confirm that this augmentation is wanted.
img_aug = ImageAugmentation()
img_aug.add_random_flip_leftright()
img_aug.add_random_rotation(max_angle=25.)
img_aug.add_random_blur(sigma_max=3.)

# Define our network architecture:

# Input: batches of nrow x ncol images with one channel
network = input_data(shape=[None, nrow, ncol, 1],
                     data_preprocessing=img_prep,
                     data_augmentation=img_aug)

# Step 1: Convolution
network = conv_2d(network, 32, 3, activation='relu')

# Step 2: Max pooling
network = max_pool_2d(network, 2)

# Step 3: Convolution again
network = conv_2d(network, 64, 3, activation='relu')

# Step 4: Convolution yet again
network = conv_2d(network, 64, 3, activation='relu')

# Step 5: Max pooling again
network = max_pool_2d(network, 2)

# Step 6: Fully-connected 512 node neural network
network = fully_connected(network, 512, activation='relu')

# Step 7: Dropout - throw away some data randomly during training to prevent over-fitting
network = dropout(network, 0.5)

# Step 8: Fully-connected neural network with two outputs (0=isn't an 'a', 1=is an 'a') to make the final prediction
network = fully_connected(network, n_classes, activation='softmax')

# Tell tflearn how we want to train the network
network = regression(network, optimizer='adam',
                     loss='categorical_crossentropy',
                     learning_rate=0.001)

# Wrap the network in a model object
# NOTE(review): the checkpoint path still carries the "bird" name of the
# example this cell is based on; kept unchanged here.
model = tflearn.DNN(network, tensorboard_verbose=0, checkpoint_path='bird-classifier.tfl.ckpt')

# Train it! We'll do 10 training passes and monitor it as it goes.
model.fit(X_train, Y_train, n_epoch=10, shuffle=True, validation_set=(X_test, Y_test),
          show_metric=True, batch_size=96,
          snapshot_epoch=True,
          run_id=None)

# Save model when training is complete to a file
model_file = "a-classifier.tfl"
model.save(model_file)
print("Network trained and saved as {0}!".format(model_file))
