## Birdspotter: a Keras machine learning model that identifies images containing birds

Written by George Bigham


In [11]:
import keras
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import array_to_img
from keras.preprocessing.image import ImageDataGenerator

Code for copying image files into the training and validation folders

In [12]:
import shutil
import os

source = 'images/'

# names lists the category subdirectories of the images folder. Plain files
# such as '.DS_Store' are excluded so the list really contains directories
# only, and it is sorted so the order does not depend on the file system.
names = sorted(
    entry for entry in os.listdir(source)
    if os.path.isdir(os.path.join(source, entry))
)

Split the data: 80% for training and 20% for validation

In [19]:
# Copy the first 80% of each category's files to the training set and the
# rest to the validation set. The originals under `source` are left in place.

train_set = 'data/train/bird/'
valid_set = 'data/validation/bird/'

# The destination folders must exist before shutil.copy can place files
# inside them.
os.makedirs(train_set, exist_ok=True)
os.makedirs(valid_set, exist_ok=True)

# NOTE(review): every category is flattened into one folder, so two
# categories containing the same file name would overwrite each other.
# Confirm that file names are unique across categories.
for name in names:
    category_dir = os.path.join(source, name)
    # Skip stray files such as '.DS_Store'; only directories are categories.
    if not os.path.isdir(category_dir):
        continue
    # Ignore hidden files and sort so the split is reproducible across runs.
    files = sorted(f for f in os.listdir(category_dir) if not f.startswith('.'))
    train_set_size = 0.8 * len(files)
    for file_index, file in enumerate(files):
        destination = train_set if file_index < train_set_size else valid_set
        shutil.copy(os.path.join(category_dir, file), destination)

In [13]:
# Prefix every image in 101_ObjectCategories with its category name so that
# file names are unique across categories.
non_bird_source = '101_ObjectCategories'
object_categories = os.listdir(non_bird_source)

for object_type in object_categories:
    category_dir = os.path.join(non_bird_source, object_type)
    # Skip stray files such as '.DS_Store'; only directories are categories.
    if not os.path.isdir(category_dir):
        continue
    prefix = object_type + '_'
    for file in os.listdir(category_dir):
        # Leave hidden files alone, and skip files that already carry the
        # prefix so that re-running this cell does not prefix them twice.
        if file.startswith('.') or file.startswith(prefix):
            continue
        os.rename(os.path.join(category_dir, file),
                  os.path.join(category_dir, prefix + file))

In [21]:
# Copy non-bird images into the birdless folders: per category, the first 40
# files go to training and the next 20 to validation; any further files are
# not used. The originals are left in place.

non_bird_source = '101_ObjectCategories'
object_categories = os.listdir(non_bird_source)

train_set_nb = 'data/train/birdless/'
valid_set_nb = 'data/validation/birdless/'

n_train_per_category = 40
n_valid_per_category = 20

# The destination folders must exist before shutil.copy can place files
# inside them.
os.makedirs(train_set_nb, exist_ok=True)
os.makedirs(valid_set_nb, exist_ok=True)

for object_type in object_categories:
    category_dir = os.path.join(non_bird_source, object_type)
    # Skip stray files such as '.DS_Store'; only directories are categories.
    if not os.path.isdir(category_dir):
        continue
    # Ignore hidden files and sort so the selection is reproducible.
    files = sorted(f for f in os.listdir(category_dir) if not f.startswith('.'))
    train_files = files[:n_train_per_category]
    valid_files = files[n_train_per_category:
                        n_train_per_category + n_valid_per_category]
    for file in train_files:
        shutil.copy(os.path.join(category_dir, file), train_set_nb)
    for file in valid_files:
        shutil.copy(os.path.join(category_dir, file), valid_set_nb)

## Code for building the machine learning model


In [13]:
from keras.preprocessing.image import ImageDataGenerator

# Initialize an ImageDataGenerator that applies image augmentation.

datagen = ImageDataGenerator(
        # pixel scaling
        rescale=1./255,
        # geometric transforms
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        # how pixels created by a transform are filled in
        fill_mode='nearest')

In [14]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

In [15]:
# Build the convolutional part of the model: three blocks of
# 3x3 convolution -> ReLU -> 2x2 max pooling, taking 150x150 RGB input.

model = Sequential([
    Conv2D(32, (3, 3), input_shape=(150, 150, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
])

In [16]:
# Classifier head: flatten the 3D feature maps to 1D feature vectors, then a
# 64-unit dense layer with ReLU and dropout, and a single sigmoid output.
for head_layer in (
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid')):
    model.add(head_layer)

model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [17]:
batch_size = 16

# Options shared by both directory iterators: every image is resized to
# 150x150, and labels are binary because the loss is binary_crossentropy.
flow_options = dict(
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='binary',
)

# Training images are rescaled and randomly sheared, zoomed and flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Testing/validation images are only rescaled.
test_datagen = ImageDataGenerator(rescale=1./255)

# Reads pictures found in the subfolders of 'data/train' and indefinitely
# generates batches of augmented image data.
train_generator = train_datagen.flow_from_directory('data/train', **flow_options)

# The equivalent generator for the validation data.
validation_generator = test_datagen.flow_from_directory('data/validation', **flow_options)

Found 8911 images belonging to 2 classes.
Found 2474 images belonging to 2 classes.


In [21]:
# Load the saved model from disk and compile it with the same loss,
# optimizer and metric used when the model was built in this notebook.

from tensorflow.keras.models import load_model

model = load_model("fourth_try_full.h5")
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [22]:
import numpy as np
from keras.preprocessing import image

# Image size in pixels; 150x150 matches the input_shape the model is built with.
img_width = 150
img_height = 150


In [25]:
# Load one test image and preprocess it the same way the generators do:
# resize to the network input size and scale pixel values by 1/255.
img = image.load_img('test/bird1.jpg', target_size=(img_height, img_width))
img = image.img_to_array(img)
img = img * (1. / 255)  # must match rescale=1./255 used by the ImageDataGenerators
img = np.expand_dims(img, axis=0)  # add the batch dimension

# flow_from_directory assigns class indices in alphanumeric folder order, so
# 'bird' is class 0 and 'birdless' is class 1, and the sigmoid output is the
# score for 'birdless'. Subtracting it from 1 gives the confidence that the
# image contains a bird.
# NOTE(review): assumes the loaded model was trained with those generators.
confident_bird = 1 - model.predict(img, batch_size=1)
confident_bird

array([[1.]], dtype=float32)

In [28]:
# Pull the single confidence value out of the (1, 1) prediction array.
confident_bird[0, 0]

1.0