In [41]:
import os
import sys
import time

from matplotlib import pyplot as plt
import random
import math
import pandas as pd
import numpy as np
import tensorflow as tf
import keras

from tensorflow.keras import layers
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array, smart_resize
from tensorflow.keras.applications import EfficientNetB0

from keras.layers import Input, Dense, Conv2D, MaxPool2D, Dropout, Flatten, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix


"""Load in class names

Retrieves class names of bird species classifications from text file

Args:
    dataset_path: 
        Path to folder containing this dataset
    
Returns:
    names:
        Dictionary mapping unique class identification numbers to species names

"""
def load_class_names(dataset_path=''):
  
    names = {}
  
    with open(os.path.join(dataset_path, 'classes.txt')) as f:
        for line in f:
            pieces = line.strip().split()
            class_id = pieces[0]
            names[class_id] = ' '.join(pieces[1:])
  
    return names

"""Load in image paths

Retrieves image file names from text file

Args:
    dataset_path: 
        Path to folder containing this dataset
    path_prefix: 
        Path to additional subdirectory containing pre-processed images
    
Returns:
    paths:
        Dictionary mapping unique image identification strings to image file paths

"""
def load_image_paths(dataset_path='', path_prefix=''):
  
    paths = {}
  
    with open(os.path.join(dataset_path, 'images.txt')) as f:
        for line in f:
            pieces = line.strip().split()
            image_id = pieces[0]
            path = os.path.join(path_prefix, pieces[1])
            paths[image_id] = path
  
    return paths

"""Load in image labels

Retrieves classification labels for each image in dataset

Args:
    dataset_path: 
        Path to folder containing this dataset
    
Returns:
    paths:
        Dictionary mapping unique image identification strings to classification ID

"""
def load_image_labels(dataset_path=''):
    labels = {}
  
    with open(os.path.join(dataset_path, 'image_class_labels.txt')) as f:
        for line in f:
            pieces = line.strip().split()
            image_id = pieces[0]
            class_id = pieces[1]
            labels[image_id] = class_id
  
    return labels


In [42]:
dataset_path = 'Pics/nabirds/nabirds/'
path_prefix = 'croppedimages'

# Load in image data dicts
img_class_dict = load_image_labels(dataset_path)
class_labels = load_class_names(dataset_path)
img_paths = load_image_paths(dataset_path, path_prefix)

In [43]:
id_list = []
label_list = []

# Generate lists needed for train_test_split
for img in img_paths:
      
    if os.path.exists(dataset_path+img_paths[img]) == True:
        id_list.append(img_paths[img])
        class_id = img_class_dict[img]
        label_list.append(class_labels[class_id])

In [44]:
# Split data into train/test data using sklearn train_test_split function
X_train, X_test, y_train, y_test = train_test_split(id_list, label_list, test_size = 0.05, stratify=label_list)

In [45]:
# Generate dataframes needed by ImageDataGenerator
train_dict = {'id': X_train, 'label' : y_train}
test_dict = {'id': X_test, 'label' : y_test}
train_df = pd.DataFrame(train_dict)
test_df = pd.DataFrame(test_dict)
train_df

Unnamed: 0,id,label
0,croppedimages\0834/623c7d3a3dc745f186883668a5d...,Winter Wren
1,croppedimages\0331/4138439b462c45bfae38a42f04d...,Black Scoter (Male)
2,croppedimages\0971/203334e90a3443f09fd5241daef...,White-throated Sparrow (Tan-striped/immature)
3,croppedimages\0521/4b5dbc56e82543279753bb51519...,Least Sandpiper
4,croppedimages\0969/ff12902826d24571b04f5b0fff8...,Black-throated Blue Warbler (Female/Immature m...
...,...,...
46128,croppedimages\0650/b0c9f7ac70e04093bb75be1a3af...,Reddish Egret (White morph)
46129,croppedimages\0451/66e1c752eb8b4ba2b6f27bb786e...,Forster's Tern
46130,croppedimages\0394/872fd311458849ea84636c2f8af...,Black-legged Kittiwake (Adult)
46131,croppedimages\0786/a36462ea64c74bd5860e09a3bb6...,Baltimore Oriole (Adult male)


In [46]:
# Create data generator object from TF Keras ImageDataGenerator
gen = ImageDataGenerator(vertical_flip=True, rotation_range=20)

In [47]:
# Generate train and test images with uniform size 
IMAGE_SIZE = [224, 224]

train_data = gen.flow_from_dataframe(
    dataframe=train_df,
    directory=dataset_path,
    target_size=IMAGE_SIZE,
    batch_size=128,
    x_col='id',
    y_col='label',
    class_mode='categorical'
)

test_data = gen.flow_from_dataframe(
    dataframe=test_df,
    directory=dataset_path,
    target_size=IMAGE_SIZE,
    batch_size=128,
    x_col='id',
    y_col='label',
    class_mode='categorical'
)

Found 46133 validated image filenames belonging to 555 classes.
Found 2429 validated image filenames belonging to 555 classes.


In [48]:
effnet = EfficientNetB0(include_top=False, weights='imagenet')
effnet.trainable = False

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [49]:
K = len(np.unique(np.array(label_list))) # number of classes
model = Sequential()
model.add(effnet)
model.add(GlobalAveragePooling2D())
model.add(Dense(K, activation='softmax'))
model.compile(optimizer=Adam(learning_rate=0.0001), loss='bce', metrics=['acc'])

In [61]:
r = model.fit(
  train_data,
  validation_data=test_data,
  batch_size=32,
  epochs=10)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [123]:
model.save('FinalModel.h5')

In [109]:
gen = ImageDataGenerator()
IMAGE_SIZE = [224, 224]

retest = gen.flow_from_dataframe(
    dataframe=test_df,
    directory=dataset_path,
    target_size=IMAGE_SIZE,
    batch_size=128,
    x_col='id',
    y_col='label',
    class_mode='categorical'
)

preds2 = model.predict(retest)
np.argmax(preds2[0])

Found 2429 validated image filenames belonging to 555 classes.


514

In [112]:
res = model.evaluate(retest)



In [None]:
values = []
predictions = []
actuals = []
names = []


for i in range(0, len(y_test)):
    image = load_img(dataset_path+X_test[i], target_size=(224, 224))
    image_arr = img_to_array(image)
    image_arr = image_arr.reshape(1,224,224,3)
    pred = model.predict(image_arr)
    highest = max(pred)
    label = np.argmax(pred)
    correct = label_map[y_test[i]]
    correct_name = y_test[i]
    
    values.append(highest)
    predictions.append(label)
    actuals.append(correct)
    names.append(correct_name)
    time.sleep(2)


In [134]:
conf = []
for val in values:
    conf.append(max(val))

In [135]:
misclass_dict = {'predicted': predictions, 'confidence': conf, 'actual_label': actuals, 'actual_name': names, 'File': X_test}
misclass_df = pd.DataFrame.from_dict(misclass_dict)
misclass_df

Unnamed: 0,predicted,confidence,actual_label,actual_name,File
0,19,0.997892,19,American Tree Sparrow,croppedimages\0894/ed562ed4e1f34917aceb77b9219...
1,37,0.999001,37,Barrow's Goldeneye (Breeding male),croppedimages\0334/563d0c9eb19640c98263056c7fd...
2,425,0.182764,199,Eastern Bluebird,croppedimages\0842/e437ed8d6d61436e902618cb28c...
3,515,0.980625,515,White-crowned Sparrow (Adult),croppedimages\0766/f0d808b6b82943bd9b89abdc9c6...
4,550,0.923201,550,Yellow-rumped Warbler (Breeding Myrtle),croppedimages\0747/8a58557a9d70412582cc993298e...
...,...,...,...,...,...
2424,364,0.485557,470,Summer Tanager (Female),croppedimages\0906/8c5116d41135429b822d51bf99e...
2425,230,0.817628,230,Golden-crowned Sparrow (Adult),croppedimages\0767/85aa51aef02e412192fa93f293b...
2426,137,0.846631,137,Carolina Chickadee,croppedimages\0811/022b83acb8454cc883b3ea0eca4...
2427,418,0.835788,265,Herring Gull (Immature),croppedimages\0607/9c638e7aceca4993839cf1fcc98...


In [136]:
misclass_df.to_csv('FinalModelMisclassifications.csv')