# Bird Species Classification:
### Using Computer Vision to Classify 525 Different Bird Species
By: David Hartsman

<hr style="border: 4px solid blue">

In [45]:
# Imports

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
import seaborn as sns
import pandas as pd
import os

import tensorflow as tf
# To streamline the naming, batch generation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Keras model types and layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras import backend
from tensorflow import keras

# Metrics for classification
from sklearn.metrics import classification_report
from tensorflow.keras.metrics import FalseNegatives, FalsePositives, TrueNegatives, TruePositives

import warnings
warnings.filterwarnings("ignore")

In [136]:
# Local path containing train/valid/test folders.
# Set the BIRDS_DATA_DIR environment variable to point at a different copy of the
# dataset; when it is unset, the original local location is used unchanged.
path = os.environ.get(
    "BIRDS_DATA_DIR",
    "/Users/samalainabayeva/Desktop/FLAT_IRON!!!/birds_archive",
)

# 1st set of parameters for model that achieved 44% accuracy after 20 epochs of training
# image_generator = ImageDataGenerator(
#     rescale=1./255,  # Normalize pixel values to [0,1]
#     rotation_range=20,  # Randomly rotate images by up to 20 degrees
#     width_shift_range=0.2,  # Randomly shift images horizontally
#     height_shift_range=0.2,  # Randomly shift images vertically
#     shear_range=0.2,  # Shear intensity
#     zoom_range=0.2,  # Randomly zoom images
#     horizontal_flip=True,  # Randomly flip images horizontally
#     fill_mode='nearest'  # Fill in missing pixels with the nearest value
# )

# Current generator: no augmentation at all. Every argument other than `rescale`
# is left at its ImageDataGenerator default (no rotation, shift, shear, zoom or
# flips; fill_mode="nearest"; validation_split=0.0 because the validation data
# lives in its own folder).
image_generator = ImageDataGenerator(
    rescale=1/255,  # Scale pixel values from 0 - 1
)


In [137]:
# Settings shared by all three splits: 224x224 RGB images, categorical labels,
# batches of 32 and a fixed seed.
split_flow_kwargs = dict(
    target_size=(224, 224),
    color_mode="rgb",
    batch_size=32,
    class_mode="categorical",
    seed=13,
)

# One directory iterator per split; each split is a sub-folder of `path`.
train_generator = image_generator.flow_from_directory(
    directory=os.path.join(path, "train"), **split_flow_kwargs
)
valid_generator = image_generator.flow_from_directory(
    directory=os.path.join(path, "valid"), **split_flow_kwargs
)
test_generator = image_generator.flow_from_directory(
    directory=os.path.join(path, "test"), **split_flow_kwargs
)

## Docstring defaults
# image_generator.flow_from_directory(
#     directory,
#     target_size=(256, 256),
#     color_mode='rgb',
#     classes=None,
#     class_mode='categorical',
#     batch_size=32,
#     shuffle=True,
#     seed=None,
#     save_to_dir=None,
#     save_prefix='',
#     save_format='png',
#     follow_links=False,
#     subset=None,
#     interpolation='nearest',
#     keep_aspect_ratio=False,

Found 84635 images belonging to 525 classes.
Found 2625 images belonging to 525 classes.
Found 2625 images belonging to 525 classes.


In [138]:
# Label lookup tables for the image data.
# class_indices maps a class name to its integer index, as reported by the training generator.
class_indices = train_generator.class_indices
# reverse_dict is the inverse lookup: integer index -> class name.
reverse_dict = {index: label for label, index in class_indices.items()}

In [139]:
# Spot check of the two lookup tables: the name -> index and index -> name
# entries for one class should mirror each other.
(class_indices["OILBIRD"], reverse_dict[366])

(366, 'OILBIRD')

<hr style="border: 2px solid blue">

### Model Creation

In [140]:
# Small baseline CNN: two convolution/pooling stages followed by a dense classifier head.
model = Sequential([
    # Stage 1: 16 filters of size 3x3 over the 224x224 RGB input
    Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), input_shape=(224, 224, 3), activation="relu"),
    MaxPooling2D(),
    # Stage 2: 12 filters of size 2x2
    Conv2D(filters=12, kernel_size=(2, 2), strides=(1, 1), activation="relu"),
    MaxPooling2D(),
    # Classifier head: flatten the feature maps, one hidden layer, then one softmax unit per class
    Flatten(),
    Dense(192, activation="relu"),
    Dense(525, activation="softmax"),
])

In [141]:
# Model Compilation: Adam optimizer, categorical cross-entropy loss, accuracy as the tracked metric

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [142]:
# Summary of the Model: prints each layer with its output shape and parameter count

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 222, 222, 16)      448       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 111, 111, 16)     0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 110, 110, 12)      780       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 55, 55, 12)       0         
 2D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 36300)             0         
                                                                 
 dense_2 (Dense)             (None, 192)              

In [143]:
# Fitting the model

# Train for 20 epochs on the training generator, using the validation generator
# as validation data. The returned object is kept under the correctly spelled
# name `history`.
history = model.fit(train_generator, epochs=20, validation_data=valid_generator, workers=6)
# Backward-compatible alias for the original misspelled variable name, so any
# cell that still refers to `histoy` keeps working.
histoy = history


# 1st model from previous notebook
# hist = model.fit(data_generator, epochs=20, validation_data=valid_generator, callbacks=[tensorboard_callback],\
#                  workers=6)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20

KeyboardInterrupt: 

<hr style="border:4px solid blue">

### Examining the Model Provided via Kaggle

In [53]:
from tensorflow.keras.models import load_model

# The Kaggle-provided checkpoint sits inside the dataset folder referenced by `path`.
provided_model_path = os.path.join(path, "EfficientNetB0-525-(224 X 224)- 98.97.h5")

# Loading the checkpoint with its saved compile state fails with:
#   Unknown metric function: F1_score. Please ensure this object is passed to the `custom_objects` argument.
#   See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.
# The model is only used for inference in this notebook, so the saved
# optimizer/loss/metrics are skipped with compile=False instead of passing a
# placeholder through `custom_objects`. Call provided_model.compile(...) first
# if evaluate() or further training is ever needed.
provided_model = load_model(provided_model_path, compile=False)

# Earlier workaround, kept for reference:
# model= keras.models.load_model(model_path, custom_objects={'F1_score':'F1_score'})


# Notes on the provided model's first layers (see provided_model.summary()):
#
# rescaling (Rescaling) -> typically redundant given the rescale=1/255 argument of ImageDataGenerator
# NOTE(review): because the model carries its own Rescaling layer, confirm whether it
# expects raw 0-255 pixel values rather than inputs already scaled by the generator.
#
# normalization:
# If the data has already been scaled using the rescale parameter of ImageDataGenerator,
# applying the Normalization layer with default settings (using the mean and standard
# deviation computed during training) effectively performs a z-score normalization on
# the already scaled data. Depending on the use case, either approach can be used;
# whether additional normalization is necessary depends on the data and model requirements.



In [54]:
# Print the layer-by-layer architecture of the Kaggle-provided model
provided_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling (Rescaling)          (None, 224, 224, 3)  0           ['input_1[0][0]']                
                                                                                                  
 normalization (Normalization)  (None, 224, 224, 3)  0           ['rescaling[0][0]']              
                                                                                                  
 stem_conv (Conv2D)             (None, 112, 112, 32  864         ['normalization[0][0]']      

                                                                 ']                               
                                                                                                  
 block3b_project_bn (BatchNorma  (None, 28, 28, 48)  192         ['block3b_project_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 block3b_add (Add)              (None, 28, 28, 48)   0           ['block3b_project_bn[0][0]',     
                                                                  'block3a_project_bn[0][0]']     
                                                                                                  
 block4a_expand_conv (Conv2D)   (None, 28, 28, 192)  9216        ['block3b_add[0][0]']            
                                                                                                  
 block4a_e

                                                                                                  
 block4c_expand_activation (Act  (None, 14, 14, 384)  0          ['block4c_expand_bn[0][0]']      
 ivation)                                                                                         
                                                                                                  
 block4c_dwconv2 (DepthwiseConv  (None, 14, 14, 384)  3456       ['block4c_expand_activation[0][0]
 2D)                                                             ']                               
                                                                                                  
 block4c_bn (BatchNormalization  (None, 14, 14, 384)  1536       ['block4c_dwconv2[0][0]']        
 )                                                                                                
                                                                                                  
 block4c_a

                                                                                                  
 block5b_se_squeeze (GlobalAver  (None, 672)         0           ['block5b_activation[0][0]']     
 agePooling2D)                                                                                    
                                                                                                  
 block5b_se_reshape (Reshape)   (None, 1, 1, 672)    0           ['block5b_se_squeeze[0][0]']     
                                                                                                  
 block5b_se_reduce (Conv2D)     (None, 1, 1, 28)     18844       ['block5b_se_reshape[0][0]']     
                                                                                                  
 block5b_se_expand (Conv2D)     (None, 1, 1, 672)    19488       ['block5b_se_reduce[0][0]']      
                                                                                                  
 block5b_s

                                                                                                  
 block5d_se_excite (Multiply)   (None, 14, 14, 672)  0           ['block5d_activation[0][0]',     
                                                                  'block5d_se_expand[0][0]']      
                                                                                                  
 block5d_project_conv (Conv2D)  (None, 14, 14, 112)  75264       ['block5d_se_excite[0][0]']      
                                                                                                  
 block5d_project_bn (BatchNorma  (None, 14, 14, 112)  448        ['block5d_project_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 block5d_add (Add)              (None, 14, 14, 112)  0           ['block5d_project_bn[0][0]',     
          

 block6b_expand_conv (Conv2D)   (None, 7, 7, 1152)   221184      ['block6a_project_bn[0][0]']     
                                                                                                  
 block6b_expand_bn (BatchNormal  (None, 7, 7, 1152)  4608        ['block6b_expand_conv[0][0]']    
 ization)                                                                                         
                                                                                                  
 block6b_expand_activation (Act  (None, 7, 7, 1152)  0           ['block6b_expand_bn[0][0]']      
 ivation)                                                                                         
                                                                                                  
 block6b_dwconv2 (DepthwiseConv  (None, 7, 7, 1152)  10368       ['block6b_expand_activation[0][0]
 2D)                                                             ']                               
          

 2D)                                                             ']                               
                                                                                                  
 block6d_bn (BatchNormalization  (None, 7, 7, 1152)  4608        ['block6d_dwconv2[0][0]']        
 )                                                                                                
                                                                                                  
 block6d_activation (Activation  (None, 7, 7, 1152)  0           ['block6d_bn[0][0]']             
 )                                                                                                
                                                                                                  
 block6d_se_squeeze (GlobalAver  (None, 1152)        0           ['block6d_activation[0][0]']     
 agePooling2D)                                                                                    
          

 agePooling2D)                                                                                    
                                                                                                  
 block6f_se_reshape (Reshape)   (None, 1, 1, 1152)   0           ['block6f_se_squeeze[0][0]']     
                                                                                                  
 block6f_se_reduce (Conv2D)     (None, 1, 1, 48)     55344       ['block6f_se_reshape[0][0]']     
                                                                                                  
 block6f_se_expand (Conv2D)     (None, 1, 1, 1152)   56448       ['block6f_se_reduce[0][0]']      
                                                                                                  
 block6f_se_excite (Multiply)   (None, 7, 7, 1152)   0           ['block6f_activation[0][0]',     
                                                                  'block6f_se_expand[0][0]']      
          

                                                                  'block6h_se_expand[0][0]']      
                                                                                                  
 block6h_project_conv (Conv2D)  (None, 7, 7, 192)    221184      ['block6h_se_excite[0][0]']      
                                                                                                  
 block6h_project_bn (BatchNorma  (None, 7, 7, 192)   768         ['block6h_project_conv[0][0]']   
 lization)                                                                                        
                                                                                                  
 block6h_add (Add)              (None, 7, 7, 192)    0           ['block6h_project_bn[0][0]',     
                                                                  'block6g_add[0][0]']            
                                                                                                  
 top_conv 

In [120]:
# Pull one batch from the test generator: batch[0] holds the images and
# batch[1] the matching labels. The built-in next() works with any Python
# iterator and does not depend on the generator exposing a `.next()` method.
batch = next(test_generator)
# Class index of the first image's label
np.argmax(batch[1][0])

16

In [122]:
# Run the provided model on the images of the sampled batch (one vector of per-class scores per image)
test_preds = provided_model.predict(batch[0])



In [123]:
# Shape of the image array in the sampled batch
batch[0].shape

(32, 224, 224, 3)

In [124]:
# Shape of a single prediction vector, and the number of predictions in the batch
test_preds[0].shape, len(test_preds)

((525,), 32)

In [125]:
# Extremely high confidence: the largest score assigned to any class for the first image
test_preds[0].max()

0.9821244

In [126]:
# Index of the top-scoring class for the first image (compare with the label index from batch[1][0])
np.argmax(test_preds[0])

16

In [127]:
# Predicted class index for every image in the batch: argmax over each
# prediction vector, done in one vectorized call and converted to a plain list.
preds = np.argmax(test_preds, axis=1).tolist()

In [128]:
# Display the predicted class indices for the batch
preds

[16,
 144,
 23,
 123,
 25,
 390,
 161,
 298,
 309,
 174,
 369,
 301,
 326,
 91,
 205,
 33,
 24,
 467,
 208,
 323,
 441,
 267,
 2,
 100,
 291,
 88,
 115,
 517,
 265,
 178,
 220,
 191]

In [129]:
# True class index for every image in the batch: argmax over each label
# vector, done in one vectorized call and converted to a plain list.
true_values = np.argmax(batch[1], axis=1).tolist()

In [132]:
# Display the true class indices for the batch
true_values 

[16,
 144,
 23,
 123,
 25,
 390,
 161,
 298,
 309,
 174,
 369,
 301,
 326,
 91,
 205,
 33,
 478,
 467,
 208,
 323,
 441,
 267,
 2,
 100,
 291,
 88,
 115,
 517,
 265,
 178,
 220,
 191]

In [133]:
# Pair each true class index with the corresponding prediction as (true, predicted)
list(zip(true_values, preds))

[(16, 16),
 (144, 144),
 (23, 23),
 (123, 123),
 (25, 25),
 (390, 390),
 (161, 161),
 (298, 298),
 (309, 309),
 (174, 174),
 (369, 369),
 (301, 301),
 (326, 326),
 (91, 91),
 (205, 205),
 (33, 33),
 (478, 24),
 (467, 467),
 (208, 208),
 (323, 323),
 (441, 441),
 (267, 267),
 (2, 2),
 (100, 100),
 (291, 291),
 (88, 88),
 (115, 115),
 (517, 517),
 (265, 265),
 (178, 178),
 (220, 220),
 (191, 191)]

In [135]:
# Class names behind the mismatched pair in the batch: true index 478 vs. predicted index 24
reverse_dict[478], reverse_dict[24]

('TEAL DUCK', 'AMERICAN WIGEON')