# Bird Species Classification:
### Using Computer Vision to Classify 525 Different Bird Species

##### By: David Hartsman

<hr style="border: 2px solid blue">

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import tensorflow.keras

import datetime

from tqdm import tqdm

from pathlib import Path

# To streamline the naming and generate tabular image data
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Keras model types and layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras import backend

# Metrics for classification
from sklearn.metrics import classification_report
from tensorflow.keras.metrics import FalseNegatives, FalsePositives, TrueNegatives, TruePositives

2024-02-04 17:15:57.050673: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


<hr style="border: 2px solid blue">

### Data Inspection

In [2]:
df = pd.read_csv('/Users/samalainabayeva/Desktop/FLAT_IRON!!!/birds_archive/birds.csv')

In [3]:
df.head()

Unnamed: 0,class id,filepaths,labels,data set,scientific name
0,0.0,train/ABBOTTS BABBLER/001.jpg,ABBOTTS BABBLER,train,MALACOCINCLA ABBOTTI
1,0.0,train/ABBOTTS BABBLER/007.jpg,ABBOTTS BABBLER,train,MALACOCINCLA ABBOTTI
2,0.0,train/ABBOTTS BABBLER/008.jpg,ABBOTTS BABBLER,train,MALACOCINCLA ABBOTTI
3,0.0,train/ABBOTTS BABBLER/009.jpg,ABBOTTS BABBLER,train,MALACOCINCLA ABBOTTI
4,0.0,train/ABBOTTS BABBLER/002.jpg,ABBOTTS BABBLER,train,MALACOCINCLA ABBOTTI


In [4]:
# Almost 90k images
df.shape

(89885, 5)

In [5]:
# There is a 2 - to - 1 ratio of max class observations to min class observations
df["class id"].value_counts()

class id
426.0    273
286.0    258
181.0    243
367.0    243
40.0     230
        ... 
357.0    140
445.0    140
375.0    140
211.0    140
408.0    140
Name: count, Length: 525, dtype: int64

<hr style="border: 2px solid blue">

### Using Tensorflow's ImageDataGenerator

In [24]:
# Instantiate an image generator object
image_generator = ImageDataGenerator()

# Transpose the data from image files to tabular data using the labeling structure provided by tensorflow
data = image_generator.flow_from_directory('/Users/samalainabayeva/Desktop/FLAT_IRON!!!/birds_archive/train')

Found 84635 images belonging to 525 classes.


In [38]:
type(data)

keras.preprocessing.image.DirectoryIterator

In [39]:
for dataframe in data:
    images = dataframe['filename']  # Access images
    labels = dataframe['class']     # Access labels
    # Process the batch of images and labels here
    print("Batch of images shape:", images.shape)
    print("Batch of labels shape:", labels.shape)
    break

TypeError: tuple indices must be integers or slices, not str

In [41]:
# Instantiate an image generator object
image_generator = ImageDataGenerator()

# Transpose the data from image files to tabular data using the labeling structure provided by tensorflow
data = image_generator.flow_from_directory('/Users/samalainabayeva/Desktop/FLAT_IRON!!!/birds_archive/train')

for dataframe in data:
    images = dataframe['filename']  # Access images
    labels = dataframe['class']     # Access labels
    # Process the batch of images and labels here
    print("Batch of images shape:", images.shape)
    print("Batch of labels shape:", labels.shape)
    break

Found 84635 images belonging to 525 classes.


TypeError: tuple indices must be integers or slices, not str

In [44]:
data[0][0].max()

255.0

In [45]:
from keras.preprocessing.image import ImageDataGenerator

# Define the main directory containing subdirectories for each bird species
main_directory = '/Users/samalainabayeva/Desktop/FLAT_IRON!!!/birds_archive/train'

# Define data augmentation and normalization options
image_generator = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values to [0,1]
    rotation_range=20,  # Randomly rotate images by up to 20 degrees
    width_shift_range=0.2,  # Randomly shift images horizontally
    height_shift_range=0.2,  # Randomly shift images vertically
    shear_range=0.2,  # Shear intensity
    zoom_range=0.2,  # Randomly zoom images
    horizontal_flip=True,  # Randomly flip images horizontally
    fill_mode='nearest'  # Fill in missing pixels with the nearest value
)

# Generate batches of augmented data from images in the directory
data_generator = image_generator.flow_from_directory(
    main_directory,
    target_size=(224, 224),  # Resize images to 224x224 pixels
    batch_size=32,  # Define batch size
    class_mode='categorical'  # Use categorical labels
)

# Check the class indices (corresponding to bird species labels)
class_indices = data_generator.class_indices
print("Class indices:", class_indices)

# Accessing batch data and labels
for images, labels in data_generator:
    # images: a batch of input images (shape: (batch_size, 224, 224, 3))
    # labels: a batch of corresponding labels (shape: (batch_size, num_classes))
    # Process each batch of data here
    print("Batch of images shape:", images.shape)
    print("Batch of labels shape:", labels.shape)
    break  # Exit the loop after processing the first batch

Found 84635 images belonging to 525 classes.
Class indices: {'ABBOTTS BABBLER': 0, 'ABBOTTS BOOBY': 1, 'ABYSSINIAN GROUND HORNBILL': 2, 'AFRICAN CROWNED CRANE': 3, 'AFRICAN EMERALD CUCKOO': 4, 'AFRICAN FIREFINCH': 5, 'AFRICAN OYSTER CATCHER': 6, 'AFRICAN PIED HORNBILL': 7, 'AFRICAN PYGMY GOOSE': 8, 'ALBATROSS': 9, 'ALBERTS TOWHEE': 10, 'ALEXANDRINE PARAKEET': 11, 'ALPINE CHOUGH': 12, 'ALTAMIRA YELLOWTHROAT': 13, 'AMERICAN AVOCET': 14, 'AMERICAN BITTERN': 15, 'AMERICAN COOT': 16, 'AMERICAN DIPPER': 17, 'AMERICAN FLAMINGO': 18, 'AMERICAN GOLDFINCH': 19, 'AMERICAN KESTREL': 20, 'AMERICAN PIPIT': 21, 'AMERICAN REDSTART': 22, 'AMERICAN ROBIN': 23, 'AMERICAN WIGEON': 24, 'AMETHYST WOODSTAR': 25, 'ANDEAN GOOSE': 26, 'ANDEAN LAPWING': 27, 'ANDEAN SISKIN': 28, 'ANHINGA': 29, 'ANIANIAU': 30, 'ANNAS HUMMINGBIRD': 31, 'ANTBIRD': 32, 'ANTILLEAN EUPHONIA': 33, 'APAPANE': 34, 'APOSTLEBIRD': 35, 'ARARIPE MANAKIN': 36, 'ASHY STORM PETREL': 37, 'ASHY THRUSHBIRD': 38, 'ASIAN CRESTED IBIS': 39, 'ASIAN DOL

Batch of images shape: (32, 224, 224, 3)
Batch of labels shape: (32, 525)


In [51]:
# Initialize lists to store images and labels
images_list = []
labels_list = []

# Iterate over the data iterator to process batches of data
for images, labels in tqdm(data_generator):
    # Append images and labels from the current batch to the lists
    images_list.append(images)
    labels_list.append(labels)
    
    if len(images_list) * images.shape[0] >= len(data_generator.filenames):
        break

100%|█████████████████████████████████████████████████████████████████████████████████▉| 2644/2645 [19:25<00:00,  2.27it/s]


In [None]:
# Concatenate the lists of images and labels into NumPy arrays
images_array = np.concatenate(images_list, axis=0)
# labels_array = np.concatenate(labels_list, axis=0)

NameError: name 'images_list' is not defined