Load Dataset

In [None]:
from pathlib import Path
import os
import sys

sys.path.append(str(Path(os.getcwd()).parents[1]))
from Helper_Functions import common_utils
directory_to_extract_to = os.getcwd()

common_utils.load_data_from_one_drive(directory_to_extract_to, "classification_paths", "90_animal_classification")

Load required libraries

In [2]:
import numpy as np
import pandas as pd
import os
import shutil
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import random
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.metrics import categorical_crossentropy

Preprocessing

In [3]:
data_dir = 'animals/animals'
Name = os.listdir(data_dir)
print(Name)
print(len(Name))

['antelope', 'badger', 'bat', 'bear', 'bee', 'beetle', 'bison', 'boar', 'butterfly', 'cat', 'caterpillar', 'chimpanzee', 'cockroach', 'cow', 'coyote', 'crab', 'crow', 'deer', 'dog', 'dolphin', 'donkey', 'dragonfly', 'duck', 'eagle', 'elephant', 'flamingo', 'fly', 'fox', 'goat', 'goldfish', 'goose', 'gorilla', 'grasshopper', 'hamster', 'hare', 'hedgehog', 'hippopotamus', 'hornbill', 'horse', 'hummingbird', 'hyena', 'jellyfish', 'kangaroo', 'koala', 'ladybugs', 'leopard', 'lion', 'lizard', 'lobster', 'mosquito', 'moth', 'mouse', 'octopus', 'okapi', 'orangutan', 'otter', 'owl', 'ox', 'oyster', 'panda', 'parrot', 'pelecaniformes', 'penguin', 'pig', 'pigeon', 'porcupine', 'possum', 'raccoon', 'rat', 'reindeer', 'rhinoceros', 'sandpiper', 'seahorse', 'seal', 'shark', 'sheep', 'snake', 'sparrow']
78


In [4]:
def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder,filename))
        if img is not None:
            images.append(img)
    return images

In [6]:
for i in Name:
  images = load_images_from_folder(f"animals/animals/{i}/")
  print("Class: ",i," Count: ",len(images))

Class:  parrot  Count:  60
Class:  rhinoceros  Count:  60
Class:  goose  Count:  60
Class:  lion  Count:  60
Class:  snake  Count:  60
Class:  duck  Count:  60
Class:  cat  Count:  60
Class:  otter  Count:  60
Class:  okapi  Count:  60
Class:  bear  Count:  60
Class:  pelecaniformes  Count:  60
Class:  hippopotamus  Count:  60
Class:  flamingo  Count:  60
Class:  bee  Count:  60
Class:  owl  Count:  60
Class:  penguin  Count:  60
Class:  orangutan  Count:  60
Class:  wombat  Count:  60
Class:  horse  Count:  60
Class:  panda  Count:  60
Class:  possum  Count:  60
Class:  hyena  Count:  60
Class:  rat  Count:  60
Class:  seahorse  Count:  60
Class:  mouse  Count:  60
Class:  zebra  Count:  60
Class:  kangaroo  Count:  60
Class:  turtle  Count:  60
Class:  octopus  Count:  60
Class:  sheep  Count:  60
Class:  hummingbird  Count:  60
Class:  raccoon  Count:  60
Class:  squirrel  Count:  60
Class:  mosquito  Count:  60
Class:  pig  Count:  60
Class:  fly  Count:  60
Class:  hornbill  Count

Split data into validation and training

In [5]:
rootdir= 'animals/'
classes = Name

for i in classes:
  os.makedirs(rootdir +'/train/' + i)
  os.makedirs(rootdir +'/test/' + i)
  source = 'animals/animals/' + i
  allFileNames = os.listdir(source)
  np.random.shuffle(allFileNames)

  test_ratio = 0.05
  
  train_FileNames, test_FileNames = np.split(np.array(allFileNames),
                                                      [int(len(allFileNames)* (1 - test_ratio))])

  train_FileNames = [source+'/'+ name for name in train_FileNames.tolist()]
  test_FileNames = [source+'/' + name for name in test_FileNames.tolist()]

  for name in train_FileNames:
    shutil.copy(name, rootdir +'/train/' + i)

  for name in test_FileNames:
    shutil.copy(name, rootdir +'/test/' + i)

In [7]:
image_size = (256, 256)

In [9]:
train_dir = 'animals/train'
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(train_dir, 
                                                                    batch_size = 8,
                                                                    image_size = image_size,
                                                                    shuffle = True, 
                                                                    seed = 505,
                                                                    validation_split=0.1,
                                                                    subset = "training")
validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(train_dir, 
                                                                    batch_size = 8,
                                                                    image_size = image_size,
                                                                    shuffle = True, 
                                                                    seed = 505,
                                                                    validation_split=0.1,
                                                                    subset = "validation")

Found 4389 files belonging to 78 classes.
Using 3951 files for training.
Found 4389 files belonging to 78 classes.
Using 438 files for validation.


Load pre-trained CNN and create the last few layers

In [33]:
base_model=tf.keras.applications.EfficientNetB2(include_top=False, weights="imagenet",input_shape=(256, 256, 3), pooling='max') 

x=base_model.output
x=tf.keras.layers.BatchNormalization()(x)
x = Dense(256, activation='relu')(x)
x=Dropout(rate=.45)(x)        
output=Dense(90, activation='softmax')(x)

model=Model(inputs=base_model.input, outputs=output)
model.compile(Adamax(learning_rate=.001), loss='sparse_categorical_crossentropy', metrics=['accuracy']) 

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb2_notop.h5


Train the model

In [34]:
history=model.fit(x=train_dataset,  epochs=10, verbose=1,  validation_data=validation_dataset,
               callbacks = [tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


Predict test images and calculate accuracy

In [35]:
Name.sort()

In [36]:
def predict (img,model):
    img = cv2.resize(img,dsize=image_size,interpolation=cv2.INTER_CUBIC)
    img = np.expand_dims(img, axis = 0)
    prediction = model.predict(img)
    cname =np.argmax(prediction)
    animal = Name[cname]    
    return animal

In [42]:
correct_pred = 0
total_test_images = 0

for i in tqdm(Name):
  images = load_images_from_folder(f"animals/test/{i}/")
  total_test_images+=len(images)
  for image in images:
    pred = predict(image,model)
    if(pred == i):
      correct_pred+=1


print(f"Accuracy : {(correct_pred/total_test_images)*100}")

100%|██████████| 90/90 [00:18<00:00,  4.80it/s]

Accuracy : 93.7037037037037





Test with images from the internet

In [38]:
img = cv2.imread('test_cat.jpg')
predict(img,model)

'cat'

In [39]:
img = cv2.imread('test_dog.jpg')
predict(img,model)

'dog'