## Import all necessary libraries 

In [1]:
import os 
import numpy as np 
import pandas as pd 
import cv2 
from glob import glob 
from tqdm import tqdm 
import tensorflow as tf 
from sklearn.model_selection import train_test_split

## Image Preprocessing

In [8]:
def read_image(path, size):
    """Load an image from disk and prepare it as a network input.

    Args:
        path: Location of the image file on disk.
        size: Edge length in pixels; the image is resized to ``size x size``
            without preserving the aspect ratio.

    Returns:
        A ``float32`` array of the resized image with pixel values divided
        by 255. Channels stay in the order OpenCV decodes them (BGR per the
        OpenCV docs); no colour conversion is applied here.

    Raises:
        FileNotFoundError: If OpenCV could not read or decode ``path``.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if image is None:
        raise FileNotFoundError(f"Could not read image file: {path!r}")
    image = cv2.resize(image, (size, size))
    image = image / 255.0
    image = image.astype(np.float32)
    return image

In [2]:
# Dataset layout: everything lives under `path`. The two glob patterns match
# every file inside train/ and test/; labels.csv maps image ids to breeds.
path = './'
train_path, test_path, labels_path = (
    os.path.join(path, entry) for entry in ('train/*', 'test/*', 'labels.csv')
)

In [3]:
# Load the label table into a DataFrame and preview its first five rows.
labels_df = pd.read_csv(filepath_or_buffer=labels_path)
labels_df.head(n=5)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [4]:
# Distinct breed names, in order of first appearance in the label table.
breed = pd.unique(labels_df["breed"])
print(breed)

['boston_bull' 'dingo' 'pekinese' 'bluetick' 'golden_retriever'
 'bedlington_terrier' 'borzoi' 'basenji' 'scottish_deerhound'
 'shetland_sheepdog' 'walker_hound' 'maltese_dog' 'norfolk_terrier'
 'african_hunting_dog' 'wire-haired_fox_terrier' 'redbone'
 'lakeland_terrier' 'boxer' 'doberman' 'otterhound' 'standard_schnauzer'
 'irish_water_spaniel' 'black-and-tan_coonhound' 'cairn' 'affenpinscher'
 'labrador_retriever' 'ibizan_hound' 'english_setter' 'weimaraner'
 'giant_schnauzer' 'groenendael' 'dhole' 'toy_poodle' 'border_terrier'
 'tibetan_terrier' 'norwegian_elkhound' 'shih-tzu' 'irish_terrier'
 'kuvasz' 'german_shepherd' 'greater_swiss_mountain_dog' 'basset'
 'australian_terrier' 'schipperke' 'rhodesian_ridgeback' 'irish_setter'
 'appenzeller' 'bloodhound' 'samoyed' 'miniature_schnauzer'
 'brittany_spaniel' 'kelpie' 'papillon' 'border_collie' 'entlebucher'
 'collie' 'malamute' 'welsh_springer_spaniel' 'chihuahua' 'saluki' 'pug'
 'malinois' 'komondor' 'airedale' 'leonberg' 'mexican_h

In [10]:
# Two-way lookup between a breed name and its integer class index.
id2breed = dict(enumerate(breed))
breed2id = {breed_label: class_idx for class_idx, breed_label in id2breed.items()}

In [11]:
# This is a multi-class classification problem: each training image is
# labelled with the integer index of its breed, taken from the `breed2id`
# mapping built in the previous cell.

# glob returns paths in filesystem-dependent order; sorting makes both the
# 1000-image subset and the seeded train/validation split reproducible.
ids = sorted(glob(train_path))

# Only the first 1000 images are used, so truncate before labelling instead
# of labelling the whole folder and discarding most of the work.
ids = ids[:1000]

# Build the image-id -> breed lookup once rather than filtering the
# DataFrame for every image. keep='first' mirrors taking the first match.
breed_by_image_id = (
    labels_df.drop_duplicates(subset='id', keep='first')
    .set_index('id')['breed']
    .to_dict()
)

labels = []
for image_path in ids:
    # The file name without directory or extension is the image id.
    # os.path handles the platform's separators, unlike splitting on '\\',
    # which only works for Windows-style paths.
    image_id = os.path.splitext(os.path.basename(image_path))[0]
    breed_name = breed_by_image_id[image_id]
    labels.append(breed2id[breed_name])

## Splitting the dataset

In [12]:
# Split paths and labels in a single call so every image path is guaranteed
# to stay paired with its own label. Two separate calls only line up because
# they happen to share the same seed and input length.
train_x, valid_x, train_y, valid_y = train_test_split(
    ids, labels, test_size = 0.2, random_state = 42
)

## Model

In [13]:
# Restore the previously trained classifier from its saved HDF5 file.
model = tf.keras.models.load_model(filepath='model.h5')

In [24]:
# Run the model on the first ten validation images and save a copy of each
# one annotated with its true and predicted breed for visual inspection.

# cv2.imwrite does not create missing directories (it just reports failure),
# so make sure the output folder exists first.
os.makedirs('save', exist_ok=True)

# Wrap the sequence itself (not the enumerate object) so tqdm knows the
# total. The loop variable is named image_path so it does not overwrite the
# dataset root stored in `path`.
for i, image_path in enumerate(tqdm(valid_x[:10])):
    image = read_image(image_path, 224)
    image = np.expand_dims(image, axis = 0)  # add a leading batch axis
    # First (and only) row of the batch output: one score per class.
    pred = model.predict(image)[0]
    # The index of the highest score is the predicted class id.
    label_idx = int(np.argmax(pred))
    breed_name = id2breed[label_idx]  # predicted breed

    ori_breed = id2breed[valid_y[i]]  # ground-truth breed
    ori_image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    # Draw the ground-truth breed at the top-left and the predicted breed
    # further down. Previously both calls drew the predicted name, so the
    # true breed never appeared on the image.
    # NOTE(review): (200, 330) is a fixed pixel position and can fall outside
    # smaller images; confirm it suits the dataset's image sizes.
    ori_image = cv2.putText(ori_image, ori_breed, (0,10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
    ori_image = cv2.putText(ori_image, breed_name, (200, 330), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

    cv2.imwrite(f'save/valid_{i}.png', ori_image)

10it [00:00, 14.97it/s]
