In [None]:
import os
import glob
import pandas as pd
import numpy as np
import cv2
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras.initializers import glorot_uniform

In [None]:

# Load the full label table (one row per training image: 'id', 'breed').
data = pd.read_csv(r'C:\Users\User\Desktop\jiko\dog-breed-identification\labels.csv')

# Only this subset of breeds is used for training.
breed_list = ['beagle', 'chihuahua', 'doberman','french_bulldog', 'golden_retriever', 'malamute', 'pug', 'saint_bernard', 'scottish_deerhound','tibetan_mastiff']

# Index labels of the rows whose breed is in the subset.
entries = data.loc[data['breed'].isin(breed_list)].index

# Select the rows by label with .loc (the index values are labels, not
# positions) and renumber them 0..n-1 so later cells can address rows by
# integer. This replaces a row-by-row .at loop that looked the labels up
# with positional .iloc, which is only correct for a default RangeIndex.
train_df = data.loc[entries, ['id', 'breed']].reset_index(drop=True)


In [None]:
train_img_dir = r"C:\Users\User\Desktop\jiko\dog-breed-identification\train"

# Load one 224x224 image per row of train_df. The position of every row whose
# image was actually loaded is remembered so that labels are built from
# exactly those rows; otherwise a missing or unreadable file would shift the
# images relative to their labels.
all_images = []
loaded_rows = []
for row_pos, image_id in enumerate(train_df['id']):

    train_data_path = os.path.join(train_img_dir, image_id + '.jpg')

    img = cv2.imread(train_data_path)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        print("Skipping missing or unreadable image: ", train_data_path)
        continue

    all_images.append(cv2.resize(img, (224, 224)))
    loaded_rows.append(row_pos)

if not all_images:
    raise FileNotFoundError("No training images could be loaded from " + train_img_dir)

x_train = np.array(all_images)
print("Shape of training_data: ", x_train.shape, '\n', "dtype of training_data: ", x_train.dtype, '\n')

train_set = x_train.astype('float32')

# Labelizing labels (only for the rows whose image was loaded).
train_labels_df = train_df.iloc[loaded_rows]

breed_count = train_labels_df['breed'].value_counts()
print("breed_count is: ", breed_count, '\n')

y = train_labels_df['breed'].values

# Breed name -> integer class id (classes are the sorted unique breed names).
target_ = LabelEncoder()
y = target_.fit_transform(y)

# Integer class id -> dense one-hot row.
y = y.reshape(-1, 1)
try:
    # Newer scikit-learn releases name the keyword `sparse_output`.
    onehotencoder = OneHotEncoder(sparse_output=False)
except TypeError:
    # Older releases only know the original `sparse` keyword.
    onehotencoder = OneHotEncoder(sparse=False)
y_train = onehotencoder.fit_transform(y)

if len(train_set) != len(y_train):
    raise ValueError("Number of images and number of labels differ")

# Shuffling the data (images and labels together, fixed seed).
train_set, y_train = shuffle(train_set, y_train, random_state=1)


In [None]:
'''
Building the identity block.
'''
def identity_block(x, kernel_s, filters):
    """
    Three-convolution residual block whose shortcut is the untouched input.

    Arguments:
    x -- input tensor; batch normalisation is applied on axis 3
    kernel_s -- side length of the square kernel used by the middle convolution
    filters -- sequence of three filter counts, one per convolution

    Returns:
    Tensor produced by adding the input to the main-path output and applying
    ReLU. Because the input is added unchanged, it has to match the shape of
    the third convolution's output.
    """
    n_first, n_middle, n_last = filters

    # The skip connection carries the input as is.
    shortcut = x

    # Main path, part 1: 1x1 convolution.
    main = Conv2D(n_first, (1, 1), strides=(1, 1), padding='valid')(x)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, part 2: kernel_s x kernel_s convolution, spatial size kept.
    main = Conv2D(n_middle, (kernel_s, kernel_s), strides=(1, 1), padding='same')(main)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, part 3: 1x1 convolution, no activation before the merge.
    main = Conv2D(n_last, (1, 1), strides=(1, 1), padding='valid')(main)
    main = BatchNormalization(axis=3)(main)

    # Merge with the shortcut, then activate.
    merged = Add()([main, shortcut])
    return Activation('relu')(merged)


In [None]:
'''
Building convolution blocks
'''
def convolution_block(x, kernel_s, filters, stride_ = 2):
    """
    Three-convolution residual block with a convolutional shortcut.

    Arguments:
    x -- input tensor; batch normalisation is applied on axis 3
    kernel_s -- side length of the square kernel used by the middle convolution
    filters -- sequence of three filter counts, one per main-path convolution
    stride_ -- stride of the first main-path convolution and of the shortcut
               convolution

    Returns:
    Tensor produced by adding the projected shortcut to the main-path output
    and applying ReLU.
    """
    n_first, n_middle, n_last = filters

    shortcut = x

    # Main path, part 1: strided 1x1 convolution.
    main = Conv2D(n_first, (1, 1), strides=(stride_, stride_))(x)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, part 2: kernel_s x kernel_s convolution, spatial size kept.
    main = Conv2D(n_middle, (kernel_s, kernel_s), strides=(1, 1), padding='same')(main)
    main = BatchNormalization(axis=3)(main)
    main = Activation('relu')(main)

    # Main path, part 3: 1x1 convolution, no activation before the merge.
    main = Conv2D(n_last, (1, 1), strides=(1, 1), padding='valid')(main)
    main = BatchNormalization(axis=3)(main)

    # Shortcut path: 1x1 convolution with the same stride and the same number
    # of filters as the last main-path convolution, so both branches can be added.
    shortcut = Conv2D(n_last, (1, 1), strides=(stride_, stride_), padding='valid')(shortcut)
    shortcut = BatchNormalization(axis=3)(shortcut)

    merged = Add()([main, shortcut])
    return Activation('relu')(merged)


In [None]:
'''
Building Resnet50.
'''
def Resnet(input_shape = (224, 224, 3), classes = y_train.shape[1]):
    """
    Assemble the residual network used for breed classification.

    Arguments:
    input_shape -- shape of one input image
    classes -- number of output units of the final softmax layer

    Returns:
    An uncompiled keras Model named 'Resnet'.
    """
    # One entry per residual stage:
    # (filter counts, stride of the opening convolution block,
    #  number of identity blocks that follow it)
    stage_specs = [
        ([64, 64, 256], 1, 2),
        ([128, 128, 512], 2, 3),
        ([256, 256, 1024], 2, 5),
        ([512, 512, 2048], 2, 2),
    ]

    net_input = Input(input_shape)

    # Stage 1: padded 7x7 convolution followed by max pooling.
    net = ZeroPadding2D((3, 3))(net_input)
    net = Conv2D(64, (7, 7), strides=(2, 2))(net)
    net = BatchNormalization(axis=3)(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)

    # Stages 2-5: one convolution block, then the listed identity blocks.
    for stage_filters, stage_stride, n_identity in stage_specs:
        net = convolution_block(net, kernel_s=3, filters=stage_filters, stride_=stage_stride)
        for _ in range(n_identity):
            net = identity_block(net, 3, stage_filters)

    # Head: average pooling, flatten, softmax classifier.
    net = AveragePooling2D((2, 2), name="avg_pool")(net)
    net = Flatten()(net)
    net = Dense(
        classes,
        activation='softmax',
        name='fully_connected_layer',
        kernel_initializer=glorot_uniform(seed=0),
    )(net)

    return Model(inputs=net_input, outputs=net, name='Resnet')

In [None]:
# Build the network, train it on the training images and one-hot labels, and
# save the trained model to disk. Change the save location below as needed.
model = Resnet(
    input_shape=(224, 224, 3),
    classes=y_train.shape[1],
)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Runs for 100 epochs; adjust as needed.
model.fit(
    train_set,
    y_train,
    epochs=100,
    batch_size=32,
)

model.save(r"C:\Users\User\Desktop\jiko\resnet50")


In [None]:
'''
    Testing the model with unseen images. The dataset linked in the assignment does not provide labels for the test images, so a confusion matrix cannot be computed.
'''
import os
import glob
import pandas as pd
import numpy as np
import cv2
import keras


def load_test_images(folder, max_images=400, target_size=(224, 224)):
    """
    Read images from a folder into a single float32 array.

    Arguments:
    folder -- directory whose entries are tried as images
    max_images -- upper bound on the number of images returned; None removes
                  the bound
    target_size -- (width, height) every image is resized to

    Returns:
    float32 array of shape (n, height, width, 3). n is 0 when the folder
    holds no readable image.
    """
    images = []

    # os.listdir gives no ordering guarantee; sorting makes the selected
    # subset the same on every run.
    for file_ in sorted(os.listdir(folder)):

        # Stop once the requested number of images has been collected. Only
        # successfully decoded images count towards the limit.
        if max_images is not None and len(images) >= max_images:
            break

        img = cv2.imread(os.path.join(folder, file_))

        # cv2.imread returns None for anything it cannot decode.
        if img is None:
            continue

        images.append(cv2.resize(img, target_size))

    if images:
        x_test = np.array(images, dtype='float32')
    else:
        # Keep a 4-D shape even when nothing was loaded.
        x_test = np.empty((0, target_size[1], target_size[0], 3), dtype='float32')

    print("Test data shape: ", x_test.shape)

    return x_test



def prediction(array_, breeds=None):
    """
    Classify a batch of images with the saved model and report the top class
    of the first image.

    Arguments:
    array_ -- batch of images to pass to model.predict
    breeds -- breed names the model was trained on; defaults to the
              notebook-level `breed_list`

    Returns:
    dict with 'Score' (highest class probability for the first image) and
    'label_name' (the matching breed name).

    Raises:
    ValueError -- if array_ is empty, or if the number of model outputs does
                  not match the number of breed labels
    """
    if breeds is None:
        breeds = breed_list

    if len(array_) == 0:
        raise ValueError("prediction() needs at least one image")

    train_df = pd.read_csv(r'C:\Users\User\Desktop\jiko\dog-breed-identification\labels.csv')

    # Labelizing labels. The encoder must see the same breed subset that the
    # training labels were built from. Fitting it on every breed in the file
    # would give a different class order, and the predicted index would map
    # to the wrong name.
    train_df = train_df.loc[train_df['breed'].isin(breeds)]

    y = train_df['breed'].values

    target_ = LabelEncoder()
    target_.fit(y)

    labels_ = list(target_.classes_)

    # Prediction. The path is the location the training cell saves the model to.
    model = keras.models.load_model(r"C:\Users\User\Desktop\jiko\resnet50")

    predictions = model.predict(array_)

    if predictions.shape[1] != len(labels_):
        raise ValueError(
            "Model has %d outputs but %d breed labels were found"
            % (predictions.shape[1], len(labels_))
        )

    max_prob = np.argmax(predictions, axis=1)

    # Only the first image of the batch is reported.
    list_ = {'Score': predictions[0][max_prob[0]], 'label_name': labels_[max_prob[0]] }

    return list_


# Driver: load the test images, run the saved model on them and print the
# result for the first image.
if __name__ == '__main__':

    # Folder holding the unlabeled test images.
    test_img_dir = r"C:\Users\User\Desktop\jiko\dog-breed-identification\test"


    # Resized test images as one float32 array.
    test_data = load_test_images(test_img_dir)

    # dict with 'Score' and 'label_name' for the first image of the batch.
    predictions = prediction(test_data)

    print(predictions)
