In [None]:
import gc
import os
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

### Loading the Data

In [None]:
# Dataset root on Kaggle; every other path in the notebook is derived from it.
root = Path("/kaggle/input/dsgt-dog-breed-identification")

# Sanity check: display one training image. OpenCV loads BGR, matplotlib
# expects RGB, hence the colour conversion before plotting.
image_paths = [*root.glob("train/*.jpg")]
path = image_paths[11]
img = cv2.cvtColor(cv2.imread(str(path)), cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.show()

In [None]:
df = pd.read_csv(f"{root}/labels.csv")
train = f"{root}/train/"
test = f"{root}/test/"

In [None]:
df.head()

In [None]:
dog_breeds = sorted(list(set(df['breed'])))
n_classes = len(dog_breeds)
print(n_classes)
dog_breeds[:10]

In [None]:
class_to_num = dict(zip(dog_breeds, range(n_classes)))

In [None]:
class_to_num['toy_poodle']

In [None]:
df['file_name'] = df['id'].apply(lambda x:train+f"{x}.jpg")
df.head()

In [None]:
# Integer class index for every row of `df`, in row order.
codes = [class_to_num[breed_name] for breed_name in df['breed']]

### Visualizing the Data

In [None]:
# Horizontal bar chart of the number of labelled images per breed.
count_fig, count_ax = plt.subplots(figsize=(30, 20))
sns.countplot(y="breed", data=df, palette="Set1", ax=count_ax)
plt.show()

In [None]:
nrow=5
ncol=5
fig,ax=plt.subplots(nrow,ncol,figsize=(10,10))
for i,(img_id,breed) in enumerate(df[["id","breed"]].values[: nrow*ncol]):
    image_path=os.path.join(train,img_id+".jpg")
    image=cv2.imread(image_path)
    image=cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
    row=i//ncol
    col=i%ncol
    ax[row,col].imshow(image)
    ax[row,col].set_title(breed)
    ax[row,col].axis("off")
    
plt.tight_layout()
plt.show()

In [None]:
print("Total number of unique Dog Breeds :",len(df.breed.unique()))

### Preprocessing Data

In [None]:
import numpy as np
from tensorflow.keras.applications.mobilenet import preprocess_input
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Width of the softmax output layer. Derived from the labels (n_classes is
# computed from df['breed'] above) instead of a hard-coded 120, so the model
# always matches the classes actually present in labels.csv.
num_breed = n_classes
# Images are resized to image_size x image_size before being fed to the network.
image_size = 200
# Mini-batch size used by the data generators.
batch_size = 32
# Maps breed names to integer labels for training.
encoder = LabelEncoder()

In [None]:
train_data=np.zeros((len(df),image_size,image_size,3),dtype="float32")
for i,img_id in enumerate(df["file_name"]):
    img=cv2.resize(cv2.imread(img_id,cv2.IMREAD_COLOR),((image_size,image_size)))
    img_array=preprocess_input(np.expand_dims(np.array(img[...,: : -1].astype(np.float32)).copy(),axis=0))
    train_data[i]=img_array

In [None]:
train_data[0]

In [None]:
label_data = encoder.fit_transform(df["breed"].values)

In [None]:
label_data

In [None]:
x_train, x_test, y_train, y_test = train_test_split(train_data,label_data,test_size=0.2,random_state=42)

In [None]:
print("************************************************************")
print("Train Data Shape :",x_train.shape)
print("************************************************************")
print("Test Data Shape :",x_test.shape)
print("************************************************************")
print("Train Label Data Shape :",y_train.shape)
print("************************************************************")
print("Test Label Data Shape :",y_test.shape)
print("************************************************************")

### Training

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied to training batches only.
augmentation_options = dict(
    rotation_range=45,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.25,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)
train_generator = train_datagen.flow(x_train, y_train, batch_size=batch_size)

# Held-out images are batched as-is, without augmentation.
test_datagen = ImageDataGenerator()
test_generator = test_datagen.flow(x_test, y_test, batch_size=batch_size)

In [None]:
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model
import tensorflow as tf

# ImageNet-pretrained ResNet50V2 without its classification head; every
# backbone layer is frozen so only the new head below is trained.
resnet = ResNet50V2(input_shape=[image_size, image_size, 3], weights='imagenet', include_top=False)
for backbone_layer in resnet.layers:
    backbone_layer.trainable = False

# Classification head: batch-norm -> global average pool -> three ReLU dense
# layers -> dropout -> softmax over num_breed classes.
x = resnet.output
x = BatchNormalization()(x)
x = GlobalAveragePooling2D()(x)
for units in (1024, 512, 256):
    x = Dense(units, activation='relu')(x)
x = Dropout(0.2)(x)
prediction = Dense(num_breed, activation='softmax')(x)
model = Model(inputs=resnet.input, outputs=prediction)

# Sparse cross-entropy: the labels are integer class indices, not one-hot rows.
learning_rate = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Train the head for 15 epochs, validating on the held-out split.
# len(generator) is the number of batches needed to cover every sample
# (the last batch may be smaller). The previous `n // batch_size` dropped that
# final partial batch, and because the validation generator shuffles, each
# epoch was validated on a different subset of the held-out images.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=15,
    validation_data=test_generator,
    validation_steps=len(test_generator),
)

In [None]:
plt.figure(figsize=(7,5))
plt.plot(history.history['accuracy'],label="Train_Accuracy",color="blue",marker="*")
plt.plot(history.history['val_accuracy'],label="Validation_Accuracy",color="red",marker="P")
plt.grid(True)
plt.legend()
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.show()

In [None]:
plt.figure(figsize=(7,5))
plt.plot(history.history['loss'],color="green",marker="D")
plt.plot(history.history['val_loss'],color="navy",marker="H")
plt.grid(True)
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['Train_Loss', 'Validation_Loss'], loc='upper left')
plt.show()

### Predicting

In [None]:
del train_data
gc.collect()

In [None]:
test_files = []
for pic in os.listdir(test):
    test_files.append(test+pic)

In [None]:
test_df = pd.DataFrame({'file_name':test_files})

In [None]:
test_df.head()

In [None]:
test_data=np.zeros((len(test_df),image_size,image_size,3),dtype="float32")
for i,img_id in enumerate(test_df["file_name"]):
    img=cv2.resize(cv2.imread(img_id,cv2.IMREAD_COLOR),((image_size,image_size)))
    img_array=preprocess_input(np.expand_dims(np.array(img[...,: : -1].astype(np.float32)).copy(),axis=0))
    test_data[i]=img_array
    

In [None]:
test_data[0]

In [None]:
pre = model.predict(test_data)
predictions = np.argmax(pre, axis=1)

In [None]:
pre

In [None]:
sample_df = pd.read_csv('/kaggle/input/dsgt-dog-breed-identification/sample_submission.csv')

In [None]:
for b in dog_breeds:
    sample_df[b] = pre[:,class_to_num[b]]
    sample_df.to_csv('pred.csv', index=None)

In [None]:
sample_df

In [None]:
predictions

In [None]:
def get_key(val):
    """Return the first key of `class_to_num` whose value equals `val`.

    Returns None when no value matches.
    """
    matching_keys = (key for key, value in class_to_num.items() if val == value)
    return next(matching_keys, None)

In [None]:
class_to_num = dict(zip(dog_breeds, range(n_classes)))
class_to_num['toy_poodle']

In [None]:
named_predictions = []
for i in predictions:
    named_predictions.append(get_key(i))

In [None]:
named_predictions

In [None]:
test_data['breed'] = named_predictions

In [None]:
test_data.head()