In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!wget http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar

In [None]:
!ls; tar -xvf images.tar

In [None]:
!ls; pwd

In [None]:
!cd /kaggle/working/Images;ls

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Print the full path of every file found beneath the extracted Images directory.
for root_dir, _subdirs, file_names in os.walk('/kaggle/working/Images'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

In [None]:
IMAGE_DIR = '/kaggle/working/Images'

def make_dogbreed_dataframe(image_dir = IMAGE_DIR):
    """Build a DataFrame of image paths and breed labels from a directory tree.

    Every file whose name ends in ``.jpg`` (case-insensitive) under
    ``image_dir`` yields one row. The label is the name of the folder that
    directly contains the file, with everything up to and including the first
    '-' removed (a folder named ``n000-Some_breed`` gives ``Some_breed``). A
    folder name without '-' is used unchanged.

    Args:
        image_dir: Root directory that is scanned recursively.

    Returns:
        pandas.DataFrame with the columns ``path`` and ``label``.
    """
    paths = []
    label_gubuns = []
    for dirname, _, filenames in os.walk(image_dir):
        # Take the label from the containing folder itself instead of a fixed
        # character offset into the path, so any image_dir location works.
        folder_name = os.path.basename(os.path.normpath(dirname))
        breed = folder_name.split('-', 1)[-1]
        for filename in filenames:
            if filename.lower().endswith('.jpg'):
                paths.append(os.path.join(dirname, filename))
                label_gubuns.append(breed)

    data_df = pd.DataFrame({'path':paths, 'label':label_gubuns})
    return data_df

In [None]:
data_df = make_dogbreed_dataframe('/kaggle/working/Images')
data_df.head()

In [None]:
data_df['label'].value_counts()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Wide bar chart of the number of images per breed; x labels are turned
# vertical after plotting.
_, label_ax = plt.subplots(figsize=(26, 4))
sns.countplot(x='label', data=data_df, ax=label_ax)
plt.xticks(rotation=90)

In [None]:
import cv2

def show_grid_images(image_path_list, ncols=8, title=None):
    """Display up to ``ncols`` images from ``image_path_list`` in a single row.

    Args:
        image_path_list: Sequence of image file paths to read with OpenCV.
        ncols: Number of axes in the row.
        title: Title placed above every displayed image.

    Raises:
        ValueError: If an image file cannot be read.
    """
    # squeeze=False keeps `axs` two-dimensional, so indexing also works for ncols=1.
    figure, axs = plt.subplots(figsize=(22,4), nrows=1, ncols=ncols, squeeze=False)
    # zip stops at the shorter input, so a list with fewer than ncols paths
    # no longer raises IndexError.
    for ax, image_path in zip(axs[0], image_path_list):
        bgr_image = cv2.imread(image_path)
        if bgr_image is None:
            raise ValueError('Could not read image: {}'.format(image_path))
        ax.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
        ax.set_title(title)

In [None]:
# First six image paths of two breeds, shown as one row per breed.
breed_image_list_01 = data_df.loc[data_df['label']=='Staffordshire_bullterrier', 'path'].head(6).tolist()
breed_image_list_02 = data_df.loc[data_df['label']=='American_Staffordshire_terrier', 'path'].head(6).tolist()

for grid_title, grid_paths in (('S', breed_image_list_01), ('A', breed_image_list_02)):
    show_grid_images(grid_paths, ncols=6, title=grid_title)

In [None]:
data_df['label'].value_counts().index.tolist()

In [None]:
# Show a six-image row for each of the first five breeds in value_counts() order.
breed_list = data_df['label'].value_counts().index.tolist()

for breed in breed_list[:5]:
    breed_image_list = data_df.loc[data_df['label']==breed, 'path'].head(6).tolist()
    show_grid_images(breed_image_list, ncols=6, title=breed)

In [None]:
import albumentations as A

# Demonstration augmentation pipeline: flips, shift/scale/rotate, brightness/contrast
# and hue/saturation/value jitter, each applied with probability 0.5.
# NOTE(review): the HueSaturationValue limits are set to 0.2 — presumably a much larger,
# integer-scale value was intended; confirm against the albumentations documentation.
imsi_augmentor = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.2,0.2), contrast_limit=(-0.2,0.2),p=0.5),
    A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5)
])

def show_grid_images(image_path_list, augmentor=None, ncols=4, title=None):
    """Display up to ``ncols`` images in one row, optionally augmented.

    Each image is read with OpenCV, converted to RGB, passed through
    ``augmentor`` when one is given, and then resized to 224x224.

    Args:
        image_path_list: Sequence of image file paths.
        augmentor: Optional callable used as ``augmentor(image=img)['image']``.
        ncols: Number of axes in the row.
        title: Title placed above every displayed image.

    Raises:
        ValueError: If an image file cannot be read.
    """
    # squeeze=False keeps `axs` two-dimensional, so indexing also works for ncols=1.
    figure, axs = plt.subplots(figsize=(22,4), nrows=1, ncols=ncols, squeeze=False)
    row_axes = axs[0]
    # Hide every frame up front so slots without an image stay blank.
    for ax in row_axes:
        ax.axis('off')

    # zip stops at the shorter input, so fewer than ncols paths is not an error.
    for ax, image_path in zip(row_axes, image_path_list):
        bgr_image = cv2.imread(image_path)
        if bgr_image is None:
            raise ValueError('Could not read image: {}'.format(image_path))
        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        if augmentor is not None:
            image = augmentor(image=image)['image']
        image = cv2.resize(image,(224,224))

        ax.imshow(image)
        ax.set_title(title)
        
# Same six images shown twice: untouched, then passed through imsi_augmentor.
breed_image_list_01 = data_df.loc[data_df['label']=='Staffordshire_bullterrier', 'path'].head(6).tolist()
for grid_title, grid_augmentor in (('original', None), ('augmented', imsi_augmentor)):
    show_grid_images(breed_image_list_01, augmentor=grid_augmentor, ncols=6, title=grid_title)

In [None]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(data_df, test_size=0.4, stratify=data_df['label'],random_state=2021)


In [None]:
print(train_df['label'].value_counts())
print(test_df['label'].value_counts())

In [None]:
from sklearn.model_selection import train_test_split

# One-hot labels and file paths of the training rows, followed by a 20%
# validation hold-out stratified on the one-hot labels.
train_label = pd.get_dummies(train_df['label']).values
train_path = train_df['path'].values

tr_path, val_path, tr_label, val_label = train_test_split(
    train_path,
    train_label,
    test_size=0.2,
    stratify=train_label,
    random_state=0,
)


In [None]:
from tensorflow.keras.utils import Sequence
import sklearn
import cv2

BATCH_SIZE=64
IMAGE_SIZE=224

class Breed_Dataset(Sequence):
    """Keras ``Sequence`` that loads images from disk one batch at a time.

    Every image is read with OpenCV, converted from BGR to RGB, optionally
    augmented, resized to ``image_size`` x ``image_size`` and optionally
    passed through ``pre_func``.

    Args:
        image_filenames: Sequence of image file paths.
        labels: Array-like of per-image labels aligned with
            ``image_filenames``, or ``None`` for unlabeled data (batches are
            then returned without a label component).
        image_size: Side length, in pixels, of the square output images.
        batch_size: Number of images per batch.
        augmentor: Optional callable used as ``augmentor(image=img)['image']``.
        shuffle: When truthy, samples are reshuffled at construction and at
            the end of every epoch.
        pre_func: Optional callable applied to each resized image.

    Raises:
        ValueError: If ``labels`` is given and its length differs from
            ``image_filenames``.
    """

    def __init__(self, image_filenames,labels,image_size=IMAGE_SIZE, batch_size=BATCH_SIZE,augmentor=None, shuffle=False, pre_func=None):
        # Keras 3's dataset base class warns when its initializer is skipped;
        # on older tf.keras this call is a harmless no-op.
        super().__init__()
        # Arrays (rather than lists) so a permutation index can reorder them.
        self.image_filenames = np.asarray(image_filenames)
        self.labels = None if labels is None else np.asarray(labels)
        if self.labels is not None and len(self.labels) != len(self.image_filenames):
            raise ValueError('image_filenames and labels must have the same length')
        self.image_size = image_size
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.pre_func = pre_func
        self.shuffle = shuffle
        if self.shuffle:
            self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        # Count files, not labels, so unlabeled datasets also have a length.
        return int(np.ceil(len(self.image_filenames)/self.batch_size))

    def __getitem__(self, index):
        """Load batch ``index``.

        Returns:
            ``(image_batch, label_batch)``, or only ``image_batch`` when the
            dataset was created with ``labels=None``.

        Raises:
            IndexError: If ``index`` is outside ``[0, len(self))``.
            ValueError: If an image file cannot be read.
        """
        # Signalling exhaustion lets plain Python iteration over the dataset stop.
        if not 0 <= index < len(self):
            raise IndexError('batch index {} out of range'.format(index))

        start = index * self.batch_size
        stop = start + self.batch_size
        image_name_batch = self.image_filenames[start:stop]
        # float32 is the default compute dtype of Keras models; a float64
        # buffer would only double the memory used per batch.
        image_batch = np.zeros(
            (len(image_name_batch), self.image_size, self.image_size, 3),
            dtype='float32',
        )

        for image_index, image_name in enumerate(image_name_batch):
            image = cv2.imread(str(image_name))
            if image is None:
                raise ValueError('Could not read image: {}'.format(image_name))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            if self.augmentor is not None:
                image = self.augmentor(image=image)['image']
            image = cv2.resize(image,(self.image_size,self.image_size))

            if self.pre_func is not None:
                image = self.pre_func(image)
            image_batch[image_index] = image

        if self.labels is None:
            return image_batch
        return image_batch, self.labels[start:stop]

    def on_epoch_end(self):
        """Reshuffle file names and labels together when shuffling is enabled."""
        if not self.shuffle:
            return
        # One shared permutation keeps every file name aligned with its label.
        order = np.random.permutation(len(self.image_filenames))
        self.image_filenames = self.image_filenames[order]
        if self.labels is not None:
            self.labels = self.labels[order]
        
        

In [None]:
import albumentations as A

augmentor_light = A.Compose([
    A.HorizontalFlip(p=0.5)
])

In [None]:
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess_input
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input

# Build the training and validation generators and inspect their first batch.
tr_ds = Breed_Dataset(tr_path, tr_label,image_size=IMAGE_SIZE,batch_size=BATCH_SIZE,augmentor = augmentor_light,shuffle=True, pre_func= xcp_preprocess_input )
# `shuffle` is a boolean switch, so pass False rather than None.
val_ds = Breed_Dataset(val_path, val_label, image_size=IMAGE_SIZE,batch_size=BATCH_SIZE,augmentor=None, shuffle=False, pre_func=xcp_preprocess_input)

# Index the first batch directly; element 0 of a batch is the image array.
tr_image_batch = tr_ds[0][0]
val_image_batch = val_ds[0][0]

print(tr_image_batch.shape, val_image_batch.shape)
print(tr_image_batch[:1])
print(val_image_batch[:1])

In [None]:
import time

tr_ds = Breed_Dataset(tr_path, tr_label, image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, augmentor=augmentor_light, shuffle=True, pre_func=xcp_preprocess_input)
val_ds = Breed_Dataset(val_path, val_label, image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, augmentor=None, shuffle=False, pre_func=xcp_preprocess_input)

# Print the time taken to load each training batch.
# Batches are indexed explicitly so the loop is bounded by len(tr_ds),
# regardless of how the Sequence base class implements iteration.
# perf_counter is a monotonic clock intended for measuring intervals.
start = time.perf_counter()
for batch_index in range(len(tr_ds)):
    _ = tr_ds[batch_index]
    end = time.perf_counter()
    print(end-start)
    start = end

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Conv2D, Dropout, Flatten, Activation, MaxPooling2D,GlobalAveragePooling2D
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint,LearningRateScheduler
from tensorflow.keras.applications import Xception, ResNet50V2, EfficientNetB0,EfficientNetB1
from tensorflow.keras.applications import MobileNet
import tensorflow as tf

def create_model(model_type = 'xception', in_shape=(224,224,3),n_classes=120):
    """Build a classifier on top of an ImageNet-pretrained backbone.

    The backbone (without its top) is followed by global average pooling, a
    1024-unit ReLU layer, dropout of 0.5 and a softmax layer of ``n_classes``
    units.

    Args:
        model_type: One of ``'resnet50v2'``, ``'xception'``,
            ``'efficientnetb0'`` or ``'efficientnetb1'``.
        in_shape: Shape of one input image.
        n_classes: Number of output classes.

    Returns:
        The assembled, uncompiled Keras ``Model``.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Supported model_type values mapped to constructor names in tf.keras.applications.
    backbone_names = {
        'resnet50v2': 'ResNet50V2',
        'xception': 'Xception',
        'efficientnetb0': 'EfficientNetB0',
        'efficientnetb1': 'EfficientNetB1',
    }
    # Validate first: an unknown name used to surface only later, as an
    # UnboundLocalError on `base_model`.
    if model_type not in backbone_names:
        raise ValueError(
            'Unsupported model_type {!r}; expected one of {}'.format(
                model_type, sorted(backbone_names)
            )
        )

    input_tensor = Input(shape=in_shape)
    backbone_builder = getattr(tf.keras.applications, backbone_names[model_type])
    base_model = backbone_builder(input_tensor=input_tensor, include_top=False, weights='imagenet')

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024,activation='relu')(x)
    x = Dropout(0.5)(x)
    preds = Dense(units=n_classes, activation='softmax')(x)

    model = Model(inputs=input_tensor, outputs=preds)

    return model

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import Sequence
import sklearn 
import cv2

import albumentations as A

IMAGE_DIR = '/kaggle/working/Images' 

def make_dogbreed_dataframe(image_dir=IMAGE_DIR):
    """Build a DataFrame of image paths and breed labels from a directory tree.

    Every file whose name ends in ``.jpg`` (case-insensitive) under
    ``image_dir`` yields one row. The label is the name of the folder that
    directly contains the file, with everything up to and including the first
    '-' removed (a folder named ``n000-Some_breed`` gives ``Some_breed``). A
    folder name without '-' is used unchanged.

    Args:
        image_dir: Root directory that is scanned recursively.

    Returns:
        pandas.DataFrame with the columns ``path`` and ``label``.
    """
    paths = []
    label_gubuns = []
    for dirname, _, filenames in os.walk(image_dir):
        # Take the label from the containing folder itself instead of a fixed
        # character offset into the path, so any image_dir location works.
        folder_name = os.path.basename(os.path.normpath(dirname))
        breed = folder_name.split('-', 1)[-1]
        for filename in filenames:
            if filename.lower().endswith('.jpg'):
                paths.append(os.path.join(dirname, filename))
                label_gubuns.append(breed)
    data_df = pd.DataFrame({'path':paths,'label':label_gubuns})
    return data_df

def get_train_valid(train_df, valid_size=0.2, random_state=2021):
    """Split a labelled DataFrame into training and validation arrays.

    Paths come from the ``path`` column; labels are the one-hot encoding of
    the ``label`` column produced by ``pd.get_dummies``. The split is a plain
    ``train_test_split`` call (no ``stratify`` argument is passed).

    Args:
        train_df: DataFrame with ``path`` and ``label`` columns.
        valid_size: Passed to ``train_test_split`` as ``test_size``.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        Tuple ``(tr_path, val_path, tr_label, val_label)``.
    """
    one_hot_labels = pd.get_dummies(train_df['label']).values
    image_paths = train_df['path'].values

    split_parts = train_test_split(
        image_paths,
        one_hot_labels,
        test_size=valid_size,
        random_state=random_state,
    )
    return tuple(split_parts)



In [None]:
N_EPOCHS = 30

def train_model(model_type, train_df, initial_lr =0.001, augmentor = None, input_pre_func=None):
    """Train a classifier of the given backbone type on ``train_df``.

    The rows are split 80/20 into training and validation sets. The model is
    compiled with Adam and categorical cross-entropy, then fitted for up to
    ``N_EPOCHS`` epochs. The learning rate is multiplied by 0.2 after 3 epochs
    without ``val_loss`` improvement, and training stops after 10 such epochs.

    Args:
        model_type: Backbone name forwarded to ``create_model``.
        train_df: DataFrame with ``path`` and ``label`` columns.
        initial_lr: Initial learning rate of the Adam optimizer.
        augmentor: Optional augmentation applied to training images only.
        input_pre_func: Optional preprocessing callable applied to every image.

    Returns:
        Tuple ``(model, history)``: the fitted model and the object returned
        by ``model.fit``.
    """
    tr_path, val_path, tr_label, val_label = get_train_valid(train_df,valid_size=0.2,random_state=2021)
    tr_ds = Breed_Dataset(tr_path, tr_label,image_size=IMAGE_SIZE,batch_size=BATCH_SIZE,augmentor = augmentor,shuffle=True, pre_func= input_pre_func )
    # Validation data is neither augmented nor shuffled; `shuffle` takes a bool, not None.
    val_ds = Breed_Dataset(val_path, val_label, image_size=IMAGE_SIZE,batch_size=BATCH_SIZE,augmentor=None, shuffle=False, pre_func=input_pre_func)

    model=create_model(model_type=model_type)
    # `learning_rate` replaces the deprecated `lr` keyword, which newer Keras releases reject.
    model.compile(optimizer=Adam(learning_rate=initial_lr),loss='categorical_crossentropy',metrics=['accuracy'])

    rlr_cb = ReduceLROnPlateau(monitor='val_loss',factor=0.2, patience=3, mode='min',verbose=1)
    ely_cb = EarlyStopping(monitor='val_loss',patience=10, mode='min',verbose=1)

    history = model.fit(tr_ds, epochs = N_EPOCHS, validation_data=val_ds, callbacks=[rlr_cb,ely_cb],verbose=1)

    return model, history

IMAGE_DIR = '/kaggle/working/Images'

data_df = make_dogbreed_dataframe(image_dir=IMAGE_DIR)
train_df, test_df = train_test_split(data_df, test_size=0.4, stratify=data_df['label'],random_state=2021)
    

In [None]:
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input

xception_model, xception_history = train_model(model_type='xception',train_df=train_df,initial_lr = 0.001, augmentor=augmentor_light,input_pre_func = xcp_preprocess_input)

In [None]:
# One-hot labels and file paths of the held-out test rows.
test_label = pd.get_dummies(test_df['label']).values
test_path = test_df['path'].values

# Integer class id per row: the position of the hot column in the one-hot matrix.
test_df['gt_class'] = test_label.argmax(axis=1)

test_ds = Breed_Dataset(
    image_filenames=test_path,
    labels=test_label,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    augmentor=None,
    shuffle=False,
    pre_func=xcp_preprocess_input,
)
xception_model.evaluate(test_ds)

In [None]:
# Predict the test set and store the most probable class index for every row.
predict_result = xception_model.predict(test_ds,steps=int(np.ceil(len(test_label)/BATCH_SIZE)))
predict_class = np.argmax(predict_result, axis=1)
test_df['xcp_pred_class']=predict_class

# Misclassified images per breed. This is not the last expression of the cell,
# so it has to be printed explicitly to appear in the output.
print(test_df[test_df['gt_class']!=test_df['xcp_pred_class']]['label'].value_counts())

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Bar chart of misclassified test images per breed.
plt.figure(figsize=(26,4))
wrong_result_df = test_df[test_df['gt_class']!=test_df['xcp_pred_class']]
sns.countplot(data=wrong_result_df, x='label')
# Rotate after plotting, as the label-count plot earlier in the notebook does:
# the rotation is applied to the tick labels that exist at call time.
plt.xticks(rotation=90);

In [None]:
def show_grid_images(image_path_list, augmentor=None, ncols=4, title=None):
    """Display up to ``ncols`` images in one row, optionally augmented.

    Each image is read with OpenCV, converted to RGB, resized to 224x224 and
    then passed through ``augmentor`` when one is given.

    Args:
        image_path_list: Sequence of image file paths.
        augmentor: Optional callable used as ``augmentor(image=img)['image']``.
        ncols: Number of axes in the row.
        title: Title placed above every displayed image.

    Raises:
        ValueError: If an image file cannot be read.
    """
    # squeeze=False keeps `axs` two-dimensional, so indexing also works for ncols=1.
    figure, axs = plt.subplots(figsize=(22,4), nrows=1, ncols=ncols, squeeze=False)
    row_axes = axs[0]
    # Hide every frame up front so slots without an image stay blank.
    for ax in row_axes:
        ax.axis('off')

    # zip stops at the shorter input, so fewer than ncols paths is not an error.
    for ax, image_path in zip(row_axes, image_path_list):
        bgr_image = cv2.imread(image_path)
        if bgr_image is None:
            raise ValueError('Could not read image: {}'.format(image_path))
        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image,(224,224))
        if augmentor is not None:
            image = augmentor(image=image)['image']
        ax.imshow(image)
        ax.set_title(title)
        
# First six image paths of two breeds, shown as one row per breed.
breed_image_list_01 = data_df.loc[data_df['label']=='Siberian_husky', 'path'].head(6).tolist()
breed_image_list_02 = data_df.loc[data_df['label']=='Eskimo_dog', 'path'].head(6).tolist()

for grid_title, grid_paths in (('S', breed_image_list_01), ('E', breed_image_list_02)):
    show_grid_images(grid_paths, ncols=6, title=grid_title)

In [None]:
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess_input

effb0_model_t1, effb0_history_t1 = train_model(model_type='efficientnetb0',train_df=train_df,initial_lr=0.0001, augmentor=augmentor_light,input_pre_func = eff_preprocess_input)


In [None]:
test_path = test_df['path'].values
test_label = pd.get_dummies(test_df['label']).values

test_df = Breed_Dataset(test_path, test_label, image_size=IMAGE_SIZE, batch_size= BATCH_SIZE, augmentor=None, shuffle=False, pre_func=eff_preprocess_input)
effb0_model_t1.evaluate(test_df)

In [None]:
predict_result = effb0_model_t1.predict(test_df, steps=int(np.ceil(len(test_label)/BATCH_SIZE)))
predict_class = np.argmax(predict_result, axis=1)
test_df['effb0_t1_pred_class']= predict_class

test_df[test_df['gt_class']!=test_df['effb0_t1_pred_class']]['label'].value_counts()