In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import os

def make_catndog_dataframe(root_dir='/kaggle/input/cat-and-dog'):
    """Index every JPEG file found under ``root_dir`` into a DataFrame.

    The tree is walked recursively. The split and the label of an image are
    taken from the names of the directories that contain it (not from the
    file name), so a file such as ``cats/dogs_lookalike.jpg`` is still a CAT.

    Args:
        root_dir: Directory to scan. Defaults to the Kaggle ``cat-and-dog``
            dataset location used by this notebook.

    Returns:
        pandas.DataFrame with one row per image and the columns
        ``path`` (full file path), ``dataset`` ('train', 'test' or 'N/A')
        and ``label`` ('DOG', 'CAT' or 'N/A'). The frame is empty, but still
        has these columns, when no image is found.
    """
    records = []

    for dirname, _, filenames in os.walk(root_dir):
        # Compare whole directory names so that unrelated text containing
        # 'dogs'/'cats' can never decide the split or the label.
        folders = set(dirname.replace(os.sep, '/').split('/'))

        if 'training_set' in folders:
            dataset_gubun = 'train'
        elif 'test_set' in folders:
            dataset_gubun = 'test'
        else:
            dataset_gubun = 'N/A'

        if 'dogs' in folders:
            label_gubun = 'DOG'
        elif 'cats' in folders:
            label_gubun = 'CAT'
        else:
            label_gubun = 'N/A'

        for filename in filenames:
            # Match the extension only (case-insensitively); a substring test
            # would also accept names like 'notes.jpg.txt'.
            if filename.lower().endswith('.jpg'):
                records.append((os.path.join(dirname, filename), dataset_gubun, label_gubun))

    return pd.DataFrame(records, columns=['path', 'dataset', 'label'])

In [None]:
# Index the dataset once, then report its size and preview the first five rows.
data_df = make_catndog_dataframe()
print(data_df.shape)
data_df.head()

In [None]:
# Scratch check of basic list operations: print the length, then the first element.
list_a = list(range(1, 6))
print(len(list_a))
print(list_a[0])

In [None]:
from tensorflow.keras.utils import Sequence
import sklearn
import cv2

# Shared settings: images per batch, and the square edge length (in pixels)
# that every image is resized to before being fed to the network.
BATCH_SIZE, IMAGE_SIZE = 64, 224

class CnD_Dataset(Sequence):
    """Keras ``Sequence`` that builds image batches from files on disk.

    Every image is read with OpenCV, converted from BGR to RGB, resized to
    ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and optionally passed through an augmentor.

    Args:
        image_filenames: Sliceable collection of image file paths.
        labels: Sliceable collection of labels aligned with
            ``image_filenames``, or ``None`` when there are no labels.
        batch_size: Number of images per batch.
        augmentor: Optional callable used as ``augmentor(image=img)['image']``.
        shuffle: When true, the files (and labels) are shuffled once on
            construction and again on every ``on_epoch_end`` call.
    """

    def __init__(self, image_filenames, labels, batch_size=BATCH_SIZE,augmentor=None, shuffle=False):
        # Newer Keras versions expect the base-class initializer to be called.
        super().__init__()
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.shuffle = shuffle

        if self.shuffle:
            self.on_epoch_end()

    def __len__(self):
        """Return the number of batches; the last batch may be partial."""
        # Count files rather than labels so that labels=None is supported.
        return int(np.ceil(len(self.image_filenames)/self.batch_size))

    def __getitem__(self, index):
        """Load batch ``index``.

        Returns:
            ``(image_batch, label_batch)`` when labels were given, otherwise
            only ``image_batch``. ``image_batch`` has shape
            ``(n, IMAGE_SIZE, IMAGE_SIZE, 3)``.

        Raises:
            OSError: If OpenCV cannot read one of the image files.
        """
        start = index*self.batch_size
        stop = start + self.batch_size
        image_name_batch = self.image_filenames[start:stop]

        image_batch = np.zeros((len(image_name_batch),IMAGE_SIZE,IMAGE_SIZE,3))

        for image_index, image_path in enumerate(image_name_batch):
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise OSError('cv2.imread could not read image: {}'.format(image_path))
            image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
            image = cv2.resize(image,(IMAGE_SIZE,IMAGE_SIZE))
            if self.augmentor is not None:
                image = self.augmentor(image=image)['image']
            image_batch[image_index] = image

        if self.labels is None:
            return image_batch
        return image_batch, self.labels[start:stop]

    def on_epoch_end(self):
        """Reshuffle files and labels together when shuffling is enabled."""
        if not self.shuffle:
            return
        if self.labels is None:
            self.image_filenames = sklearn.utils.shuffle(self.image_filenames)
        else:
            self.image_filenames, self.labels = sklearn.utils.shuffle(self.image_filenames,self.labels)


In [None]:
# Preview the first five rows of the image index.
data_df.head(5)

In [None]:
import albumentations as A

# Split the image index by the value of its 'dataset' column.
train_df = data_df.loc[data_df['dataset'].eq('train')]
test_df = data_df.loc[data_df['dataset'].eq('test')]


In [None]:
# Raw path / label arrays of the training subset.
train_image_labels = train_df['label'].values
train_image_filenames = train_df['path'].values

# Random horizontal flip, vertical flip and shift/scale/rotate, each with p=0.5.
cnd_augmentor = A.Compose(
    [A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5), A.ShiftScaleRotate(p=0.5)]
)

# Unshuffled dataset over the training images, used below to inspect batches.
cnd_ds = CnD_Dataset(
    train_image_filenames,
    train_image_labels,
    batch_size=BATCH_SIZE,
    augmentor=cnd_augmentor,
    shuffle=False,
)

In [None]:
# Number of training image paths, shown as a shape tuple.
np.shape(train_image_filenames)

In [None]:
# Load the first batch once and unpack it. Fetching it separately for the
# images and for the labels would read and augment every file twice.
images_batch, labels_batch = cnd_ds[0]
print(images_batch.shape, labels_batch.shape)
print(images_batch[0])

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

def show_grid_images(images_batch, ncols=4, title=None):
    """Show the first images of a batch side by side in one row.

    Args:
        images_batch: Indexable collection of images. Each image is cast to
            int32 before being handed to ``imshow``.
        ncols: Maximum number of images to show. Fewer are shown when the
            batch is smaller.
        title: Optional indexable collection with one title per image. No
            titles are drawn when it is ``None``.
    """
    n_images = min(ncols, len(images_batch))
    if n_images == 0:
        return

    # squeeze=False keeps `axs` two-dimensional even for a single column, so
    # the row can always be iterated.
    _, axs = plt.subplots(figsize=(22,4), nrows=1, ncols=n_images, squeeze=False)
    for i, ax in enumerate(axs[0]):
        ax.imshow(np.array(images_batch[i],dtype='int32'))
        ax.axis('off')
        if title is not None:
            ax.set_title(title[i])
# Show six augmented training images, each titled with its label.
show_grid_images(images_batch, ncols=6, title=['augmented ' + label for label in labels_batch])

In [None]:
class CnD_Dataset(Sequence):
    """Keras ``Sequence`` that builds preprocessed image batches from disk.

    Every image is read with OpenCV, converted from BGR to RGB, resized to
    ``IMAGE_SIZE`` x ``IMAGE_SIZE``, optionally augmented and finally passed
    through an optional preprocessing function.

    Args:
        image_filenames: Sliceable collection of image file paths.
        labels: Sliceable collection of labels aligned with
            ``image_filenames``, or ``None`` when there are no labels.
        batch_size: Number of images per batch.
        augmentor: Optional callable used as ``augmentor(image=img)['image']``.
        shuffle: When true, the files (and labels) are shuffled once on
            construction and again on every ``on_epoch_end`` call.
        pre_func: Optional callable applied to each image after augmentation
            (for example a scaling function).
    """

    def __init__(self, image_filenames, labels, batch_size= BATCH_SIZE, augmentor=None, shuffle=False, pre_func = None):
        # Newer Keras versions expect the base-class initializer to be called.
        super().__init__()
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.pre_func = pre_func
        self.shuffle = shuffle

        if self.shuffle:
            self.on_epoch_end()

    def __len__(self):
        """Return the number of batches; the last batch may be partial."""
        # Count files rather than labels so that labels=None is supported.
        return int(np.ceil(len(self.image_filenames)/self.batch_size))

    def __getitem__(self, index):
        """Load batch ``index``.

        Returns:
            ``(image_batch, label_batch)`` when labels were given, otherwise
            only ``image_batch``. ``image_batch`` has shape
            ``(n, IMAGE_SIZE, IMAGE_SIZE, 3)``.

        Raises:
            OSError: If OpenCV cannot read one of the image files.
        """
        start = index*self.batch_size
        stop = start + self.batch_size
        image_name_batch = self.image_filenames[start:stop]

        image_batch = np.zeros((len(image_name_batch),IMAGE_SIZE,IMAGE_SIZE,3))

        for image_index, image_path in enumerate(image_name_batch):
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise OSError('cv2.imread could not read image: {}'.format(image_path))
            image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
            image = cv2.resize(image,(IMAGE_SIZE,IMAGE_SIZE))
            if self.augmentor is not None:
                image = self.augmentor(image=image)['image']
            # Preprocessing runs last so that it sees the augmented pixels.
            if self.pre_func is not None:
                image = self.pre_func(image)
            image_batch[image_index] = image

        if self.labels is None:
            return image_batch
        return image_batch, self.labels[start:stop]

    def on_epoch_end(self):
        """Reshuffle files and labels together when shuffling is enabled."""
        if not self.shuffle:
            return
        if self.labels is None:
            self.image_filenames = sklearn.utils.shuffle(self.image_filenames)
        else:
            self.image_filenames, self.labels = sklearn.utils.shuffle(self.image_filenames,self.labels)

In [None]:
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input

# Same training images, now also passed through the Xception preprocessing function.
cnd_ds = CnD_Dataset(train_image_filenames, train_image_labels, batch_size=BATCH_SIZE,
                     augmentor=cnd_augmentor, shuffle=False, pre_func=xcp_preprocess_input)

# Load the first batch once; fetching it separately for images and labels
# would read and augment every file twice.
images_batch, labels_batch = cnd_ds[0]


In [None]:
# Random horizontal flip, vertical flip and shift/scale/rotate, each with p=0.5.
cnd_augmentor = A.Compose(
    [A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5), A.ShiftScaleRotate(p=0.5)]
)

def zero_one_scaler(image):
    """Divide pixel values by 255.0, mapping the 0..255 range onto 0..1."""
    scaled = image / 255.0
    return scaled
# Same training images, scaled to the 0..1 range after augmentation.
cnd_ds = CnD_Dataset(train_image_filenames, train_image_labels, batch_size=BATCH_SIZE,
                         augmentor=cnd_augmentor, shuffle=False, pre_func=zero_one_scaler)

# Load the first batch once; fetching it separately for images and labels
# would read and augment every file twice.
images_batch, labels_batch = cnd_ds[0]


In [None]:
# Two encodings of the same training labels: integer codes and one-hot columns.
labels_enc = pd.factorize(train_df['label'])[0]
labels_ohe = pd.get_dummies(train_df['label']).values

print(labels_ohe.shape, labels_enc.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Split the image index by the value of its 'dataset' column.
train_df = data_df.loc[data_df['dataset'].eq('train')]
test_df = data_df.loc[data_df['dataset'].eq('test')]

# Paths plus integer label codes (numbered in order of first appearance).
train_path = train_df['path'].values
train_label = pd.factorize(train_df['label'])[0]

# Hold out 15% of the training images for validation, with a fixed seed.
tr_path, val_path, tr_label, val_label = train_test_split(
    train_path,
    train_label,
    test_size=0.15,
    random_state=2021,
)


In [None]:
# Training-time augmentation: a random horizontal flip with probability 0.5.
cnd_augmentor = A.Compose([A.HorizontalFlip(p=0.5)])

In [None]:
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess_input

# Training data: shuffled and randomly augmented.
tr_ds = CnD_Dataset(tr_path, tr_label, batch_size=BATCH_SIZE, augmentor=cnd_augmentor,
                    shuffle=True, pre_func=xcp_preprocess_input)
# Validation data: no augmentation and no shuffling. Random flips would make
# val_loss fluctuate from epoch to epoch for reasons unrelated to the model.
val_ds = CnD_Dataset(val_path, val_label, batch_size=BATCH_SIZE, augmentor=None,
                     shuffle=False, pre_func=xcp_preprocess_input)

# First image batch of each dataset, kept for inspection.
tr_image_batch = tr_ds[0][0]
val_image_batch = val_ds[0][0]

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Conv2D, Dropout, Flatten, Activation, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import Xception, MobileNetV2

def create_model(model_name = 'vgg16', verbose=False):
    """Build a binary classifier on top of an ImageNet-pretrained backbone.

    The backbone is created without its top layers and is followed by global
    average pooling, a dropout layer (skipped for 'vgg16'), a 50-unit ReLU
    layer and a single sigmoid output unit.

    Args:
        model_name: One of 'vgg16', 'resnet50', 'xception' or 'mobilenet'.
        verbose: When true, print the model summary.

    Returns:
        The uncompiled Keras ``Model``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    backbones = {
        'vgg16': VGG16,
        'resnet50': ResNet50V2,
        'xception': Xception,
        'mobilenet': MobileNetV2,
    }
    if model_name not in backbones:
        raise ValueError(
            "Unknown model_name {!r}; expected one of {}".format(model_name, sorted(backbones))
        )

    input_tensor = Input(shape=(IMAGE_SIZE,IMAGE_SIZE,3))
    # Every backbone takes the same arguments, so a lookup replaces the
    # if/elif chain (whose 'resnet50' branch had misspelled arguments).
    base_model = backbones[model_name](input_tensor=input_tensor, include_top=False, weights='imagenet')

    bm_output = base_model.output

    x = GlobalAveragePooling2D()(bm_output)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu',name='fc1')(x)
    output = Dense(1, activation='sigmoid', name='output')(x)
    model = Model(inputs = input_tensor, outputs = output)

    if verbose:
        model.summary()
    return model

In [None]:
import tensorflow as tf

# Reset the Keras backend session before building a new model.
tf.keras.backend.clear_session()

# Xception-based classifier trained with Adam on binary cross-entropy.
model = create_model(model_name='xception')
model.compile(
    optimizer=Adam(0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Both callbacks watch val_loss: cut the learning rate to 20% after 3 epochs
# without improvement, and stop training after 5.
rlr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, mode='min', verbose=1)
ely_cb = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)

In [None]:
# Train for at most N_EPOCHS epochs; step counts cover every batch, including a partial last one.
N_EPOCHS = 15
history = model.fit(
    tr_ds,
    epochs=N_EPOCHS,
    steps_per_epoch=int(np.ceil(tr_path.shape[0] / BATCH_SIZE)),
    validation_data=val_ds,
    validation_steps=int(np.ceil(val_path.shape[0] / BATCH_SIZE)),
    callbacks=[rlr_cb, ely_cb],
    verbose=1,
)

In [None]:
# Select the test rows. The split name must be the string 'test'; the bare
# name `test` is undefined.
test_df = data_df[data_df['dataset'] == 'test']

test_path = test_df['path'].values
# pd.factorize numbers classes in order of first appearance, so factorizing the
# test labels on their own could swap 0 and 1 relative to training. Reuse the
# class order of the training labels instead.
label_classes = pd.factorize(train_df['label'])[1]
test_label = pd.Index(label_classes).get_indexer(test_df['label'].values)

# Evaluate on unaugmented, unshuffled test images with the training preprocessing.
test_ds = CnD_Dataset(test_path, test_label, batch_size=BATCH_SIZE, augmentor=None,
                      shuffle=False, pre_func=xcp_preprocess_input)
model.evaluate(test_ds)