In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Three parallel lists, one entry per image file:
# full path, dataset split tag, and class label tag.
paths = []
dataset_gubuns = []
label_gubuns = []

for dirname, _, filenames in os.walk('/kaggle/input/cat-and-dog'):
    for filename in filenames:
        # Only files whose name contains '.jpg' are recorded.
        if '.jpg' not in filename:
            continue

        file_path = f'{dirname}/{filename}'
        paths.append(file_path)

        # Split tag is decided by a directory marker in the path; first match wins.
        dataset_gubuns.append(
            'train' if '/training_set/' in file_path
            else 'test' if '/test_set/' in file_path
            else 'N/A'
        )

        # Class tag is decided by a substring of the path; 'dogs' is checked before 'cats'.
        label_gubuns.append(
            'DOG' if 'dogs' in file_path
            else 'CAT' if 'cats' in file_path
            else 'N/A'
        )

In [None]:
# Peek at the first 10 entries of each parallel list to sanity-check the directory walk.
paths[:10], dataset_gubuns[:10], label_gubuns[:10]

In [None]:
# Allow up to 200 characters per column when displaying frames (file paths are long).
pd.set_option('display.max_colwidth',200)

In [None]:
# Combine the parallel lists into one table: one row per image file.
data_df = pd.DataFrame(dict(path=paths, dataset=dataset_gubuns, label=label_gubuns))
print(data_df.shape)
data_df.head(10)

In [None]:
# Row counts per dataset split, then per class label.
for column_name in ('dataset', 'label'):
    print(data_df[column_name].value_counts())

In [None]:
import matplotlib.pyplot as plt
import cv2
%matplotlib inline

def show_grid_images(image_path_list, ncols=8, augmentor=None, title=None):
    """Show the first ``ncols`` images of ``image_path_list`` side by side in one row.

    Args:
        image_path_list: Sliceable sequence of image file paths readable by ``cv2.imread``.
        ncols: Number of axes (columns) in the single-row figure.
        augmentor: Accepted for interface compatibility; not used by this function.
        title: Title set on every populated axis (the same title for all).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load one of the paths.
    """
    # squeeze=False always returns a 2-D array of axes, so ncols=1 stays indexable.
    _, axs = plt.subplots(figsize=(22, 6), nrows=1, ncols=ncols, squeeze=False)
    row_axes = axs[0]
    shown_paths = image_path_list[:ncols]

    for ax, image_path in zip(row_axes, shown_paths):
        bgr_image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if bgr_image is None:
            raise FileNotFoundError(f'cv2.imread could not load image: {image_path}')
        # OpenCV loads channels as BGR; convert to RGB for matplotlib.
        ax.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
        ax.set_title(title)

    # Fewer paths than columns: hide the unused axes instead of raising IndexError.
    for ax in row_axes[len(shown_paths):]:
        ax.axis('off')
        
# First six image paths of each class, shown as one row per class.
dog_image_list = data_df.loc[data_df['label'] == 'DOG', 'path'].head(6).tolist()
show_grid_images(dog_image_list, ncols=6, title='DOG')

cat_image_list = data_df.loc[data_df['label'] == 'CAT', 'path'].head(6).tolist()
show_grid_images(cat_image_list, ncols=6, title='CAT')

In [None]:
# Load each sample dog image as RGB and print its array shape.
for image_path in dog_image_list:
    bgr_image = cv2.imread(image_path)
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    print(image.shape)

In [None]:
# `image` is presumably the last dog image left by the shape-printing loop — verify run order.
# Slice = every row of column 0, channel 0, so the shape has a single dimension.
print(image[:,0,0].shape)

# NOTE(review): prints the raw pixel array itself, which is a lot of output for a notebook cell.
print(image)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training generator: horizontal flips enabled and a rescale factor of 1/255.
train_gen = ImageDataGenerator(rescale=1/255.0, horizontal_flip=True)

# Read images from the training directory in batches of 64, resized to 224x224,
# with class_mode='categorical' labels.
train_flow_gen = train_gen.flow_from_directory(
    directory='/kaggle/input/cat-and-dog/training_set/training_set',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=64,
)

In [None]:
# Draw one batch (images, labels) and inspect the shapes plus the first image/label pair.
image_array, label_array = next(train_flow_gen)
print(image_array.shape, label_array.shape)
print(image_array[:1], label_array[:1])

In [None]:
# Unpack ONE batch so images and labels stay paired. Calling next() twice would
# take the images from one batch and the labels from the following batch.
images_array, labels_array = next(train_flow_gen)

print('##### image array shape:', images_array.shape)
print('#### label array shape:', labels_array.shape)

In [None]:
# Display the label batch produced by the class_mode='categorical' generator.
labels_array

In [None]:
# Side length of the square model input; used for the Input shape and generator target_size.
IMAGE_SIZE = 224
# Images per generator batch; also used to compute steps per epoch.
BATCH_SIZE = 64

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense , Conv2D , Dropout , Flatten , Activation, MaxPooling2D , GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam , RMSprop 
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau , EarlyStopping , ModelCheckpoint , LearningRateScheduler
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import Xception

def create_model(model_name='vgg16', verbose=False):
    """Build a pretrained backbone with a 2-unit softmax classification head.

    The backbone is created with ``include_top=False`` and ``weights='imagenet'``
    on an ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` input. Its output goes through global
    average pooling, Dropout(0.5) for every backbone except 'vgg16', a 50-unit
    ReLU layer, and a 2-unit softmax output.

    Args:
        model_name: One of 'vgg16', 'resnet50' (builds ResNet50V2) or 'xception'.
        verbose: If True, print ``model.summary()``.

    Returns:
        The uncompiled Keras ``Model``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    backbones = {'vgg16': VGG16, 'resnet50': ResNet50V2, 'xception': Xception}
    # Fail early with a clear message instead of hitting an unbound `base_model` below.
    if model_name not in backbones:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of: {', '.join(backbones)}"
        )

    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    base_model = backbones[model_name](input_tensor=input_tensor, include_top=False, weights='imagenet')

    bm_output = base_model.output

    x = GlobalAveragePooling2D()(bm_output)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu', name='fc1')(x)
    # Two classes (dog and cat), so the output Dense layer has 2 units.
    output = Dense(2, activation='softmax', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)

    if verbose:
        model.summary()

    return model

In [None]:
# Build the Xception-based classifier (printing its summary) and compile it with
# categorical cross-entropy to match the generator's class_mode='categorical' labels.
model = create_model(model_name='xception', verbose=True)
model.compile(
    optimizer=Adam(0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Sample count reported by the training iterator; used below to derive steps_per_epoch.
train_flow_gen.samples

In [None]:
# steps_per_epoch = ceil(samples / batch size), so a final partial batch is included.
model.fit(
    train_flow_gen,
    epochs=15,
    steps_per_epoch=int(np.ceil(train_flow_gen.samples / BATCH_SIZE)),
)

In [None]:
# Test generator: rescale only, no augmentation options.
test_gen = ImageDataGenerator(rescale=1/255)
test_flow_gen = test_gen.flow_from_directory(
    directory='/kaggle/input/cat-and-dog/test_set/test_set',  # directory containing the image files
    target_size=(IMAGE_SIZE, IMAGE_SIZE),  # size the original images are finally resized to
    class_mode='categorical',  # string labels are one-hot encoded automatically
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Evaluate on the test generator; the model was compiled with metrics=['accuracy'].
model.evaluate(test_flow_gen)

In [None]:
# Rebuild the path / dataset / label table from the parallel lists.
data_df = pd.DataFrame(dict(path=paths, dataset=dataset_gubuns, label=label_gubuns))
data_df.head(3)

In [None]:
# Split the table by the dataset tag assigned during the directory walk.
train_df = data_df.loc[data_df['dataset'] == 'train']
test_df = data_df.loc[data_df['dataset'] == 'test']

In [None]:
# (rows, columns) of the train and test tables.
print(train_df.shape, test_df.shape)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the training rows for validation, stratified on 'label';
# the fixed random_state makes the split repeatable.
tr_df, val_df = train_test_split(
    train_df,
    test_size=0.15,
    stratify=train_df['label'],
    random_state=2021,
)
print(tr_df.shape, val_df.shape)

In [None]:
# Label counts in the training split, then in the validation split.
for split_df in (tr_df, val_df):
    print(split_df['label'].value_counts())

In [None]:
# Target image side length passed to the generators and the model Input.
IMAGE_SIZE = 224
# Batch size passed to the generators and used for the steps computations.
BATCH_SIZE = 64

In [None]:
# Training generator: horizontal flips enabled plus rescaling by 1/255.
# The factor was 1/225.0 (typo); it must match the 1/255 used by the validation
# and test generators, otherwise training and evaluation inputs are scaled differently.
tr_generator = ImageDataGenerator(horizontal_flip=True, rescale=1/255.0)

# Read image paths and labels from tr_df; class_mode='binary' to match the
# single-unit sigmoid model trained on this generator.
tr_flow_gen = tr_generator.flow_from_dataframe(
    dataframe=tr_df,
    x_col='path',
    y_col='label',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Unpack ONE batch so images and labels stay paired. Calling next() twice would
# pair the images of one batch with the labels of the following batch.
images_array, labels_array = next(tr_flow_gen)

print(images_array.shape, labels_array.shape)
print(images_array[0], labels_array[0])

In [None]:
# Validation generator: rescale only, and shuffle=False.
val_generator = ImageDataGenerator(rescale=1/255.)

val_flow_gen = val_generator.flow_from_dataframe(
    dataframe=val_df,
    x_col='path',
    y_col='label',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
def create_model(model_name='vgg16', verbose=False):
    """Build a pretrained backbone with a single-unit sigmoid (binary) head.

    The backbone is created with ``include_top=False`` and ``weights='imagenet'``
    on an ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` input. Its output goes through global
    average pooling, Dropout(0.5) for every backbone except 'vgg16', a 50-unit
    ReLU layer, and a 1-unit sigmoid output.

    Args:
        model_name: One of 'vgg16', 'resnet50' (builds ResNet50V2) or 'xception'.
        verbose: If True, print ``model.summary()``.

    Returns:
        The uncompiled Keras ``Model``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    backbones = {'vgg16': VGG16, 'resnet50': ResNet50V2, 'xception': Xception}
    # Fail early with a clear message instead of hitting an unbound `base_model` below.
    if model_name not in backbones:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of: {', '.join(backbones)}"
        )

    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    base_model = backbones[model_name](input_tensor=input_tensor, include_top=False, weights='imagenet')

    bm_output = base_model.output

    x = GlobalAveragePooling2D()(bm_output)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu', name='fc1')(x)
    # Final output uses one sigmoid unit instead of a softmax layer.
    output = Dense(1, activation='sigmoid', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)

    if verbose:
        model.summary()

    return model

In [None]:
# Xception-based model compiled with binary cross-entropy.
model = create_model(model_name='xception')
model.compile(
    optimizer=Adam(0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement.
rlr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    mode='min',
    verbose=1,
)
# Stop training after 5 epochs without val_loss improvement.
ely_cb = EarlyStopping(
    monitor='val_loss',
    patience=5,
    mode='min',
    verbose=1,
)

In [None]:
N_EPOCHS = 10

# Step counts are ceil(rows / batch size), so the final partial batch is included.
model.fit(
    tr_flow_gen,
    epochs=N_EPOCHS,
    steps_per_epoch=int(np.ceil(len(tr_df) / BATCH_SIZE)),
    validation_data=val_flow_gen,
    validation_steps=int(np.ceil(len(val_df) / BATCH_SIZE)),
    callbacks=[rlr_cb, ely_cb],
)

In [None]:
# Test generator built from test_df: rescale only, and shuffle=False.
test_generator = ImageDataGenerator(rescale=1/255.0)
test_flow_gen = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='path',
    y_col='label',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=False,
)

model.evaluate(test_flow_gen)

In [None]:
import cv2

# Load the first image listed in data_df, convert BGR -> RGB, and display it.
first_image_path = data_df['path'].iloc[0]
image = cv2.cvtColor(cv2.imread(first_image_path), cv2.COLOR_BGR2RGB)
plt.imshow(image)

In [None]:
def preprocessing_scaling(x, mode='tf'):
    """Scale pixel values with one of two conventions.

    Args:
        x: Array indexed as ``x[:, :, channel]``; the input itself is not modified
            because both modes start by dividing into a new array.
        mode: 'tf' computes ``x / 127.5 - 1.0``. 'torch' computes ``x / 255`` and
            then standardizes channels 0, 1, 2 with fixed per-channel mean and std.
            Any other value returns ``x`` unchanged.

    Returns:
        The scaled array (or ``x`` itself for an unrecognized mode).
    """
    if mode == 'torch':
        x = x / 255.
        # (mean, std) for channels 0, 1 and 2 respectively.
        channel_stats = (
            (0.485, 0.229),
            (0.456, 0.224),
            (0.406, 0.225),
        )
        for channel, (mean, std) in enumerate(channel_stats):
            x[:, :, channel] = (x[:, :, channel] - mean) / std
    elif mode == 'tf':
        x = x / 127.5
        x -= 1.0

    return x

# Scale the same RGB image with both conventions so their value ranges can be compared.
scaled_image_tf = preprocessing_scaling(image, mode='tf')
scaled_image_torch = preprocessing_scaling(image, mode='torch')

In [None]:
def show_pixel_histogram(image):
    """Plot a 100-bin histogram of pixel values for channels 0, 1 and 2 of ``image``.

    Args:
        image: Array indexable as ``image[:, :, i]`` for i in 0..2. The three
            panels are titled 'Red', 'Green' and 'Blue' in that order.
    """
    _, axs = plt.subplots(nrows=1, ncols=3, figsize=(16, 6))

    for channel, title_str in enumerate(('Red', 'Green', 'Blue')):
        ax = axs[channel]
        # Label the histogram so legend() has an entry; with no labelled artist
        # the legend is empty and matplotlib emits a warning.
        ax.hist(image[:, :, channel].flatten(), bins=100, alpha=0.5, label=title_str)
        ax.legend(loc='upper right')
        ax.set(title=title_str)
        
# Compare value distributions: 'tf' scaling, 'torch' scaling, and plain division by 255.
for pixels in (scaled_image_tf, scaled_image_torch, image / 255.0):
    show_pixel_histogram(pixels)

In [None]:
from tensorflow.keras.applications.xception import preprocess_input

# Apply the Xception preprocess_input imported in this cell and plot the per-channel value distribution.
scaled_image_xception = preprocess_input(image) 
show_pixel_histogram(scaled_image_xception)

In [None]:
from tensorflow.keras.applications.densenet import preprocess_input

# NOTE(review): the import in this cell rebinds `preprocess_input` to the DenseNet version;
# any later use of that name gets DenseNet preprocessing until it is re-imported.
scaled_image_densenet = preprocess_input(image)
show_pixel_histogram(scaled_image_densenet)

In [None]:
from tensorflow.keras.applications.xception import preprocess_input

# Training generator that passes preprocess_input as the preprocessing function
# instead of using a rescale factor.
tr_generator = ImageDataGenerator(
    horizontal_flip=True,
    preprocessing_function=preprocess_input,
)
tr_flow_gen = tr_generator.flow_from_dataframe(
    dataframe=tr_df,
    x_col='path',
    y_col='label',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Unpack ONE batch so images and labels stay paired. Calling next() twice would
# pair the images of one batch with the labels of the following batch.
images_array, labels_array = next(tr_flow_gen)

# Value distribution of the first preprocessed image in the batch.
show_pixel_histogram(images_array[0])