In [15]:
import os
import math

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations

In [16]:
class DataGenerator(keras.utils.Sequence):
    def __init__(self, batch_size, csv_path, image_size, fold, mode, shuffle=True):
        self.batch_size = batch_size
        self.image_size = image_size
        self.fold = fold
        self.mode = mode
        self.shuffle = shuffle
        
        self.df = pd.read_csv(csv_path)
        
        if self.mode == "train":
            self.df = self.df[self.df["fold"] != self.fold]
        elif self.mode == "val":
            self.df = self.df[self.df["fold"] == self.fold]
        
        #### Remove invalid files
        #### https://github.com/tensorflow/models/issues/3134
        invalid_filenames = [
            'Egyptian_Mau_14',
            'Egyptian_Mau_139',
            'Egyptian_Mau_145',
            'Egyptian_Mau_156',
            'Egyptian_Mau_167',
            'Egyptian_Mau_177',
            'Egyptian_Mau_186',
            'Egyptian_Mau_191',
            'Abyssinian_5',
            'Abyssinian_34',
            'chihuahua_121',
            'beagle_116'
        ]
        self.df = self.df[~self.df['file_name'].isin(invalid_filenames)]
        
        self.on_epoch_end()

    def __len__(self):
        return math.ceil(len(self.df) / self.batch_size)

    def __getitem__(self, idx):
        start = idx * self.batch_size
        fin = (idx+1) * self.batch_size
        data = self.df.iloc[start : fin]
        
        batch_x, batch_y = self.get_data(data)
        
        return np.array(batch_x), np.array(batch_y)
    
    def get_data(self, data):
        batch_x = []
        batch_y = []
        
        for _, j in data.iterrows():
            file_name = j["file_name"]
            
            image = cv2.imread(f"../data/images/{file_name}.jpg")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            
            image = cv2.resize(image, (self.image_size, self.image_size))
            image = image / 255.
            
            label = int(j["species"]) - 1
            
            batch_x.append(image)
            batch_y.append(label)
        
        return batch_x, batch_y
    
    def on_epoch_end(self):
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)


In [17]:
def get_sequential_model(input_shape):
    model = keras.Sequential(
        [
            layers.Input(input_shape),

            # 1st Conv Block
            layers.Conv2D(64, 3, strides=1, activation="relu", padding="same"),
            layers.Conv2D(64, 3, strides=1, activation="relu", padding="same"),
            layers.MaxPool2D(),
            layers.BatchNormalization(),
            layers.Dropout(0.5),

            # 2st Conv Block
            layers.Conv2D(128, 3, strides=1, activation="relu", padding="same"),
            layers.Conv2D(128, 3, strides=1, activation="relu", padding="same"),
            layers.MaxPool2D(),
            layers.BatchNormalization(),
            layers.Dropout(0.3),

            layers.GlobalMaxPool2D(),
            layers.Dense(128, activation="relu"),
            layers.Dense(1, activation="sigmoid")
        ]
    )
    
    return model

input_shape = (256, 256, 3)
model = get_sequential_model(input_shape)

model.compile(
    optimizer = "adam",
    loss = "binary_crossentropy",
    metrics = "accuracy"
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 256, 256, 64)      1792      
                                                                 
 conv2d_5 (Conv2D)           (None, 256, 256, 64)      36928     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 128, 128, 64)     0         
 2D)                                                             
                                                                 
 batch_normalization_2 (Batc  (None, 128, 128, 64)     256       
 hNormalization)                                                 
                                                                 
 dropout_2 (Dropout)         (None, 128, 128, 64)      0         
                                                                 
 conv2d_6 (Conv2D)           (None, 128, 128, 128)    

In [18]:
##### 학습에 사용될 generator와 validation에 사용될 객체 생성

csv_path = "../data/kfolds.csv"

train_generator = DataGenerator(
    batch_size=128, 
    csv_path = csv_path, 
    image_size=256, 
    fold=1, 
    mode="train", 
    shuffle=True
)

valid_generator = DataGenerator(
    batch_size=128, 
    csv_path = csv_path, 
    image_size=256, 
    fold=1, 
    mode="val", 
    shuffle=True
)

In [19]:
# 0.1 씩 곱해준다 10번까지 foactor = 0.1 patience=10 , verbose =1 ,

# save_best_only 가장 좋은성적만 저장해라.

In [20]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=3, verbose=1, mode="min", restore_best_weights=False
)

reduce_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.1, patience=10, verbose=1, mode="min", min_lr=0.001
)

filepath = "{epoch:02d}-{val_loss:.2f}.hdf5"
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath, monitor="val_loss", verbose=1, save_best_only=True, save_weights_only=False,
    mode="min"
)

In [21]:
history = model.fit(
                train_generator,
                validation_data = valid_generator,
                epochs=10,
                callbacks = [
                    early_stopping,
                    reduce_on_plateau,
                    model_checkpoint
                ],
                verbose=1
            )

error: OpenCV(4.5.5) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [22]:
import matplotlib.pyplot as plt
history = history.history

plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
plt.plot(history['loss'], label='train')
plt.plot(history['val_loss'], label='val')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title("Loss")

plt.subplot(1, 2, 2)
plt.plot(history['accuracy'], label='train')
plt.plot(history['val_accuracy'], label='val')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title("Accuracy")
plt.show()


NameError: name 'history' is not defined