<a href="https://colab.research.google.com/github/joony0512/Deep_Learning_Class/blob/main/Part6/P6_Ch01_CH02_03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DL development pipeline

### Data Loader -> Model Definition -> Training Script

In [None]:
# Mount Google Drive (Colab only) so the files under /content/drive used by
# the later cells (kfolds.csv and the image folder) are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import math
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import activations

# os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
# Show the devices TensorFlow can see; a GPU entry means the accelerator is attached.
tf.config.list_physical_devices()


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### model

In [None]:
def get_sequential_model(input_shape):
    """Build the two-stage convolutional network used for binary classification.

    Args:
        input_shape: Shape of one input sample, passed straight to ``layers.Input``
            (the notebook uses ``(256, 256, 3)``).

    Returns:
        An uncompiled ``keras.Sequential`` model whose last layer is a single
        sigmoid unit.
    """
    # One (filters, dropout rate) pair per convolutional stage. Each stage ends
    # in a max-pool that halves the spatial size, so the next stage doubles the
    # number of filters (a common heuristic).
    stage_specs = ((64, 0.5), (128, 0.3))

    stack = [layers.Input(input_shape)]

    for n_filters, drop_rate in stage_specs:
        # padding='same' keeps the spatial size unchanged through each convolution.
        stack.append(layers.Conv2D(n_filters, 3, strides=1, activation='relu', padding='same'))
        stack.append(layers.Conv2D(n_filters, 3, strides=1, activation='relu', padding='same'))
        stack.append(layers.MaxPool2D())
        stack.append(layers.BatchNormalization())
        stack.append(layers.Dropout(drop_rate))

    # Classifier head: global max pooling returns the largest activation of each
    # filter, i.e. one representative value per filter.
    stack.append(layers.GlobalMaxPool2D())
    stack.append(layers.Dense(128, activation="relu"))
    stack.append(layers.Dense(1, activation="sigmoid"))  # binary classification

    return keras.Sequential(stack)

# Build the network for 256x256 three-channel images (the generators below
# resize every image to this size).
input_shape = (256, 256, 3)
model = get_sequential_model(input_shape)

model.compile(
    optimizer='adam',
    # Single sigmoid output -> binary cross-entropy.
    loss='binary_crossentropy',
    # `metrics` is documented as a list; newer Keras releases reject a bare
    # string, while the list form works on old and new versions alike.
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 64)      1792      
                                                                 
 conv2d_1 (Conv2D)           (None, 256, 256, 64)      36928     
                                                                 
 max_pooling2d (MaxPooling2  (None, 128, 128, 64)      0         
 D)                                                              
                                                                 
 batch_normalization (Batch  (None, 128, 128, 64)      256       
 Normalization)                                                  
                                                                 
 dropout (Dropout)           (None, 128, 128, 64)      0         
                                                                 
 conv2d_2 (Conv2D)           (None, 128, 128, 128)     7

### dataloader

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Batch loader that reads images listed in a k-fold CSV and yields (x, y) arrays.

    The CSV must provide the columns ``filename``, ``species`` and ``fold``.
    Every image is read as ``<image_dir>/<filename>.jpg``, converted from BGR to
    RGB, resized to ``image_size`` x ``image_size`` and scaled by 1/255. Labels
    are ``species - 1`` (the notebook maps cat 1 / dog 2 to cat 0 / dog 1).

    Args:
        batch_size: Number of rows per batch; the last batch may be smaller.
        csv_path: Path of the CSV file describing the dataset.
        fold: Fold id that is held out. ``mode='train'`` keeps every other fold,
            ``mode='val'`` keeps only this fold.
        image_size: Side length (pixels) every image is resized to.
        mode: ``'train'`` or ``'val'``. Any other value leaves the rows
            unfiltered by fold.
        shuffle: If true, rows are reshuffled on construction and after every epoch.
        image_dir: Directory that holds the ``.jpg`` files. Defaults to the
            Google Drive folder this notebook was written against.
    """

    # Files listed as invalid for this dataset.
    # https://github.com/tensorflow/models/issues/3134
    INVALID_FILENAMES = (
        'Egyptian_Mau_14',
        'Egyptian_Mau_139',
        'Egyptian_Mau_145',
        'Egyptian_Mau_156',
        'Egyptian_Mau_167',
        'Egyptian_Mau_177',
        'Egyptian_Mau_186',
        'Egyptian_Mau_191',
        'Abyssinian_5',
        'Abyssinian_34',
        'chihuahua_121',
        'beagle_116',
    )

    DEFAULT_IMAGE_DIR = '/content/drive/MyDrive/딥러닝 정주행/P6_Ch01.이미지처리실습/data/images'

    def __init__(
        self,
        batch_size,
        csv_path,
        fold,
        image_size,
        mode='train',
        shuffle=True,
        image_dir=None):
        # Let the Keras base class initialise its own state (newer Keras
        # versions warn when this call is missing).
        super().__init__()

        self.batch_size = batch_size
        self.image_size = image_size
        self.fold = fold
        self.mode = mode
        self.shuffle = shuffle
        self.image_dir = self.DEFAULT_IMAGE_DIR if image_dir is None else image_dir

        self.df = pd.read_csv(csv_path)

        if self.mode == 'train':
            self.df = self.df[self.df['fold'] != self.fold]
        elif self.mode == 'val':
            self.df = self.df[self.df['fold'] == self.fold]

        # Drop the rows that point at invalid files.
        is_invalid = self.df['filename'].isin(self.INVALID_FILENAMES)
        self.df = self.df[~is_invalid]

        # Apply the initial shuffle (no-op when shuffle is False).
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return math.ceil(len(self.df) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch number ``idx`` as ``(images, labels)`` NumPy arrays."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        rows = self.df.iloc[start:stop]

        batch_x, batch_y = self.get_data(rows)

        # float32 halves the memory of the default float64 batch; the pixel
        # values are already in [0, 1] at this point.
        return np.array(batch_x, dtype=np.float32), np.array(batch_y)

    def get_data(self, data):
        """Load the images and labels for the given rows.

        Args:
            data: DataFrame slice with ``filename`` and ``species`` columns.

        Returns:
            A ``(images, labels)`` pair of Python lists with one entry per row.

        Raises:
            FileNotFoundError: If an image cannot be read from disk.
        """
        batch_x = []
        batch_y = []
        target_size = (self.image_size, self.image_size)

        for file_name, species in zip(data['filename'], data['species']):
            image_path = os.path.join(self.image_dir, f'{file_name}.jpg')

            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; fail here with the offending path.
            image = cv2.imread(image_path)
            if image is None:
                raise FileNotFoundError(f'Could not read image: {image_path}')
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Resize so that all images can be stacked into one batch.
            image = cv2.resize(image, target_size)

            # Rescale pixel values to [0, 1].
            image = image / 255.

            # cat 1 / dog 2 -> cat 0 / dog 1
            label = int(species) - 1

            batch_x.append(image)
            batch_y.append(label)
        return batch_x, batch_y

    def on_epoch_end(self):
        """Reshuffle the rows when shuffling is enabled (also called once from __init__)."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)


In [None]:
csv_path = '/content/drive/MyDrive/딥러닝 정주행/P6_Ch01.이미지처리실습/kfolds.csv'

# Both generators share every setting except `mode`: fold 1 is held out, so the
# training generator reads the other folds and the validation generator reads fold 1.
shared_generator_args = dict(
    batch_size=128,
    csv_path=csv_path,
    fold=1,
    image_size=256,
    shuffle=True,
)

train_generator = DataGenerator(mode='train', **shared_generator_args)
val_generator = DataGenerator(mode='val', **shared_generator_args)

In [None]:
history = model.fit(
    train_generator,
    validation_data = val_generator,
    epochs = 10,
    callbacks = [
        early_stopping,
        reduce_on_plateau,
        model_checkpoint
    ],
    verbose =1
)

Epoch 1/10

## Callback functions

In [None]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss', patience = 3, verbose =1,
    mode = 'min', restore_best_weights = False
)

In [None]:
reduce_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor = 'val_loss', factor = 0.1, patience =10, verbose = 1,
    mode ='train', min_lr = 0.001
) # 일종의 learning rate scheduler -> 성능이 나아지지 않으면 learning rate 1/10으로 줄임, 가장 줄인값 min_lr = 0.001



In [None]:
# checkpoint 생성
file_path = '{epoch:02d}-{val_loss:.2f}.hdf5'
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    file_path, monitor = 'val_loss', verbose =1, save_best_only =True,
    save_weights_only =False, mode ='min'
)
# val_loss 작아질때마다 저장, save_weights_only =False 모델 구조까지 저장

In [None]:
# Display the metrics recorded by model.fit (keyed by metric name, e.g. 'loss', 'val_loss').
history.history

In [None]:
# Read the recorded metrics under a new name instead of rebinding `history`,
# so this cell (and the one above) can be re-run without errors.
metrics_by_epoch = history.history

# Left panel: loss, right panel: accuracy; each compares train vs. validation.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
panels = (
    ('loss', 'Loss'),
    ('accuracy', 'Accuracy'),
)
for ax, (metric, title) in zip(axes, panels):
    ax.plot(metrics_by_epoch[metric], label='train')
    ax.plot(metrics_by_epoch[f'val_{metric}'], label='val')
    ax.legend()
    ax.set_xlabel('epoch')
    ax.set_ylabel(metric)
    ax.set_title(title)

# plt.show must be called; a bare `plt.show` only references the function.
plt.show()
