### 설계 전략
* file 경로, label, class1, class2, ..., classN 컬럼으로 이루어진 key_df를 사용자가 직접 생성한다.
* 생성한 key_df를 기반으로 idx_dict를 생성한다.

# Packages

In [1]:
from typing import List, Dict, Any, Tuple, Callable, Union, Optional

import sys, os, cv2, re
import torch
from torch import nn
import numpy as np
import pandas as pd
from scripts.how_to_train import *

from GGUtils.utils.path import do_or_load, GetAbsolutePath
from GGUtils.img.viewer import show_img, show_imgs
from GGDL.utils import set_seed_everything, make_basic_directory, tensor_to_img, GetDevice
from GGDL.idx_dict.key_df import make_basic_key_df, binary_label_convertor
from GGDL.idx_dict.make_dict import make_stratified_idx_dict
from GGDL.data_loader.dataset import ImgDataset, GetLoader, show_dataset_img
from GGDL.model.vision import Classification
from GGDL.model.fine_tuning import Tuner

from GGImgMorph.scenario import sample_augment      # 증강 알고리즘

# Process

In [2]:
# Initial settings
# ==============================
GPU = 0
# Passed as `makes_new` to do_or_load when idx_dict is built in a later cell.
MAKE_NEW_IDX_DICT = False
WORKER = 0

# Path information
# ==============================
TRAIN_SET = "/home/gooopy/programming/rawdata/dogs-vs-cats/train/"
TEST_SET = "/home/gooopy/programming/rawdata/dogs-vs-cats/test1/"
# NOTE(review): SOURCE and ESPOINT_DIR are not defined in this cell; presumably they
# come from the star import of scripts.how_to_train — confirm.
IDX_DICT_PATH = f"{SOURCE}/idx_dict.pickle"
ESTOP_PATH = f"{SOURCE}/{ESPOINT_DIR}/process{GPU}"

# Device setup
# ==============================
GET_DEVICE = GetDevice()
DEVICE = GET_DEVICE(GPU)
# Make the selected device the current CUDA device for this process.
torch.cuda.set_device(DEVICE)
# Presumably prints the GPU listing shown in this cell's output — TODO confirm.
GET_DEVICE.summary()

# Model training settings
# ==============================
IMG_SIZE = 224
# Spelling ("CHANNER") is kept as-is because the model-construction cell references this name.
IMG_CHANNER = 3
CLASS_SIZE = 1
RESIZE_PADDING_COLOR = "random"
BATCH_SIZE = 16
MODEL_NAME = 'vit_base_patch16_224.orig_in21k'      # baseline model


class HeaderBlock(nn.Module):
    """One stage of the classification head: BatchNorm1d -> Linear -> GELU -> Dropout.

    Args:
        input_dim: Number of input features (used for both the batch norm and
            the linear layer's input size).
        output_dim: Number of features produced by the linear layer.
        dropout_prob: Drop probability handed to ``nn.Dropout``.
    """

    def __init__(self, input_dim: int, output_dim: int, dropout_prob: float):
        super().__init__()
        # Attribute names and creation order are kept stable so that
        # state_dict keys and seeded weight initialisation do not change.
        self.batch_norm = nn.BatchNorm1d(num_features=input_dim)
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(p=dropout_prob)

    def forward(self, x):
        """Normalise ``x``, project it, apply GELU, then dropout.

        Assumes ``x`` is shaped (batch, input_dim) — TODO confirm against the
        backbone output.
        """
        normalized = self.batch_norm(x)
        activated = self.gelu(self.linear(normalized))
        return self.dropout(activated)


def custom_header(x):
    """Build the custom classification head.

    Design rationale (as stated by the author):

    Pre-activation batch normalization
    - Used because this is the header of a deep backbone model, to address
      the internal covariate shift problem.

    Bottleneck
    - Expands the information and keeps only what is important, providing
      high representational power while keeping computation efficient.

    Args:
        x: Input feature size of the first block (passed as ``input_dim``).

    Returns:
        ``nn.Sequential`` of three ``HeaderBlock`` stages
        (x -> 1024 -> 512 -> 128, dropout 0.1 / 0.3 / 0.5), followed by
        ``nn.Linear(128, 1)`` and ``nn.Sigmoid``.
    """
    widths = (x, 1024, 512, 128)
    dropout_probs = (0.1, 0.3, 0.5)

    # Layers are created front-to-back, the same order as they are applied.
    stages = [
        HeaderBlock(input_dim=n_in, output_dim=n_out, dropout_prob=prob)
        for n_in, n_out, prob in zip(widths[:-1], widths[1:], dropout_probs)
    ]
    stages.append(nn.Linear(128, 1))
    stages.append(nn.Sigmoid())
    return nn.Sequential(*stages)


# Head factory later passed to Classification as `custom_head_fn`.
CUSTOM_HEAD_FN = custom_header

CUDA is available.
GPU size: 2
------------------------------------------------------------
GPU number: 0
Name: NVIDIA GeForce RTX 3080 Ti
Computer capability: 8.6
VRAM: 12GB
------------------------------------------------------------
GPU number: 1
Name: NVIDIA GeForce GTX 750
Computer capability: 5.0
VRAM: 1GB
------------------------------------------------------------


In [3]:
# Create the base directories
# NOTE(review): SOURCE, ESPOINT_DIR, LOG and RESULT are not defined in this file's cells;
# presumably they come from the star import of scripts.how_to_train — confirm.
make_basic_directory(source=SOURCE, estop_dir=ESPOINT_DIR, log=LOG, result=RESULT, make_new=False)

# Build key_df
path_list = GetAbsolutePath(None).get_all_path(parents_path=TRAIN_SET)
key_df = make_basic_key_df(
    paths=path_list,
    # Label = the part of the file name before its first '.', taken from
    # whatever follows the last '/' in the path.
    labels=[re.split(r".+/", i, maxsplit=1)[1].split('.')[0] for i in path_list]
)
# Convert the label column for binary classification, with 'dog' as the positive class
key_df['label'] = binary_label_convertor(array=key_df['label'], positive_class='dog')

# Build idx_dict
# Presumably do_or_load reuses the file at `savepath` unless `makes_new` is set — TODO confirm.
# NOTE(review): K_SIZE, TEST_RATIO and VALID_RATIO are defined outside this file's cells.
idx_dict = do_or_load(
    savepath=IDX_DICT_PATH, makes_new=MAKE_NEW_IDX_DICT, 
    fn=make_stratified_idx_dict,
    key_df=key_df, stratified_columns=['label'], is_binary=True,
    path_col='path', label_col='label', 
    k_size=K_SIZE, test_ratio=TEST_RATIO, valid_ratio=VALID_RATIO
)

# Fix every seed before training
set_seed_everything(seed=SEED)

In [4]:
# Settings gathered here are handed to GetLoader in a later cell.
augments = sample_augment
batch_size = BATCH_SIZE
img_size = IMG_SIZE
worker = WORKER
# NOTE(review): the meaning of the resize mode codes is defined by the loader
# implementation, not visible here — confirm there.
resize_how = 0
resize_how_list = [2, 3, 4]
resize_padding_color = RESIZE_PADDING_COLOR

In [5]:
# Keep only the first k-fold entry: `k` is its key, `k_idx_dict` its idx_dict.
for k, k_idx_dict in idx_dict.items():
    break

In [6]:
# Data Loader 정의
loader = GetLoader(
    dataset_class=ImgDataset, idx_dict=k_idx_dict,
    augments=augments, batch_size=batch_size, workers=0, 
    resize=img_size, resize_how=resize_how, resize_how_list=resize_how_list,
    resize_padding_color = resize_padding_color
)
# model 정의
model = Classification(
    model_name=MODEL_NAME, pretrained=True, 
    channel=IMG_CHANNER, class_size=CLASS_SIZE,
    custom_head_fn=CUSTOM_HEAD_FN
)
# Fine tuning 방법 정의


In [7]:
# Pull a single batch from the training loader: the loop exits after its first
# iteration, leaving `imgs` and `labels` bound to that batch.
for imgs, labels in loader.train:
    break

In [8]:
# """
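# Implementation plan
# 1. Full fine-tuning: start with nothing configured and set every parameter to trainable (True)
# 2. Fixed Feature Extractor: train by a given percentage (%)
# 3. Layer-wise Unfreezing: unfreeze gradually starting from the bottom; take a max % to cap how much is unfrozen
# 4. Differential Learning Rate: increase or decrease each layer's learning rate along a linear or logarithmic curve


# Freeze at layer granularity rather than block granularity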
# """

In [9]:
from GGDL.model.fine_tuning import FullFineTuning, FixedFeatureExtractor, PartialLayerFreezing

In [10]:
class LayerWiseUnfreezing(Tuner):
    """Layer-wise unfreezing tuner; at this stage it only stores its configuration.

    NOTE(review): ``Tuner.__init__`` is not invoked here. The values it would
    normally set up (``model``, ``idx_df``, ``idx_array``) are instead supplied
    by the caller — confirm that skipping the base initializer is intended.
    """

    def __init__(self, model, idx_df, idx_array, min, max, patience):
        """Store the tuning configuration on the instance.

        Args:
            model: Model to be tuned.
            idx_df: Index table taken from an existing ``Tuner`` instance.
            idx_array: Index array taken from an existing ``Tuner`` instance.
            min: Lower bound; presumably a fraction of layers — TODO confirm.
            max: Upper bound; presumably a fraction of layers — TODO confirm.
            patience: Patience setting; how it is used is not yet implemented here.
        """
        # `min` / `max` shadow the builtins inside this method; the names are
        # kept because callers pass them by keyword.
        self.model, self.idx_df, self.idx_array = model, idx_df, idx_array
        self.min, self.max = min, max
        self.patience = patience


In [11]:
# Base tuner built on the model with tuning_target='layer'; its model, idx_df
# and idx_array attributes are reused when constructing LayerWiseUnfreezing.
TUNER_INS = Tuner(model=model, tuning_target='layer')

In [16]:
# Trial instance of the work-in-progress tuner, fed with the model and index
# structures already held by TUNER_INS.
TEST_INS = LayerWiseUnfreezing(
    model=TUNER_INS.model, idx_df=TUNER_INS.idx_df, idx_array=TUNER_INS.idx_array,
    min=0.05, max=0.95, patience=5
    )