# Library

In [None]:
from Module.Global_variable import os, time, torch, np, pd, plt, K_SIZE
from Module.Convenience_Function import save_pickle, draw_img_and_bbox_torch_style
from Module.Convenience_Function_by_torch import state_dict_to_np_array_dict

from Module.process1_index_dictionary_maker import get_index_dictionary
from Module.process1_torch_basic_style_model import get_my_torch_model, get_optimizer
from Module.process1_main_process import setting_dict, pixed_hyperParameter_dict, hyperParameter_dict

In [None]:
import math
import sys
import time
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

from Module import utils
from Module.Convenience_Function import progressbar, time_checker

# Environment variable

In [None]:
# Adjust the cache limit of `torch._dynamo`.
## Tunes a setting of PyTorch's dynamic compiler.
## Original author's note: raise the cache size from the default of 64 to 128.
## The limit is the maximum number of optimized code fragments that can be kept in the cache.
## Original author's observation: the cache is allocated in the RAM buff/cache memory.
## Original author's observation: with a setting of 512, buff/cache grew by about 4GB (172G -> 176G).
## NOTE(review): the assignment below sets 64, not the 128 described above - confirm which value is intended.
torch._dynamo.config.cache_size_limit = 64

# Global Variable

In [None]:
# Adjustable global variables
########################################################
# Whether to build the index dictionary anew.
MAKE_NEW_INDEX_DICT = False
########################################################


# Initial values for the process
########################################################
# Index of the CUDA device; used for torch.cuda.set_device and passed to setting_dict.
GPU_NUMBER = 3

torch.cuda.set_device(GPU_NUMBER)
# Holds the three configuration entries ("setting", "pixed_hyperParameter",
# "hyperParameter") that model_training_process reads from its key_dict argument.
process_key_dict=dict()
process_key_dict["setting"]=setting_dict(
    model_key="basic_model",
    gpu_number=GPU_NUMBER,
    idx_dict=get_index_dictionary(process_boolean=MAKE_NEW_INDEX_DICT).process(),
    optimizer="Adam",
    verbose=True
)
process_key_dict["pixed_hyperParameter"]=pixed_hyperParameter_dict()
# NOTE(review): the keyword below is spelled "learing_rate", while the training class
# reads hp_dict["learning_rate"] - verify against the signature of hyperParameter_dict.
# T_0, T_mult and eta_min are later forwarded to CosineAnnealingWarmRestarts.
process_key_dict["hyperParameter"]=hyperParameter_dict(
    learing_rate=0.001, 
    weight_decay=0.05,
    T_0=20,
    T_mult=2,
    eta_min=0.000001
)
########################################################

# Function

In [None]:
class model_training_process:
    """Hold the loaders, model and optimisation objects of one training run.

    The instance is configured from ``key_dict``; every runtime object starts
    as ``None`` and is filled in by the ``Load_*`` methods.
    """

    def __init__(self, key_dict, use_torch2_compile=False):
        """Keep the configuration dictionaries and clear all runtime slots.

        Args:
            key_dict: mapping with the entries "setting",
                "pixed_hyperParameter" and "hyperParameter".
            use_torch2_compile: if true, ``Load_model_and_optimizer`` wraps
                the model with ``torch.compile``.
        """
        self.use_torch2_compile = use_torch2_compile

        # Configuration dictionaries.
        self.setting_dict = key_dict["setting"]
        self.pixed_hp_dict = key_dict["pixed_hyperParameter"]
        self.hp_dict = key_dict["hyperParameter"]

        # Runtime objects, populated later by the Load_* methods.
        self.train_loader = self.valid_loader = self.test_loader = None
        self.model = self.optimizer = self.scheduler = self.scaler = None

    def Load_k_fold_dataLoader(self, k):
        """Fetch the train / valid / test data loaders for fold ``k``.

        The loaders come from ``setting_dict["loader"].get_all_torch_dataLoader``.
        """
        loader_source = self.setting_dict["loader"]
        fold_loaders = loader_source.get_all_torch_dataLoader(k)
        self.train_loader, self.valid_loader, self.test_loader = fold_loaders

    def Load_model_and_optimizer(self):
        """Create the model, optimizer, LR scheduler and AMP gradient scaler."""
        settings = self.setting_dict

        # Build the model and move it onto the configured device.
        model_builder = get_my_torch_model(
            class_num=settings["num_class"], key=settings["model_key"]
        )
        network = model_builder.process().to(settings["device"])
        # Optionally wrap the model with the torch 2.0 compiler.
        if self.use_torch2_compile:
            network = torch.compile(network)
        self.model = network

        # Optimizer selected by the "optimizer" setting.
        hyper_params = self.hp_dict
        self.optimizer = get_optimizer(
            self.model,
            learning_Rate=hyper_params["learning_rate"],
            weight_decay=hyper_params["weight_decay"],
            opt_key=settings["optimizer"],
        )
        # Cosine annealing scheduler with warm restarts.
        self.scheduler = CosineAnnealingWarmRestarts(
            optimizer=self.optimizer,
            T_0=hyper_params["T_0"],
            T_mult=hyper_params["T_mult"],
            eta_min=hyper_params["eta_min"],
        )
        # Gradient scaler, used for AMP.
        self.scaler = torch.cuda.amp.GradScaler()

    def data_upload_to_device(self, image_list, target_list):
        """Return copies of the images and targets placed on the device.

        Args:
            image_list: iterable of objects exposing ``.to(device)``.
            target_list: iterable of dicts; values that are ``torch.Tensor``
                are moved to the device, all other values (such as ids that
                are not tensors) are passed through unchanged.

        Returns:
            A tuple ``(images, targets)``: a list of moved images and a list
            of new target dictionaries.
        """
        def on_device(tensor):
            # The device is looked up at call time, once per moved tensor.
            return tensor.to(self.setting_dict["device"])

        moved_images = [on_device(image) for image in image_list]
        moved_targets = [
            {
                name: on_device(item) if isinstance(item, torch.Tensor) else item
                for name, item in target.items()
            }
            for target in target_list
        ]
        return moved_images, moved_targets

    def _clip_gradients(self):
        """Clip the gradient norm of the model parameters to ``max_norm``."""
        torch.nn.utils.clip_grad_norm_(
            self.model.parameters(), max_norm=self.pixed_hp_dict["max_norm"]
        )

    def gradients_scaling_with_clipping(self, losses):
        """Run one AMP update: scaled backward, unscale, clip, step, update."""
        # Back-propagate the scaled loss.
        self.scaler.scale(losses).backward()
        # With AMP, clipping has to happen after the gradients are unscaled.
        self.scaler.unscale_(self.optimizer)
        self._clip_gradients()
        # Parameter update through the scaler, then refresh the scaler itself.
        self.scaler.step(self.optimizer)
        self.scaler.update()

    def normal_gradient_descent_with_clipping(self, losses):
        """Run one plain update: backward, clip gradients, optimizer step."""
        losses.backward()
        # Clip before the parameters are updated.
        self._clip_gradients()
        self.optimizer.step()

# Process

In [None]:
for k in range(K_SIZE):
    
    # Create the instance used for model training.
    MT_Ob = model_training_process(key_dict=process_key_dict, use_torch2_compile=False)
    # Fetch the data loaders that belong to fold k.
    MT_Ob.Load_k_fold_dataLoader(k)
    # Set up the model, optimizer, scheduler, etc.
    MT_Ob.Load_model_and_optimizer()
    
    
    # Leave the loop after the first pass, so only the first fold is prepared.
    break

In [None]:
# Passed to metric_logger.log_every in the training loop (presumably the logging interval in iterations).
print_freq = 5

In [None]:
# Number of epochs, read from the fixed hyper-parameters; only used for the progress message.
max_epoch = MT_Ob.pixed_hp_dict['epochs']
for epoch in range(MT_Ob.pixed_hp_dict["epochs"]):
    print(f"Epoch: {epoch} / {max_epoch}")
    
    # Put the model into training mode.
    MT_Ob.model.train()

    #####################################################################################
    # A fresh metric logger per epoch, with an additional "lr" meter.
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"
    #####################################################################################

    # Manual iteration counter, used only for the timing message at the end of each iteration.
    iteration_num = 0
    for imgs, targets, in metric_logger.log_every(MT_Ob.train_loader, print_freq, header):
    # for imgs, targets, in progressbar(MT_Ob.train_loader, prefix=f"iteration: ", verbose=MT_Ob.setting_dict["verbose"]):

        # Start time of this iteration, handed to time_checker below.
        start_iter = time.time()

        # Move the data used for training onto the device.
        imgs, targets = MT_Ob.data_upload_to_device(imgs, targets)
        # Whether autocast of automatic mixed precision (AMP) is enabled.
        with torch.cuda.amp.autocast(enabled=MT_Ob.setting_dict["use_amp"]):
            loss_dict = MT_Ob.model(imgs, targets)
            # Add up every loss in loss_dict (loss_classification, loss_box_reg, loss_objectness, loss_rpn_box_reg).
            losses = sum(loss for loss in loss_dict.values())

        ###################################################################################
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        # Abort the whole process when the reduced loss is NaN or infinite.
        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)
        ###################################################################################


        # Run gradient descent based on the loss.
        MT_Ob.optimizer.zero_grad()
        # Use GradScaler to mitigate underflow / overflow problems that can occur with autocast.
        if MT_Ob.setting_dict["use_amp"]:
            MT_Ob.gradients_scaling_with_clipping(losses)
        else:
            MT_Ob.normal_gradient_descent_with_clipping(losses)

        # CosineAnnealingWarmRestarts is used, so the scheduler is stepped inside the iteration loop.
        MT_Ob.scheduler.step()

        ######################################################################################
        # Record the reduced losses and the current learning rate of the first parameter group.
        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=MT_Ob.optimizer.param_groups[0]["lr"])
        ######################################################################################



        ######################################################################################
        # Print the iteration index with the result of time_checker
        # (presumably the time elapsed since start_iter - confirm in Convenience_Function).
        print(f"{iteration_num}: {time_checker(start_iter)}")
        iteration_num += 1
        ######################################################################################


In [None]:
# %%time
# # model을 학습 상태로 정의
# MT_Ob.model.train()

# #####################################################################################
# metric_logger = utils.MetricLogger(delimiter="  ")
# metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
# header = f"Epoch: [{epoch}]"
# #####################################################################################

# iteration_num = 0
# for imgs, targets, in metric_logger.log_every(MT_Ob.train_loader, print_freq, header):
# # for imgs, targets, in progressbar(MT_Ob.train_loader, prefix=f"iteration: ", verbose=MT_Ob.setting_dict["verbose"]):
    
#     # 학습에 사용될 데이터들을 device로 올린다.
#     imgs, targets = MT_Ob.data_upload_to_device(imgs, targets)
#     # 자동혼합정밀도(AMP)의 autocast 사용 여부
#     with torch.cuda.amp.autocast(enabled=MT_Ob.setting_dict["use_amp"]):
#         loss_dict = MT_Ob.model(imgs, targets)
#         # loss_dict에 있는 모든 loss(loss_classification, loss_box_reg, loss_objectness, loss_rpn_box_reg)의 값들을 합친다.
#         losses = sum(loss for loss in loss_dict.values())
        
#     ###################################################################################
#     # reduce losses over all GPUs for logging purposes
#     loss_dict_reduced = utils.reduce_dict(loss_dict)
#     losses_reduced = sum(loss for loss in loss_dict_reduced.values())

#     loss_value = losses_reduced.item()

#     if not math.isfinite(loss_value):
#         print(f"Loss is {loss_value}, stopping training")
#         print(loss_dict_reduced)
#         sys.exit(1)
#     ###################################################################################
    
        
#     # 손실값을 기반으로 경사 하강 실시.
#     MT_Ob.optimizer.zero_grad()
#     # GradScaler를 적용하여, Autocast 사용 시 발생할 수 있는 Underflow나 Overflow 문제 완화
#     if MT_Ob.setting_dict["use_amp"]:
#         MT_Ob.gradients_scaling_with_clipping(losses)
#     else:
#         MT_Ob.normal_gradient_descent_with_clipping(losses)

#     # CosineAnnealingWarmRestarts를 사용하였으므로, Iteration 안에서 적용
#     MT_Ob.scheduler.step()
    
#     ######################################################################################
#     metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
#     metric_logger.update(lr=MT_Ob.optimizer.param_groups[0]["lr"])
#     ######################################################################################

In [None]:
# # model을 가지고 온다.
# model = get_model_FineTuning_classSize()

# # train model
# imgs, targets = next(iter(train_loader))
# output = model(imgs, targets)   # Returns losses and detections

# # model predict
# model.eval()
# x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# predictions = model(x)