# Library

In [1]:
from Module.Global_variable import os, time, torch, np, pd, plt

from Module.utils.Convenience_Function import save_pickle, draw_img_and_bbox_torch_style, time_checker

import Module.process1.preparatory_items as p1pi
from Module.process1.index_dictionary_maker import get_index_dictionary
from Module.process1.torch_dataset import get_my_dataLoader
from Module.process1.torch_basic_style_model import get_my_torch_model, get_optimizer

# Global Variable

In [2]:
# Definitions required before the process starts
#####################################################################################
# Whether to create the directories
_MAKE_NEW_DIRECTORY = False
# Whether to build a new index dictionary
_MAKE_NEW_INDEX_DICT = False
# Whether to print logs
_VERBOSE = True
# GPU number to use
_GPU_NUMBER = 3
torch.cuda.set_device(_GPU_NUMBER)   # set basic gpu
# Adjust the torch._dynamo cache size limit
torch._dynamo.config.cache_size_limit = 64   # Default
#####################################################################################


# Global variables used during training
#####################################################################################
# Create the base directories of the process
p1pi.make_process_start_dir(makes_new=_MAKE_NEW_DIRECTORY)

# index dictionary with Data Loader
_IDX_DICT = get_index_dictionary(process_boolean=_MAKE_NEW_INDEX_DICT).process()
_LOADER = get_my_dataLoader(_IDX_DICT)
#####################################################################################


# Variables related to model training
#####################################################################################
_PROCESS_SET_DICT = p1pi.get_process_set_dict(
    save_iter_time_log=True,   # whether to save the per-iteration time log
    verbose=_VERBOSE
)
_MODEL_SET_DICT = p1pi.get_model_set_dict(
    model_key="faster_fpn",
    faster_bb_key="resnet50_v2",
    optimizer_key="Adam"
)
# NOTE(review): the keyword below is spelled `learing_rate`, while the training class
# reads hp_dict['learning_rate']. Presumably the spelling matches the helper's own
# parameter name - confirm against p1pi.get_HP_set_dict before renaming it.
_HYPER_PARAMS_DICT = p1pi.get_HP_set_dict(
    learing_rate=0.00005, weight_decay=0.005,
    T_0=20, T_mult=2, eta_min=0.0000001
)
#####################################################################################

# Function

In [3]:
import math, sys, time, json
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from Module.utils.Convenience_Function import time_checker
from Module.utils.Convenience_Function_by_torch import torch_device, state_dict_to_np_array_dict
from Module.utils.log_utils import my_progressbar_time_log

In [4]:
class loss_handler:
    """Collects the losses of every iteration of one training epoch.

    Five parallel lists are kept: the summed loss (``loss``) and the four
    component losses read from the model's loss dictionary.
    """

    # Component keys read from each loss dictionary, in reporting order.
    _COMPONENT_KEYS = (
        "loss_classifier",
        "loss_box_reg",
        "loss_objectness",
        "loss_rpn_box_reg",
    )
    # Every tracked series: the summed loss first, then the components.
    _TRACKED_KEYS = ("loss",) + _COMPONENT_KEYS

    def __init__(self):
        # One list per tracked quantity; each loss_stack() call appends one value to each.
        self.loss = []
        self.loss_classifier = []
        self.loss_box_reg = []
        self.loss_objectness = []
        self.loss_rpn_box_reg = []

    def loss_stack(self, loss_dict):
        """Append the losses of one iteration to the tracked lists.

        Args:
            loss_dict: mapping that contains the four component keys; its
                values must support ``+`` and expose ``.item()``.
        """
        summed = sum(value for value in loss_dict.values())
        self.loss.append(summed.item())
        for key in self._COMPONENT_KEYS:
            getattr(self, key).append(loss_dict[key].item())

    def calculate_loss_averate(self):
        """Return a dict mapping every tracked key to the ``np.mean`` of its list."""
        averages = {}
        for key in self._TRACKED_KEYS:
            averages[key] = np.mean(getattr(self, key))
        return averages

    def make_loss_sentence(self, aver_loss_dict, spend_time):
        """Format the averaged losses and the elapsed time as a one-line summary.

        Args:
            aver_loss_dict: dict holding the five tracked keys (as returned by
                ``calculate_loss_averate``).
            spend_time: elapsed-time value; rendered with ``str()``.

        Returns:
            The summary string, with every loss printed to four decimals.
        """
        return (
            f" [time] {spend_time!s}"
            f" [Loss] total: {aver_loss_dict['loss']:1.4f},"
            f" classifier: {aver_loss_dict['loss_classifier']:1.4f},"
            f" box_reg: {aver_loss_dict['loss_box_reg']:1.4f},"
            f" objectness: {aver_loss_dict['loss_objectness']:1.4f},"
            f" rpn_box_reg: {aver_loss_dict['loss_rpn_box_reg']:1.4f}"
        )

    def make_loss_log(self):
        """Return a dict exposing the tracked lists themselves (not copies)."""
        return {key: getattr(self, key) for key in self._TRACKED_KEYS}




class model_train_and_evaluate:
    """Owns the model, optimizer, LR scheduler, AMP scaler and JSON time log of one training run.

    Usage, as done in the process cells of this notebook: construct the
    instance, assign ``train_loader`` / ``valid_loader`` / ``test_loader``,
    call ``import_model()``, ``import_optimizer_and_scheduler()`` and
    ``make_log_file(k)``, then call ``model_train_process(epoch)`` once per epoch.
    """

    def __init__(self, p_set_dict, m_set_dict, hp_dict, loader, gpu_num, use_compile=False, log_freq=5):
        """Store the settings; nothing is built until the ``import_*`` methods run.

        Args:
            p_set_dict: process settings; keys read here are ``'verbose'``,
                ``'save_iter_time_log'`` and ``'train_log_key'``.
            m_set_dict: model settings; keys read here are ``'num_class'``,
                ``'model_key'``, ``'faster_bb_key'``, ``'optimizer'``,
                ``'epochs'``, ``'use_AMP'`` and ``'max_norm'``.
            hp_dict: hyper-parameters; keys read here are ``'learning_rate'``,
                ``'weight_decay'``, ``'T_0'``, ``'T_mult'`` and ``'eta_min'``.
            loader: object stored as ``self.loader`` (not used by the methods
                of this class itself).
            gpu_num: GPU number passed to ``torch_device().get_device`` and
                embedded in the log file name.
            use_compile: when True the model is wrapped with ``torch.compile``.
            log_freq: an iteration log entry is written every ``log_freq``
                iterations (and on the last iteration).
        """
        self.p_set_dict = p_set_dict
        self.m_set_dict = m_set_dict
        self.hp_dict = hp_dict
        self.loader = loader
        self.gpu_num = gpu_num
        self.device = torch_device().get_device(gpu_number=gpu_num)
        self.use_compile = use_compile
        self.log_freq = log_freq

        # Filled in later by the caller / the import_* and make_log_file methods.
        self.train_loader = None
        self.valid_loader = None
        self.test_loader = None
        self.model = None
        self.optimizer = None
        self.scheduler = None
        self.scaler = None
        self.train_log_path = None

    def import_model(self):
        """Build the model, move it to ``self.device`` and store it in ``self.model``."""
        model = get_my_torch_model(
            class_num=self.m_set_dict['num_class'],
            model_key=self.m_set_dict['model_key'],
            faster_bb_key=self.m_set_dict['faster_bb_key']
        ).process().to(self.device)
        # Optional torch 2.0 compile - a slowdown was observed with object-detection models.
        self.model = torch.compile(model) if self.use_compile else model

    def import_optimizer_and_scheduler(self):
        """Create the optimizer, the warm-restart cosine scheduler and the AMP grad scaler.

        Requires ``self.model`` to be set (call ``import_model`` first).
        """
        # Optimizer
        self.optimizer = get_optimizer(
            self.model,
            learning_rate=self.hp_dict['learning_rate'],
            weight_decay=self.hp_dict['weight_decay'],
            opt_key=self.m_set_dict['optimizer']
        )
        # Scheduler
        self.scheduler = CosineAnnealingWarmRestarts(
            optimizer=self.optimizer,
            T_0=self.hp_dict['T_0'],
            T_mult=self.hp_dict['T_mult'],
            eta_min=self.hp_dict['eta_min']
        )
        # Scaler (AMP) - gradient scaling applied to the autocast forward pass.
        self.scaler = torch.cuda.amp.GradScaler()

    # Training
    #################################################################################
    def model_train_process(self, epoch):
        """Train the model for one pass over ``self.train_loader``.

        Args:
            epoch: zero-based epoch index (shown as ``epoch + 1`` in the
                progress header and stored in the iteration logs).

        Returns:
            The ``loss_handler`` holding the losses of every iteration of this epoch.
        """
        # Progress / time-log helper for this epoch
        Log_Ins = my_progressbar_time_log(
            header="Epochs: %i/%i" % (epoch+1, self.m_set_dict["epochs"]),
            verbose=self.p_set_dict['verbose'],
            sep_next=False
        )
        # Per-iteration loss accumulator
        Loss_Ins = loss_handler()

        self.model.train()   # model train

        start_time = time.time()
        for iteration, (imgs, targets) in enumerate(Log_Ins.with_time_log(self.train_loader)):

            # 1. upload to device
            imgs = self.image_list_upload_to_device(img_list=imgs)
            targets = self.target_list_upload_to_device(target_list=targets)

            # 2. model training - AMP
            with torch.cuda.amp.autocast(enabled=self.m_set_dict["use_AMP"]):
                loss_dict = self.model(imgs, targets)
                # sum all loss_dict's loss(loss_classification, loss_box_reg, loss_objectness, loss_rpn_box_reg)
                losses = sum(loss for loss in loss_dict.values())

            # 3. back propagation
            self.back_propagation(losses)

            # 4. Log - the time log is only updated/written when it is enabled
            if self.p_set_dict['save_iter_time_log']:
                self.make_iteration_time_log(epoch, iteration, time_log_ins=Log_Ins)
            # 4.1. Collect the losses of this iteration
            Loss_Ins.loss_stack(loss_dict)

        if self.p_set_dict['verbose']:
            print(Loss_Ins.make_loss_sentence(Loss_Ins.calculate_loss_averate(), time_checker(start_time)))

        return Loss_Ins

    def image_list_upload_to_device(self, img_list):
        """Return a new list with every image moved to ``self.device``.

        The images of a mini-batch arrive as a list, so ``.to(device)`` is
        applied to each element individually.
        """
        return [img.to(self.device) for img in img_list]

    def target_list_upload_to_device(self, target_list):
        """Return new target dicts whose tensor values are moved to ``self.device``.

        Values that are not ``torch.Tensor`` instances are passed through unchanged.
        """
        result = []
        for target in target_list:
            device_dict = dict()
            for key, value in target.items():
                device_dict[key] = value.to(self.device) if isinstance(value, torch.Tensor) else value
            result.append(device_dict)
        return result

    def back_propagation(self, losses):
        """Zero the gradients, run one (optionally AMP-scaled) update, then step the scheduler.

        Args:
            losses: scalar loss to back-propagate.
        """
        self.optimizer.zero_grad()
        # 3.1. AMP - use the GradScaler path or the plain path
        if self.m_set_dict["use_AMP"]:
            self.gradient_scaled_parameter_update_with_clipping(losses=losses)
        else:
            self.parameter_update_with_clipping(losses=losses)
        # 3.2. step scheduler - CosineAnnealingWarmRestarts is stepped once per iteration.
        # NOTE(review): per the PyTorch AMP docs, scaler.step() may skip optimizer.step()
        # when gradients are non-finite; the scheduler still advances here - confirm
        # that this is intended.
        self.scheduler.step()

    def gradient_scaled_parameter_update_with_clipping(self, losses):
        """AMP update: scaled backward, unscale, clip gradients, optimizer step, scaler update."""
        # Back propagation on the scaled loss
        self.scaler.scale(losses).backward()
        # Gradient clipping before the parameter update
        #################################################################
        # With AMP, clipping must happen after the gradients are unscaled.
        self.scaler.unscale_(self.optimizer)
        torch.nn.utils.clip_grad_norm_(
            self.model.parameters(), max_norm=self.m_set_dict['max_norm']
        )
        #################################################################
        # Parameter update through the scaler
        self.scaler.step(self.optimizer)
        # scaler update
        self.scaler.update()

    def parameter_update_with_clipping(self, losses):
        """Plain update: backward, clip gradients to ``max_norm``, optimizer step."""
        losses.backward()
        # Gradient clipping before the parameter update
        #################################################################
        torch.nn.utils.clip_grad_norm_(
            self.model.parameters(), max_norm=self.m_set_dict['max_norm']
        )
        ################################################################
        # parameter update
        self.optimizer.step()
    #################################################################################

    # Logging
    #################################################################################
    def make_log_file(self, k):
        """Create the JSON log file, initialised to an empty list ``[]``.

        Does nothing unless ``p_set_dict['save_iter_time_log']`` is truthy.
        The path is ``<train_log_key>_<gpu_num>_<k>.json`` and is stored in
        ``self.train_log_path``.
        """
        if self.p_set_dict['save_iter_time_log']:
            self.train_log_path = self.p_set_dict['train_log_key'] + "_" + str(self.gpu_num) + f"_{k}.json"
            with open(self.train_log_path, 'w') as file:
                json.dump([], file)

    def make_iteration_time_log(self, epoch, iteration, time_log_ins):
        """Update the time-log helper and, on logging iterations, append a log entry.

        An entry is written when ``iteration`` is a multiple of
        ``self.log_freq`` or is the last iteration
        (``time_log_ins.iter_size - 1``).

        Returns:
            The log dict that was appended to the log file, or ``None`` when
            this iteration was not logged.
        """
        # Bring the helper's timers up to the end of this iteration.
        time_log_dict = time_log_ins.put_it_at_the_end_of_the_iteration_process()
        is_last_iteration = iteration == time_log_ins.iter_size - 1
        if (iteration % self.log_freq == 0) or is_last_iteration:
            log = self.iter_log_dictionary_maker(epoch, iteration, time_log_dict)
            self.overwrite_train_log(log)
            return log
        return None

    def overwrite_train_log(self, log):
        """Append ``log`` to the JSON list stored at ``self.train_log_path``.

        The whole file is read, extended and rewritten on every call.
        """
        # Read the existing entries
        with open(self.train_log_path, 'r') as file:
            data = json.load(file)
        # Add the new entry
        data.append(log)
        # Write everything back (compact form; no indent is applied)
        with open(self.train_log_path, 'w') as file:
            json.dump(data, file)

    def iter_log_dictionary_maker(self, epoch, iteration, time_log_dict):
        """Build the log entry of one iteration.

        Args:
            epoch: epoch index stored under ``"epoch"``.
            iteration: iteration index stored under ``"iteration"``.
            time_log_dict: dict providing ``'eta'``, ``'stack'``,
                ``'data_load'`` and ``'iter_train'``.

        Returns:
            Dict with keys ``epoch``, ``iteration``, ``lr`` (learning rate of
            the first parameter group, formatted to 8 decimals), ``eta``,
            ``elapsed``, ``load`` and ``iter_train``.
        """
        log = dict()
        log["epoch"] = epoch
        log["iteration"] = iteration
        log["lr"] = "{:1.8f}".format(self.optimizer.param_groups[0]["lr"])
        # Time-related values
        log["eta"] = time_log_dict['eta']
        log["elapsed"] = time_log_dict['stack']
        log["load"] = time_log_dict['data_load']
        log["iter_train"] = time_log_dict['iter_train']
        return log
    #################################################################################

# Process

In [5]:
# Prepare the trainer for index k; the trailing `break` stops after the first k.
for k in range(_PROCESS_SET_DICT["k_size"]):

    # Trainer instance carrying the settings, the loader object and the GPU number
    MTnE_Ob = model_train_and_evaluate(
        p_set_dict=_PROCESS_SET_DICT,
        m_set_dict=_MODEL_SET_DICT,
        hp_dict=_HYPER_PARAMS_DICT,
        loader=_LOADER,
        gpu_num=_GPU_NUMBER,
    )

    # Fetch the three DataLoaders for index k and attach them to the trainer
    (
        MTnE_Ob.train_loader,
        MTnE_Ob.valid_loader,
        MTnE_Ob.test_loader,
    ) = MTnE_Ob.loader.get_all_torch_dataLoader(k)

    # Build the model, then the optimizer / scheduler / scaler that depend on it
    MTnE_Ob.import_model()
    MTnE_Ob.import_optimizer_and_scheduler()

    # Start a fresh log file for this k
    MTnE_Ob.make_log_file(k)

    break

In [6]:
%%time

# Run one training pass per epoch configured in m_set_dict["epochs"].
for epoch in range(MTnE_Ob.m_set_dict["epochs"]):
    
    # Model training
    # NOTE(review): Loss_Ins is rebound on every epoch, so after the loop only the
    # loss_handler of the final epoch is still referenced.
    Loss_Ins = MTnE_Ob.model_train_process(epoch)





CPU times: user 11h 13min 49s, sys: 4h 25min 46s, total: 15h 39min 36s
Wall time: 13h 54min 57s


In [7]:
# # model을 가지고 온다.
# model = get_model_FineTuning_classSize()

# # train model
# imgs, targets = next(iter(train_loader))
# output = model(imgs, targets)   # Returns losses and detections

# # model predict
# model.eval()
# x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# predictions = model(x)