In [3]:
import pandas as pd
from sklearn.exceptions import ConvergenceWarning

import aidd.sys.config as cfg
from aidd.utils.data_io import get_modeling_data
from aidd.utils.data_io import read_data, save_data
from aidd.utils.data_io import read_pickle, save_pickle
from aidd.modeling.evaluations import regression_evals

In [4]:
class Learning:
    def __init__(self, mdata=None):
        self.mdata = mdata
        self.best = {
            pc: {'MODEL': None, 'SCORE': 0, 'MODEL_KEY': ''} \
                for pc in cfg.DATA_PC_TYPE
        }
        self.history = {}
        self._run()
        
    def _run(self):
        if self.mdata is None:
            self.mdata = get_modeling_data()
        for mkey in cfg.MODELS_KEY:
            for pckey in cfg.DATA_PC_TYPE:
                self._ml_model_fit_evals(mkey=mkey, pckey=pckey)
        # 최고 모델 저장
        for pckey in cfg.DATA_PC_TYPE:
            save_pickle(self.best[pckey]['MODEL'], 
                        file_code=f'DUMP,MODELS,{pckey},BEST')
        save_pickle(self.history, file_code='DUMP,MODELING_HISTORY')
                
    def _ml_model_fit_evals(self, mkey=None, pckey=None):
        # print(f'MODEL: {mkey}, POLE_CNT: {pckey}')
        model = cfg.MODELS['ML'][mkey]
        data = {key: self.mdata[pckey][key] for key in cfg.DATA_MD_TYPE}
        train_y = data['TRAIN_y'].to_numpy().reshape(-1)
        try:
            # model.fit(data['TRAIN_X'], data['TRAIN_y'])
            model.fit(data['TRAIN_X'], train_y)
        except ConvergenceWarning as ce:
            # 모델이 정상적으로 수렴되지 않을 때 발생하는 오류로,
            # LASSO 알고리즘으로 메델 생성시 발생함(무시해도 됨)
            pass
        pred = model.predict(data['TEST_X'])
        evals = regression_evals(y=data['TEST_y'].to_numpy(), p=pred, verbose=0)
        self._save_history(mkey, pckey, evals, model)
        
    def _save_history(self, mkey=None, pckey=None, evals=None, model=None):
        # 알고리즘별, 전주 숫자별 모델 저장
        save_pickle(model, file_code=f'DUMP,MODELS,{pckey},{mkey}')
        # 학습결과 저장
        self.history[f'({mkey}, {pckey})'] = evals
        # 전주 숫자별 최고 모델 선별(r2score가 가장 높은 모델)
        if self.best[pckey]['SCORE'] < evals[2]:
            self.best[pckey].update({
                'SCORE': evals[2], 'MODEL': model, 'MODEL_KEY': mkey
            })

In [5]:
ml = Learning()

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [6]:
ml.best

{'ALL': {'MODEL': GradientBoostingRegressor(),
  'SCORE': 0.7353331721227276,
  'MODEL_KEY': 'GBR'},
 '1': {'MODEL': ElasticNet(alpha=0.1, random_state=1234),
  'SCORE': 0.39347203777461004,
  'MODEL_KEY': 'EN'},
 'N1': {'MODEL': GradientBoostingRegressor(),
  'SCORE': 0.7402007481164794,
  'MODEL_KEY': 'GBR'}}