In [None]:
class Scaling:
    """Split a preprocessed frame into X/y subsets and standard-scale them.

    On construction the whole pipeline runs once:

    1. separate features (``X``) from the ``TOTAL_CONS_COST`` target (``y``),
    2. derive per-``pc_mode`` subsets based on ``POLE_CNT``,
    3. split every subset into train/test parts,
    4. fit a ``StandardScaler`` on each train part and apply it to both
       the train and the test part.

    Attributes:
        df: Source DataFrame (loaded via ``read_data`` when ``pdf`` is None).
        data: Unscaled artifacts keyed like ``'X_ALL'``, ``'TRAIN_X_1'``, ...
        sdata: Scaled train/test artifacts keyed like ``'TRAIN_X_ALL'``, ...
    """

    def __init__(self, pdf=None):
        """Load the input frame (unless ``pdf`` is given) and run the pipeline.

        Args:
            pdf: Optional pre-loaded DataFrame. When None, the frame is read
                with ``read_data('PP,SL', dtype={'CONS_ID': str})``.
        """
        self.df = read_data('PP,SL', dtype={'CONS_ID': str}) if pdf is None else pdf
        self.data = {}
        self.sdata = {}
        self._run()

    def _run(self):
        """Execute every pipeline stage in order."""
        self._split_Xy()
        # Only 'X_ALL'/'y_ALL' exist after _split_Xy, so the first entry of
        # cfg.DATA_PC_TYPE is expected to be 'ALL'; the remaining entries are
        # derived here as POLE_CNT-based subsets.
        for pc in cfg.DATA_PC_TYPE[1:]:
            self._split_pc(pc_mode=pc)
        for pc in cfg.DATA_PC_TYPE:
            self._split_train_test(pc_mode=pc)
        self._scaling()

    def _split_Xy(self):
        """Separate feature columns from the target column and persist both."""
        df = self.df

        # Target column.
        target_col = 'TOTAL_CONS_COST'
        # Feature columns: everything from column position 5 onward.
        # NOTE(review): presumably the first five columns are identifiers and
        # the target - confirm the target is not among the features.
        training_cols = df.columns[5:].tolist()
        # Split into X and y.
        X = df[training_cols].copy()
        y = df[target_col].copy()
        print(f'학습대상 속성 전체 크기: {X.shape}')

        self.data['X_ALL'] = X
        self.data['y_ALL'] = y

        # Persist the full training data.
        save_data(X, file_code='SCALING,X,ALL')
        save_data(y, file_code='SCALING,y,ALL')

    def _split_pc(self, pc_mode='1'):
        """Store and persist the subset of rows that belongs to ``pc_mode``.

        Args:
            pc_mode: ``'1'`` selects rows with ``POLE_CNT == 1``; any other
                value selects rows with ``POLE_CNT != 1``.
        """
        _X = self.data['X_ALL']
        _y = self.data['y_ALL']

        # Both branches filter X and y with the same mask; only the mask
        # itself depends on the mode.
        if pc_mode == '1':
            conditions = _X.POLE_CNT == 1
        else:
            conditions = _X.POLE_CNT != 1
        X, y = _X[conditions], _y[conditions]
        print(f'PC_MODE[{pc_mode}] X Size: {X.shape}')

        self.data[f'X_{pc_mode}'] = X
        self.data[f'y_{pc_mode}'] = y

        # Persist the subset.
        save_data(X, file_code=f'SCALING,X,{pc_mode}')
        save_data(y, file_code=f'SCALING,y,{pc_mode}')

    def _split_train_test(self, pc_mode='ALL'):
        """Split the ``pc_mode`` subset 80/20 into train and test parts.

        Args:
            pc_mode: Key suffix of the subset stored in ``self.data``.
        """
        X = self.data[f'X_{pc_mode}']
        y = self.data[f'y_{pc_mode}']

        # NOTE(review): no random_state is passed, so the split differs on
        # every run - confirm whether reproducibility is required.
        train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2)
        msg = f'PC_MODE[{pc_mode}] Total{X.shape}, Train{train_X.shape}, ' \
              f'Test{test_X.shape}'
        print(msg)
        self.data[f'TRAIN_X_{pc_mode}'] = train_X
        self.data[f'TRAIN_y_{pc_mode}'] = train_y
        self.data[f'TEST_X_{pc_mode}'] = test_X
        self.data[f'TEST_y_{pc_mode}'] = test_y

    def _scaling(self):
        """Scale the train/test features of every configured ``pc_mode``."""
        for pc in cfg.DATA_PC_TYPE:
            self._scaling_main(pc_mode=pc)

    def _scaling_main(self, pc_mode='ALL'):
        """Fit a scaler on the train features and transform train and test.

        The scaler is fitted on the training part only and then applied to
        the test part. Scaled frames, the untouched targets and the fitted
        scaler are stored in ``self.sdata`` and persisted.

        Args:
            pc_mode: Key suffix of the train/test split in ``self.data``.
        """
        train_X = self.data[f'TRAIN_X_{pc_mode}']
        test_X = self.data[f'TEST_X_{pc_mode}']

        cols = train_X.columns.tolist()
        scaler = StandardScaler()
        train_sX = scaler.fit_transform(train_X)
        test_sX = scaler.transform(test_X)

        # NOTE(review): the scaled frames get a fresh positional index while
        # the y series keep their original index - confirm downstream code
        # pairs X and y by position rather than by index label.
        train_sX_df = pd.DataFrame(train_sX, columns=cols)
        test_sX_df = pd.DataFrame(test_sX, columns=cols)

        # Keep the results on the instance.
        self.sdata[f'TRAIN_X_{pc_mode}'] = train_sX_df
        self.sdata[f'TRAIN_y_{pc_mode}'] = self.data[f'TRAIN_y_{pc_mode}']
        # Fix: the test slot previously received the scaled *training* frame.
        self.sdata[f'TEST_X_{pc_mode}'] = test_sX_df
        self.sdata[f'TEST_y_{pc_mode}'] = self.data[f'TEST_y_{pc_mode}']

        # Not sure whether persisting these is really necessary.
        save_data(train_sX_df, file_code=f'SCALING,TRAIN_X,{pc_mode}')
        save_data(self.data[f'TRAIN_y_{pc_mode}'], f'SCALING,TRAIN_y,{pc_mode}')
        save_data(test_sX_df, file_code=f'SCALING,TEST_X,{pc_mode}')
        save_data(self.data[f'TEST_y_{pc_mode}'], f'SCALING,TEST_y,{pc_mode}')

        # Persist the fitted scaler.
        save_pickle(scaler, file_code=f'DUMP,SCALER,{pc_mode}')