In [3]:
import pandas as pd

import aidd.sys.config as cfg
from aidd.utils.data_io import read_data
from aidd.utils.data_io import read_pickle

In [4]:
class Service:
    """Predict total construction cost (TCC) with pre-trained scaler/model pairs.

    One scaler and one "best" model are loaded per key in ``cfg.DATA_PC_TYPE``.
    ``predict`` uses the entries stored under the keys ``'1'``, ``'N1'`` and
    ``'ALL'``; these are assumed to be part of ``cfg.DATA_PC_TYPE`` -- TODO
    confirm against the config.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # mr_pkl: modeling-result pickles, keyed by pole-count type, each
        # holding the fitted 'SCALER' and the best 'MODEL' for that type.
        self.mr_pkl = {
            pckey: {
                'SCALER': read_pickle(f'DUMP,SCALER,{pckey}'),
                'MODEL': read_pickle(f'DUMP,MODELS,{pckey},BEST'),
            } for pckey in cfg.DATA_PC_TYPE
        }
        # t_cols: training columns, i.e. the feature columns (and their order)
        # selected from the input frame before scaling.
        self.t_cols = read_pickle(file_code='DUMP,MODELING_COLS')

    def _predict_with(self, key, features):
        """Scale ``features`` and predict with the scaler/model stored under ``key``."""
        entry = self.mr_pkl[key]
        return entry['MODEL'].predict(entry['SCALER'].transform(features))

    def predict(self, service_df):
        """Predict the total construction cost for every row of ``service_df``.

        Each row gets two predictions:

        * ``PCKEY_TCC`` -- from the model matching the row's pole count
          (``cfg.PC_COL``): the ``'1'`` entry when the count truncates to 1,
          the ``'N1'`` entry otherwise.
        * ``ALL_TCC`` -- from the ``'ALL'`` entry (scaler and best model for
          the whole data set).

        Rows are predicted in one batch per model instead of one call per row.
        Models are assumed to return one value per input row as a 1-D array
        -- TODO confirm for the pickled estimators.

        Args:
            service_df: DataFrame containing ``CONS_ID``, ``TOTAL_CONS_COST``
                and every column listed in ``self.t_cols``.

        Returns:
            A new DataFrame with ``CONS_ID`` and ``TOTAL_CONS_COST`` copied from
            the input (same index) plus the float columns ``PCKEY_TCC`` and
            ``ALL_TCC``. An empty input yields an empty frame with the same
            four columns.

        Raises:
            KeyError: if a required column or model key is missing.
        """
        result = service_df[['CONS_ID', 'TOTAL_CONS_COST']].copy()
        features = service_df[self.t_cols]
        n_rows = len(features)

        if n_rows == 0:
            # Nothing to predict; scalers/models are not called with zero rows.
            result['PCKEY_TCC'] = pd.Series(dtype='float64')
            result['ALL_TCC'] = pd.Series(dtype='float64')
            return result

        # A pole count "equals 1" when it truncates to 1, i.e. lies in [1, 2);
        # this mirrors an int cast. NaN compares False and therefore falls
        # through to the 'N1' models.
        pole_counts = pd.to_numeric(features[cfg.PC_COL])
        single_mask = (pole_counts.ge(1) & pole_counts.lt(2)).to_numpy()

        # Positional (0..n-1) buffer so duplicate or non-default index labels
        # in service_df cannot misalign the predictions.
        pckey_tcc = pd.Series(float('nan'), index=range(n_rows), dtype='float64')
        for key, mask in (('1', single_mask), ('N1', ~single_mask)):
            # Only touch a model when at least one row is routed to it.
            if mask.any():
                pckey_tcc.iloc[mask] = self._predict_with(key, features[mask])

        result['PCKEY_TCC'] = pckey_tcc.to_numpy()
        result['ALL_TCC'] = self._predict_with('ALL', features)
        return result

In [5]:
# Build the prediction service; its constructor reads the pickled scalers,
# best models and training-column list via read_pickle.
svc = Service()

In [6]:
# Read the data set registered under the 'ONLINE' code (what that code refers
# to is defined by read_data -- TODO confirm) and predict its total costs.
svc_df = read_data('ONLINE')
# pred_df holds CONS_ID, TOTAL_CONS_COST and the two prediction columns
# PCKEY_TCC and ALL_TCC returned by Service.predict.
pred_df = svc.predict(svc_df)



Unnamed: 0,CONS_ID,TOTAL_CONS_COST,PCKEY_TCC,ALL_TCC
0,477420204194,9076645,11713660.0,14171200.0
1,475920223725,2176378,4123722.0,4277902.0
2,474620226651,9512744,10463100.0,13254290.0


In [7]:
# For every predicted row, print the actual cost next to each prediction and
# the absolute error as a percentage of the actual cost.
for _, row in pred_df.iterrows():
    # y: actual total cost, p: PCKEY_TCC prediction, ap: ALL_TCC prediction.
    y, p, ap = row['TOTAL_CONS_COST'], row['PCKEY_TCC'], row['ALL_TCC']
    print(f' p: {y} vs {p} = {abs(y-p)/y*100:.3f}')
    print(f'ap: {y} vs {ap} = {abs(y-ap)/y*100:.3f}')

 p: 9076645.0 vs 11713656.003436096 = 29.053
ap: 9076645.0 vs 14171198.12148257 = 56.128
 p: 2176378.0 vs 4123721.775615843 = 89.476
ap: 2176378.0 vs 4277901.706869179 = 96.561
 p: 9512744.0 vs 10463101.806429254 = 9.990
ap: 9512744.0 vs 13254288.579833094 = 39.332
