In [1]:
import os
import re
import math
import datetime
import numpy as np
import pandas as pd

### 기본 셋팅하기

In [2]:
# -----------------------------------------------|
# ----- Step 1. Settings ------------------------
# -----------------------------------------------|
# NOTE(review): absolute, machine-specific project root; the notebook only runs
# where this directory exists.
os.chdir("C:/Users/begas/Desktop/Project/SmartFarm")
os.getcwd()

# Project sub-folders, all derived from the working directory set above.
home_path  = os.getcwd()
data_path  = os.path.join(home_path,"1. DAT")
save_path  = os.path.join(home_path,"2. OUT")
model_path = os.path.join(home_path,"3. MODEL")

# The input file is chosen by its position (index 6) in the directory listing.
# NOTE(review): os.listdir() returns entries in arbitrary order, so this index
# can point at a different file when the folder content changes - confirm the
# printed path before running the rest of the notebook.
data_files = os.path.join(data_path,os.listdir(data_path)[6])
print(data_files)

# Not referenced by any other cell visible in this notebook.
working_data = '20220916'

C:\Users\begas\Desktop\Project\SmartFarm\1. DAT\22_이엔티DB_신천농장_외부.csv


### 데이터 불러오기

In [3]:
def LOAD_FN(data_files : str) -> pd.DataFrame :
    '''
    Load a raw sensor CSV and normalise its columns for the later steps.

    * Input
    data_files : path of the raw CSV file

    * Output
    raw_dat    : cleaned data frame

    * Raises
    KeyError   : the file has neither a 'sensingDt' nor a '시간' column
    '''
    # Load the data
    raw_dat = pd.read_csv(data_files)

    # Map the English sensor column names onto the Korean names used by the
    # rest of the notebook; names that are not present are simply ignored.
    rename_map = {'sensingDt' : '시간',
                  'nh3'       : '암모니아',
                  'h2s'       : '황화수소',
                  'tmp'       : '온도',
                  'hum'       : '습도',
                  'voc'       : '환기팬'}
    raw_dat = raw_dat.rename(columns = rename_map)

    # Time : convert to a datetime column
    if '시간' not in raw_dat.columns :
        raise KeyError("timestamp column '시간' (or 'sensingDt') not found in " + str(data_files))
    raw_dat['시간'] = pd.to_datetime(raw_dat['시간'])

    # Removal 1. When ammonia or hydrogen sulfide is reported in more than one
    # column, keep the ppm reading by dropping every mV column.
    gas_is_duplicated = any(
        sum(gas in col for col in raw_dat.columns) > 1
        for gas in ('암모니아', '황화수소')
    )
    if gas_is_duplicated :
        mv_cols = [col for col in raw_dat.columns if ('mV' in col) or ('(㎷)' in col)]
        raw_dat = raw_dat.drop(columns = mv_cols)

    # Removal 2. Strip Latin letters and punctuation from the column names.
    # Raw string: "\w" in a normal string is an invalid escape sequence.
    # The trailing strip() removes the blank left by names such as
    # "습도 (%)", which would otherwise not match the exact-name checks
    # performed downstream.
    raw_dat.columns = [re.sub(r"[a-zA-Z]|[^\w\s]", "", col).strip() for col in raw_dat.columns]

    # Removal 3. Columns whose values are all NA
    all_na_cols = raw_dat.columns[raw_dat.isna().mean() == 1]
    if len(all_na_cols) > 0 :
        print('모든 값이 NA인 변수 제거 :','/'.join(all_na_cols))
        raw_dat = raw_dat.drop(columns = all_na_cols)

    # Removal 4. Columns holding one single value (NaN counts as a value)
    n_unique    = raw_dat.nunique(dropna = False)
    single_cols = raw_dat.columns[n_unique == 1]
    if len(single_cols) > 0 :
        print('모든 값이 단일값인 변수 제거 :','/'.join(single_cols))
        raw_dat = raw_dat.drop(columns = single_cols)

    return raw_dat

In [4]:
# Read the CSV chosen in the settings cell and apply LOAD_FN's column clean-up.
load_dat = LOAD_FN(data_files)

모든 값이 단일값인 변수 제거 : 온도/환기팬


### 학습 데이터 전처리 하기

In [5]:
def PREPROCESS_FN(DAT : pd.DataFrame, time_grp : int) -> pd.DataFrame :
    '''
    Clamp outliers, aggregate the readings per time bucket and fill the
    missing buckets by linear interpolation.

    * Input
    DAT              : data frame returned by LOAD_FN (it is not modified)
    time_grp         : bucket width in minutes (ex : time_grp = 60 summarises per 60 minutes)

    * Output
    final_summary_df : one row per bucket with '<col>_mean', '<col>_min',
                       '<col>_max' and '<col>_std' for every measurement column

    * Raises
    ValueError       : time_grp is not positive
    '''
    if time_grp <= 0 :
        raise ValueError("time_grp must be a positive number of minutes")

    # Work on a copy so the caller's frame keeps its raw values and timestamps
    # and the cell can be re-run safely.
    DAT = DAT.copy()

    print("Step1. 이상치를 허용범위 내로 보정")
    # (lower, upper) limits per column; None means "no limit on that side".
    # NaN values are left untouched by clip().
    allowed_range = {'환기팬'       : (0, None),
                     '암모니아'     : (0, None),
                     '황화수소'     : (0, None),
                     '거품도포량'   : (0, None),
                     '거품도포시간' : (0, None),
                     '온도'         : (-50, 50),
                     '습도'         : (0, 100)}
    for col, (low, high) in allowed_range.items() :
        if col in DAT.columns :
            DAT[col] = DAT[col].clip(lower = low, upper = high)

    print("Step2. 시간 변수를", time_grp, "분 단위로 변경")

    def floor_dt(dt, time_grp = time_grp):
        # seconds elapsed since midnight, then drop the remainder of the bucket
        nsecs = dt.hour*3600 + dt.minute*60 + dt.second + dt.microsecond*1e-6
        delta = nsecs % (time_grp * 60)
        return dt - datetime.timedelta(seconds=delta)

    DAT['시간'] = DAT['시간'].apply(floor_dt)

    print("Step3. 시간별 요약통계량 데이터 생성")
    # Exact comparison: a substring test would also exclude measurement
    # columns whose name merely contains '시간' (e.g. '거품도포시간').
    summary_cols = [s for s in DAT.columns if s != '시간']
    grouped      = DAT.groupby('시간')[summary_cols]
    stat_frames  = [('mean', grouped.mean()),
                    ('min' , grouped.min()),
                    ('max' , grouped.max()),
                    ('std' , grouped.std())]

    # Summary frame : 시간, then all *_mean, *_min, *_max, *_std columns
    summary_df = pd.concat([frame.add_suffix('_' + stat) for stat, frame in stat_frames], axis = 1).reset_index()

    print("Step4. 특정 시간대의 데이터가 비어있을 시 해당 시간대 생성")
    st_time = summary_df['시간'].iloc[0]
    ed_time = summary_df['시간'].iloc[-1]
    # The grid follows the bucket width; a fixed hourly grid would discard
    # buckets whenever time_grp is not 60.
    # NOTE: buckets restart at midnight, so widths that do not divide a day
    # evenly will not line up with this regular grid.
    base_date_df = pd.DataFrame({'시간' : pd.date_range(st_time, ed_time, freq = f'{time_grp}min')})
    n_missing_hour = len(set(base_date_df['시간']) - set(summary_df['시간']))
    print(f'> {n_missing_hour}개의 시간대가 비어있습니다. 해당 시간대를 생성합니다.')

    print("Step5. 선형 보간법 적용")
    final_summary_df = pd.merge(base_date_df,summary_df, how = 'left', on = '시간')
    final_summary_df.iloc[:,1:] = final_summary_df.iloc[:,1:].interpolate(method='linear')

    return final_summary_df

In [6]:
# Summarise the loaded readings into 60-minute buckets.
preprocess_result_df = PREPROCESS_FN(load_dat, time_grp = 60)

Step1. 이상치를 허용범위 내로 보정
Step2. 시간 변수를 60 분 단위로 변경
Step3. 시간별 요약통계량 데이터 생성
Step4. 특정 시간대의 데이터가 비어있을 시 해당 시간대 생성
> 22개의 시간대가 비어있습니다. 해당 시간대를 생성합니다.
Step5. 선형 보간법 적용


### 학습 - 분석용 데이터 생성

In [7]:
def TRAIN_ANAL_DAT_FN(dat : pd.DataFrame, time_window : list) -> dict :
    '''
    Build the lagged modelling table and split it into train / test parts.

    * Input
    dat         : data frame returned by PREPROCESS_FN (it is not modified)
    time_window : lags in hours; every explanatory column is added once per
                  lag as '<column>_<lag>'

    * Output
    dict        : 'train_dat' (first 70% of the rows), 'test_dat' (the rest)
                  and 'x_var' (sorted explanatory columns that take more than
                  one distinct value in the training rows)

    * Raises
    ValueError  : time_window is empty
    '''
    if len(time_window) == 0 :
        raise ValueError("time_window must contain at least one lag")

    print("Step1. 목표변수: 암모니아, 황화수소 단위별 max 값")
    # rename() returns new frames: renaming the caller's frame in place made a
    # second run of the cell produce 'x_x_...' columns.
    old_dat = dat.rename(columns = {'암모니아_max' : 'y_암모니아', '황화수소_max' : 'y_황화수소','시간' : 'base_time'})
    old_dat = old_dat.rename(columns = {s : 'x_' + s for s in old_dat.columns
                                        if ('time' not in s) and (not s.startswith('y_'))})

    print("Step2. 시차 변수 생성")
    yvar_list = [s for s in old_dat.columns if s.startswith('y_')]
    xvar_list = [s for s in old_dat.columns if s.startswith('x_')]

    new_dat = old_dat[['base_time'] + yvar_list].rename(columns = {'base_time' : 'predict_time'})

    for w in time_window :
        # Timestamp whose explanatory values are used to predict 'predict_time'
        new_dat['base_time'] = new_dat['predict_time'] - pd.Timedelta(hours = w)

        lagged_x = old_dat[['base_time'] + xvar_list].rename(columns = {s : s + '_' + str(w) for s in xvar_list})
        new_dat  = pd.merge(new_dat, lagged_x, how = 'left', on = 'base_time')

    # Drop the leading rows whose largest lag reaches before the first
    # observation. Done by timestamp rather than by row count so it stays
    # correct when the rows are not exactly one hour apart.
    first_valid_time = old_dat['base_time'].min() + pd.Timedelta(hours = max(time_window))
    new_dat = new_dat.loc[new_dat['predict_time'] >= first_valid_time].reset_index(drop = True)

    print("Step3. 학습/검증 데이터 7:3으로 분할")
    n_train   = math.ceil(new_dat.shape[0] * 0.7)
    train_dat = new_dat.iloc[:n_train,:].reset_index(drop = True)
    test_dat  = new_dat.iloc[n_train:,:].reset_index(drop = True)

    print("Step4. 단일 값만 가지는 설명변수 제거")
    # Explanatory columns with one distinct value in the training rows carry
    # no information for the models.
    lagged_xvars = [s for s in train_dat.columns if s.startswith('x_')]
    n_unique     = train_dat[lagged_xvars].nunique()
    single_value_var_names = set(n_unique.index[n_unique == 1])
    xvar_list = sorted(set(lagged_xvars) - single_value_var_names)

    ret = dict({'train_dat' : train_dat, 'test_dat' : test_dat, 'x_var' : xvar_list})
    return ret

In [8]:
# Build the modelling table with 12-hour and 24-hour lagged explanatory variables.
TRAIN_ANAL_DAT = TRAIN_ANAL_DAT_FN(preprocess_result_df, time_window = [12,24])

Step1. 목표변수: 암모니아, 황화수소 단위별 max 값
Step2. 시차 변수 생성
Step3. 학습/검증 데이터 7:3으로 분할
Step4. 단일 값만 가지는 설명변수 제거


### 모델링

In [9]:
import xgboost    as xgb
import lightgbm   as lgb
import tensorflow as tf
from sklearn.linear_model    import LinearRegression
from sklearn.ensemble        import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, ParameterGrid, KFold, train_test_split

  import pandas.util.testing as tm


In [None]:
def MODELING_MLR(yvar_name : str, dat : dict) -> dict :
    '''
    Fit a multiple linear regression and predict the train / test rows.

    * Input
    yvar_name : name of the dependent variable
    dat       : dictionary returned by TRAIN_ANAL_DAT_FN
                ('train_dat', 'test_dat', 'x_var'); it is not modified

    * Output
    dict      : the fitted model and related information; 'train_res' and
                'test_res' are copies of the input frames with a 'pred' column
    '''
    print('Model Type is MLR')

    # Explanatory variables used for training
    xvar_name = dat['x_var']

    # Private copies: writing 'pred' into the shared frames made every model's
    # result point at the same objects, so a later model overwrote the
    # predictions of an earlier one.
    train_res = dat['train_dat'].copy()
    test_res  = dat['test_dat'].copy()

    # Train / test arrays
    mlr_train_y = np.array(train_res[yvar_name])
    mlr_train_x = np.array(train_res[xvar_name])
    mlr_test_x  = np.array(test_res[xvar_name])

    # Regression fitting
    mlr_model = LinearRegression()
    mlr_model.fit(X=mlr_train_x, y = mlr_train_y)

    # Predictions for the train / test rows
    train_res['pred'] = mlr_model.predict(X=mlr_train_x)
    test_res['pred']  = mlr_model.predict(X=mlr_test_x)

    ret = dict({'model' : mlr_model,'model_name' : 'MLR' , "yvar" : yvar_name, "xvar" : xvar_name, "train_res" : train_res, "test_res" : test_res})
    return ret

In [None]:
# Linear-regression baseline for the ammonia target, fed with the TRAIN_ANAL_DAT dict.
MLR_RESULT = MODELING_MLR(yvar_name = 'y_암모니아', dat = TRAIN_ANAL_DAT)

In [None]:
def MODELING_RF(yvar_name : str, dat : dict, random_state : int = 1234) -> dict :
    '''
    Fit a random forest regressor and predict the train / test rows.

    * Input
    yvar_name    : name of the dependent variable
    dat          : dictionary returned by TRAIN_ANAL_DAT_FN
                   ('train_dat', 'test_dat', 'x_var'); it is not modified
    random_state : seed handed to the forest so repeated runs give the same
                   model (pass None for an unseeded forest)

    * Output
    dict         : the fitted model and related information; 'train_res' and
                   'test_res' are copies of the input frames with a 'pred' column
    '''
    print('Model Type is Random Forest')

    # Explanatory variables used for training
    xvar_name = dat['x_var']

    # Private copies so the predictions of this model are not overwritten by
    # (and do not overwrite) the other models sharing the same input dict.
    train_res = dat['train_dat'].copy()
    test_res  = dat['test_dat'].copy()

    # Train / test arrays
    rf_train_y = np.array(train_res[yvar_name])
    rf_train_x = np.array(train_res[xvar_name])
    rf_test_x  = np.array(test_res[xvar_name])

    # Random Forest fitting
    rf_model = RandomForestRegressor(n_estimators=100, random_state=random_state)
    rf_model.fit(X=rf_train_x, y = rf_train_y)

    # Predictions for the train / test rows
    train_res['pred'] = rf_model.predict(rf_train_x)
    test_res['pred']  = rf_model.predict(rf_test_x)

    ret = dict({'model' : rf_model,'model_name' : 'RF' , "yvar" : yvar_name, "xvar" : xvar_name, "train_res" : train_res, "test_res" : test_res})

    return ret

In [None]:
# Random-forest model for the ammonia target, fed with the same TRAIN_ANAL_DAT dict.
RF_RESULT = MODELING_RF(yvar_name = 'y_암모니아', dat = TRAIN_ANAL_DAT)

In [None]:
def MODELING_XGB(yvar_name : str, dat : dict) -> dict :
    '''
    Tune an XGBoost regressor with a small cross-validated grid search, refit
    it on the training rows and predict the train / test rows.

    * Input
    yvar_name : name of the dependent variable
    dat       : dictionary returned by TRAIN_ANAL_DAT_FN
                ('train_dat', 'test_dat', 'x_var'); it is not modified

    * Output
    dict      : the fitted booster and related information; 'train_res' and
                'test_res' are copies of the input frames with a 'pred' column
    '''
    print('Model Type is XGBoost')

    # Explanatory variables used for training
    xvar_name = dat['x_var']

    # Private copies so the predictions of this model are not overwritten by
    # (and do not overwrite) the other models sharing the same input dict.
    train_res = dat['train_dat'].copy()
    test_res  = dat['test_dat'].copy()

    # XGBoost training matrix
    train_d_mat = xgb.DMatrix(data = train_res[xvar_name], label = train_res[yvar_name])

    # Grid Search
    param_space = {'max_depth':[5,7],
                   'min_child_weight':[1.0,3.0],
                   'colsample_bytree':[0.5,0.75]}
    params_grid = pd.DataFrame(ParameterGrid(param_space))

    def _to_xgb_params(row) -> dict :
        # iterrows() upcasts a row to one dtype, so restore the expected types
        return {'max_depth'       : int(row['max_depth']),
                'min_child_weight': float(row['min_child_weight']),
                'colsample_bytree': float(row['colsample_bytree'])}

    score_list           = []
    num_boost_round_list = []
    for _, candidate in params_grid.iterrows() :
        xgb_cv = xgb.cv(dtrain = train_d_mat, params = _to_xgb_params(candidate), num_boost_round = 200, nfold = 3, early_stopping_rounds = 10, maximize = False, verbose_eval= 0, seed =1234)
        # number of rounds kept by the CV run and the test RMSE of its last row
        num_boost_round_list.append(xgb_cv.shape[0])
        score_list.append(xgb_cv['test-rmse-mean'].iloc[-1])

    # Find Best Parameter (lowest cross-validated RMSE)
    params_grid['num_boost_round'] = num_boost_round_list
    params_grid['score']           = score_list
    best_params      = params_grid.iloc[int(np.argmin(params_grid['score'].to_numpy())),:]
    xgb_train_params = _to_xgb_params(best_params)
    num_boost_round  = int(best_params['num_boost_round'])

    # XGBoost Fitting
    xgb_model = xgb.train(dtrain = train_d_mat, params = xgb_train_params, num_boost_round = num_boost_round)

    # Predictions for the train / test rows
    train_res['pred'] = xgb_model.predict(xgb.DMatrix(train_res[xvar_name]))
    test_res['pred']  = xgb_model.predict(xgb.DMatrix(test_res[xvar_name]))

    ret = dict({'model' : xgb_model,'model_name' : 'XGB' , "yvar" : yvar_name, "xvar" : xvar_name, "train_res" : train_res, "test_res" : test_res})

    return ret

In [None]:
# XGBoost model for the ammonia target, fed with the same TRAIN_ANAL_DAT dict.
XGB_RESULT = MODELING_XGB(yvar_name = 'y_암모니아', dat = TRAIN_ANAL_DAT)

In [None]:
def MODELING_LGB(yvar_name : str, dat : dict) -> dict :
    '''
    Tune a LightGBM regressor with a small cross-validated grid search, refit
    it on the training rows and predict the train / test rows.

    * Input
    yvar_name : name of the dependent variable
    dat       : dictionary returned by TRAIN_ANAL_DAT_FN
                ('train_dat', 'test_dat', 'x_var'); it is not modified

    * Output
    dict      : the fitted booster and related information; 'train_res' and
                'test_res' are copies of the input frames with a 'pred' column
    '''
    print('Model Type is LightGBM')

    # Explanatory variables used for training
    xvar_name = dat['x_var']

    # Private copies so the predictions of this model are not overwritten by
    # (and do not overwrite) the other models sharing the same input dict.
    train_res = dat['train_dat'].copy()
    test_res  = dat['test_dat'].copy()

    # LightGBM training set
    train_d_mat = lgb.Dataset(data = train_res[xvar_name], label = train_res[yvar_name])

    # Grid Search
    param_space = {'num_leaves' : [3,31],
                   'learning_rate' : [0.1],
                   'feature_fraction' : [1],
                   'bagging_fraction' : [1],
                   'max_bin' : [255]}
    params_grid = pd.DataFrame(ParameterGrid(param_space))

    def _to_lgb_params(row) -> dict :
        # iterrows() upcasts a row to one dtype, so restore the expected types
        return {'objective'        : 'regression',
                'boosting'         : "gbdt",
                'metric'           : 'rmse',
                'num_leaves'       : int(row['num_leaves']),
                'learning_rate'    : float(row['learning_rate']),
                'feature_fraction' : float(row['feature_fraction']),
                'bagging_fraction' : float(row['bagging_fraction']),
                'max_bin'          : int(row['max_bin'])}

    score_list           = []
    num_boost_round_list = []
    for _, candidate in params_grid.iterrows():
        # NOTE(review): the keyword arguments and the 'rmse-mean' result key
        # follow the LightGBM version this notebook was written for - confirm
        # them against the installed version.
        lgb_cv = lgb.cv(params = _to_lgb_params(candidate), train_set = train_d_mat, num_boost_round = 200, nfold = 3, early_stopping_rounds = 10, verbose_eval= 0, seed =1234, stratified=False)
        num_boost_round_list.append(len(lgb_cv['rmse-mean']))
        score_list.append(lgb_cv['rmse-mean'][-1])

    # Find Best Parameter (lowest cross-validated RMSE)
    params_grid['num_boost_round'] = num_boost_round_list
    params_grid['score']           = score_list
    best_params = params_grid.iloc[int(np.argmin(params_grid['score'].to_numpy())),:]

    lgb_train_params = _to_lgb_params(best_params)
    num_boost_round  = int(best_params['num_boost_round'])

    # LightGBM Fitting with the selected parameters. Training with the loop
    # variable instead would silently use the last grid candidate, whatever
    # the search had found.
    lgb_model = lgb.train(params = lgb_train_params, train_set = train_d_mat, num_boost_round = num_boost_round)

    # Predictions for the train / test rows
    train_res['pred'] = lgb_model.predict(train_res[xvar_name])
    test_res['pred']  = lgb_model.predict(test_res[xvar_name])

    ret = dict({'model' : lgb_model,'model_name' : 'LGB' , "yvar" : yvar_name, "xvar" : xvar_name, "train_res" : train_res, "test_res" : test_res})

    return ret

In [None]:
# LightGBM model for the ammonia target, fed with the same TRAIN_ANAL_DAT dict.
LGB_RESULT = MODELING_LGB(yvar_name = 'y_암모니아', dat = TRAIN_ANAL_DAT)

In [81]:
## ANN Model Class
class MODELING_ANN():
    '''
    Train a dense neural network on min-max scaled explanatory variables and
    predict the train / test rows.

    * Input
    yvar_name : name of the dependent variable
    dat       : dictionary returned by TRAIN_ANAL_DAT_FN
                ('train_dat', 'test_dat', 'x_var'); it is not modified

    * Result
    self.ret  : dictionary with the Keras model and related information;
                'train_res' / 'test_res' are copies of the input frames with a
                'pred' column and 'scale_info' holds the per-variable min / max
                used for scaling
    '''
    def __init__(self, yvar_name : str, dat : dict) -> None :
        self.yvar_name = yvar_name
        self.xvar_name = dat['x_var']
        # Shallow copy of the dict plus private copies of the two frames, so
        # the 'pred' column written below does not leak into the shared input.
        self.dat              = dict(dat)
        self.dat['train_dat'] = dat['train_dat'].copy()
        self.dat['test_dat']  = dat['test_dat'].copy()
        self._preprocess()
        self._fit()
        self._pred()
        self.ret = dict({'model' : self.ann.model,'model_name' : 'ANN' , "yvar" : self.yvar_name, "xvar" : self.xvar_name, 
                         "train_res" : self.dat['train_dat'], "test_res" : self.dat['test_dat'], "scale_info" : self.scale_info})

    def _preprocess(self):
        '''Build the numpy arrays and scale the explanatory variables to [0, 1].'''
        self.train_x_array = np.array(self.dat['train_dat'][self.xvar_name])
        self.train_y_array = np.array(self.dat['train_dat'][self.yvar_name])
        self.test_x_array  = np.array(self.dat['test_dat'][self.xvar_name])
        self.test_y_array  = np.array(self.dat['test_dat'][self.yvar_name])

        # Min-Max Scaling : statistics come from the training rows only
        x_min = self.train_x_array.min(axis = 0)
        x_max = self.train_x_array.max(axis = 0)
        self.scale_info = pd.DataFrame({'xvar_name' : self.xvar_name, 'min' : x_min, 'max' : x_max})

        # 1e-10 keeps the division defined for a constant column
        x_range = x_max - x_min + 1e-10
        self.x_tr_scale = (self.train_x_array - x_min) / x_range
        # Test values outside the training range are clamped to [0, 1]
        self.x_te_scale = np.clip((self.test_x_array - x_min) / x_range, 0, 1)

    def _fit(self):
        '''Build the network and train it with early stopping.'''
        # ANN Build
        input_shape = self.train_x_array.shape[1] # number of explanatory variables
        h_units     = [16,8]                      # hidden units per layer
        self.ann = ann_model(input_shape,h_units)

        # Early Stopping, Reduce Learning Rate, History
        early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', mode = 'min', patience = 5, restore_best_weights=True, verbose = 0) 
        reduce_lr  = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', mode = 'min', factor=0.5, patience=5, verbose=0, min_lr=1e-5)
        history    = tf.keras.callbacks.History()

        self.ann.model.fit(self.x_tr_scale, self.train_y_array, validation_split = 0.3
                                               , epochs = 100
                                               , batch_size = 16
                                               , callbacks = [early_stop, reduce_lr, history]
                                               , verbose = 0)

    def _pred(self):
        '''Store the predictions for the train / test rows in a 'pred' column.'''
        # The single-unit output layer yields an (n, 1) array; flatten it so
        # it is stored as an ordinary one-dimensional column.
        self.dat['train_dat']['pred'] = self.ann.model.predict(self.x_tr_scale).reshape(-1)
        self.dat['test_dat']['pred']  = self.ann.model.predict(self.x_te_scale).reshape(-1)
            
 ## ANN Model Class
class ann_model():
    '''
    Fully connected Keras regression network: ReLU hidden layers followed by
    one linear output unit, compiled with Adam and mean squared error.

    * Input
    input_shape : number of input features (an int) or a shape tuple
    h_units     : units of each hidden layer, in order; an empty list gives a
                  model with only the linear output layer

    * Result
    self.model  : the compiled tf.keras.Model
    '''
    def __init__(self, input_shape : int, h_units : list):
        self.input_shape = input_shape
        self.h_units     = h_units
        self._build()
        self._compile()

    def _build(self):
        '''Create the layers and store the resulting model in self.model.'''
        # Keras documents `shape` as a tuple, so wrap a bare feature count.
        if isinstance(self.input_shape, (int, np.integer)):
            shape = (int(self.input_shape),)
        else:
            shape = tuple(self.input_shape)

        input_layer = tf.keras.Input(shape = shape, name = 'input_layer')

        # Chain the hidden layers; starting from the input layer keeps the
        # model well defined even when h_units is empty.
        ann_layer = input_layer
        for layer_no, h in enumerate(self.h_units, start = 1):
            ann_layer = tf.keras.layers.Dense(h, activation = 'relu', name = f'ann_layer_{layer_no}')(ann_layer)

        output_layer = tf.keras.layers.Dense(1, activation = 'linear', name = 'output_layer')(ann_layer)
        self.model = tf.keras.Model(inputs = input_layer, outputs = output_layer)

    def _compile(self):
        '''Compile self.model with the Adam optimizer and MSE loss.'''
        self.model.compile(optimizer = 'Adam', loss = 'mean_squared_error')

In [82]:
# Dense-network model for the ammonia target. MODELING_ANN is a class, so
# ANN_RESULT is the instance; the result dictionary is its `ret` attribute.
ANN_RESULT = MODELING_ANN(yvar_name = 'y_암모니아', dat = TRAIN_ANAL_DAT)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) 

In [464]:
## LSTM Model Class
class MODELING_LSTM():
    '''
    Train an LSTM on sliding windows of min-max scaled explanatory variables
    and predict the train / test rows.

    * Input
    yvar_name : name of the dependent variable
    dat       : dictionary returned by TRAIN_ANAL_DAT_FN
                ('train_dat', 'test_dat', 'x_var'); it is not modified

    * Result
    self.ret  : dictionary with the Keras model and related information;
                'train_res' / 'test_res' are copies of the input frames with a
                'pred' column and 'scale_info' holds the per-variable min / max
                used for scaling
    '''
    def __init__(self, yvar_name : str, dat : dict) -> None :
        self.yvar_name = yvar_name
        self.xvar_name = dat['x_var']
        # Shallow copy of the dict plus private copies of the two frames, so
        # the 'pred' column written in _pred does not leak into the shared input.
        self.dat              = dict(dat)
        self.dat['train_dat'] = dat['train_dat'].copy()
        self.dat['test_dat']  = dat['test_dat'].copy()
        self._preprocess()
        self._fit()
        self._pred()
        self.ret = dict({'model' : self.lstm.model,'model_name' : 'LSTM' , "yvar" : self.yvar_name, "xvar" : self.xvar_name, 
                         "train_res" : self.dat['train_dat'], "test_res" : self.dat['test_dat'], "scale_info" : self.scale_info})

    def _preprocess(self):
        '''Build the windowed arrays, split them by date and scale them.'''
        # Windows are cut from train + test together so that the first test
        # rows still have a full history.
        self.full_dat = pd.concat([self.dat['train_dat'],self.dat['test_dat']], ignore_index=True)

        # Window definition: nTimeStpes rows, nInterval rows apart
        self.nTimeStpes = 5
        self.nInterval  = 1
        window_span = self.nInterval * (self.nTimeStpes - 1)

        # Number of complete windows
        self.dim_batch = self.full_dat.shape[0] - window_span
        if self.dim_batch <= 0 :
            raise ValueError("not enough rows to build a single LSTM window")

        # Row positions of every window. Every value is read from the
        # instance; bare names only resolve when leftover globals exist.
        idx_list = [np.arange(start = i, stop = window_span + i + 1, step = self.nInterval)
                    for i in range(self.dim_batch)]

        x_values    = self.full_dat[self.xvar_name].to_numpy()
        y_values    = self.full_dat[self.yvar_name].to_numpy()
        time_values = self.full_dat['predict_time']

        self.x_array = np.array([x_values[idx] for idx in idx_list])
        self.y_array = np.array([y_values[idx] for idx in idx_list])
        # A window is dated by its latest timestamp
        self.date_df = np.array([time_values.iloc[idx].max() for idx in idx_list])

        # Split the windows again: a window belongs to the test part when it
        # ends on or after the first test timestamp.
        self.test_start_date = self.dat['test_dat']['predict_time'].iloc[0]
        is_train = (pd.Series(self.date_df) < self.test_start_date).to_numpy()

        self.train_x_array = self.x_array[is_train,:,:]
        self.train_y_array = self.y_array[is_train,:]

        self.test_x_array = self.x_array[~is_train,:,:]
        self.test_y_array = self.y_array[~is_train,:]

        # Min, Max Scaling : statistics over all training windows and steps
        x_min = self.train_x_array.min(axis = (0, 1))
        x_max = self.train_x_array.max(axis = (0, 1))
        self.scale_info = pd.DataFrame({'xvar_name' : self.xvar_name, 'min' : x_min, 'max' : x_max})

        # 1e-10 keeps the division defined for a constant column
        x_range = x_max - x_min + 1e-10
        self.x_tr_scale = (self.train_x_array - x_min) / x_range
        # Test values outside the training range are clamped to [0, 1]
        self.x_te_scale = np.clip((self.test_x_array - x_min) / x_range, 0, 1)

    def _fit(self):
        '''Build the LSTM and train it with early stopping.'''
        # LSTM Build
        self.input_shape = (self.nTimeStpes, self.train_x_array.shape[2]) # (time steps, features)
        self.h_units     = [4]                                            # hidden units per layer
        self.lstm = lstm_model(self.input_shape, self.h_units)

        # Early Stopping, Reduce Learning Rate, History
        early_stop = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', mode = 'min', patience = 5, restore_best_weights=True, verbose = 0) 
        reduce_lr  = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', mode = 'min', factor=0.5, patience=5, verbose=0, min_lr=1e-5)
        history    = tf.keras.callbacks.History()

        self.lstm.model.fit(self.x_tr_scale, self.train_y_array, validation_split = 0.3
                                               , epochs = 100
                                               , batch_size = 16
                                               , callbacks = [early_stop, reduce_lr, history]
                                               , verbose = 0)

    def _pred(self):
        '''Predict every window and store the last-step value per row.'''
        window_span = self.nInterval * (self.nTimeStpes - 1)

        # The model returns one value per time step; the last step is the
        # prediction for the window's own timestamp.
        tr_last_step = self.lstm.model.predict(self.x_tr_scale)[:, -1]
        te_last_step = self.lstm.model.predict(self.x_te_scale)[:, -1]

        # tr_pred keeps the zero padding for the first rows (no full window
        # there) so its length matches the training frame.
        self.tr_pred = np.vstack([np.zeros(window_span).reshape(-1,1), tr_last_step])
        self.te_pred = te_last_step

        # Write the predictions into the result frames. The padded rows have
        # no real prediction, so they are stored as NaN rather than 0.
        train_pred = self.tr_pred.reshape(-1).astype(float)
        train_pred[:window_span] = np.nan
        self.dat['train_dat']['pred'] = train_pred
        self.dat['test_dat']['pred']  = self.te_pred.reshape(-1)
        
## LSTM Model Class
class lstm_model():
    '''
    Keras sequence regression network: stacked LSTM layers that return the
    full sequence, followed by a linear output unit applied at every time
    step, compiled with Adam and mean squared error.

    * Input
    input_shape : (time steps, features)
    h_units     : units of each LSTM layer, in order; an empty list gives a
                  model with only the linear output layer

    * Result
    self.model  : the compiled tf.keras.Model
    '''
    def __init__(self, input_shape : tuple, h_units : list):
        self.input_shape = input_shape
        self.h_units     = h_units
        self._build()
        self._compile()

    def _build(self):
        '''Create the layers and store the resulting model in self.model.'''
        input_layer = tf.keras.Input(shape = self.input_shape, name = 'input_layer')

        # Chain the LSTM layers; starting from the input layer keeps the
        # model well defined even when h_units is empty.
        lstm_layer = input_layer
        for layer_no, h in enumerate(self.h_units, start = 1):
            lstm_layer = tf.keras.layers.LSTM(h, return_sequences=True, name = f'lstm_layer_{layer_no}')(lstm_layer)

        output_layer = tf.keras.layers.Dense(1, activation = 'linear', name = 'output_layer')(lstm_layer)
        self.model = tf.keras.Model(inputs = input_layer, outputs = output_layer)

    def _compile(self):
        '''Compile self.model with the Adam optimizer and MSE loss.'''
        self.model.compile(optimizer = 'Adam', loss = 'mean_squared_error')
        

In [465]:
# LSTM model for the ammonia target. MODELING_LSTM is a class, so LSTM_RESULT
# is the instance; the result dictionary is its `ret` attribute.
LSTM_RESULT = MODELING_LSTM(yvar_name = 'y_암모니아', dat = TRAIN_ANAL_DAT)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) 