---
# Setting
---

<br>

## Import Modules

In [None]:
import gc
# Force a full garbage-collection pass before the rest of the notebook runs.
gc.collect()

In [None]:
# !pip install pytimekr
# !pip install optuna

In [None]:
# d MyPython/2_Dacon_JEJU/DAT/
# unzip open.zip

In [None]:
import matplotlib as mpl
import matplotlib.font_manager as fm

# Register the NanumGothic TTF shipped next to the project with matplotlib's
# font manager and make it the default font family for all figures.
# NOTE(review): presumably needed so Korean labels render in plots - confirm.
fe = fm.FontEntry(fname='../NanumFont/NanumGothic.ttf',name='NanumGothic')
fm.fontManager.ttflist.insert(0, fe)  # or append is fine
mpl.rcParams['font.family'] = fe.name # = 'your custom ttf font name'

In [None]:
import os
import sys
import pandas as pd
# Show every row and column when a frame is displayed; display.width=None
# leaves the width detection to pandas.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

import numpy as np
from tqdm import tqdm, trange
# Register tqdm with pandas (adds the progress_apply / progress_map variants).
tqdm.pandas()

import warnings
# Silence all warnings in this process, and FutureWarning explicitly.
warnings.filterwarnings(action='ignore')
warnings.simplefilter(action='ignore', category=FutureWarning) # suppress FutureWarning
# NOTE(review): presumably set so child processes inherit the filter - confirm.
os.environ['PYTHONWARNINGS']='ignore::FutureWarning'

import itertools
import datetime
from pytimekr import pytimekr
import matplotlib.pyplot as plt
import seaborn as sns

import multiprocessing as mp
from joblib import Parallel, delayed

# import datatable as dt

In [None]:
# https://stackoverflow.com/questions/24983493/tracking-progress-of-joblib-parallel-execution

import contextlib
import joblib
from tqdm import tqdm

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Temporarily make joblib report finished batches to ``tqdm_object``.

    While the context is active, ``joblib.parallel.BatchCompletionCallBack`` is
    swapped for a subclass that advances the progress bar by the size of every
    completed batch before delegating to the original callback. On exit the
    original class is put back and the progress bar is closed.

    Yields:
        The ``tqdm_object`` that was passed in.
    """
    original_callback_cls = joblib.parallel.BatchCompletionCallBack

    class TqdmBatchCompletionCallback(original_callback_cls):
        def __call__(self, *args, **kwargs):
            # Advance the bar first, then let joblib do its normal bookkeeping.
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        # Always undo the patch, even when the body raised.
        joblib.parallel.BatchCompletionCallBack = original_callback_cls
        tqdm_object.close()

In [None]:
def abline(slope, intercept, color):
    """Draw the dashed line ``y = intercept + slope * x`` on the current axes.

    The line is evaluated at the two current x-limits of ``plt.gca()`` and
    plotted with the ``'--'`` line style in the given ``color``.
    """
    x_limits = np.array(plt.gca().get_xlim())
    plt.plot(x_limits, intercept + slope * x_limits, '--', color=color)
    
def createFolder(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Best-effort helper: an ``OSError`` raised while creating the directory is
    reported on stdout instead of being propagated. This includes the case
    where ``directory`` already exists but is not a directory.

    Args:
        directory: target path, as ``str`` or any path-like object.
    """
    try:
        # exist_ok=True avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        # f-string so path-like arguments do not raise TypeError here.
        print(f'Error: Creating directory. {directory}')
        
def cnt(x):
    """Frequency table of the values in ``x``.

    Returns a DataFrame with one row per distinct value, ordered by value:
    ``index`` (the value), ``freq`` (its count from ``value_counts``) and
    ``rate`` (its share of all counted values, in percent).
    """
    counts = x.value_counts().sort_index()
    table = pd.DataFrame({'index': counts.index, 'freq': counts.values})
    total = table['freq'].sum()
    table['rate'] = 100 * table['freq'] / total
    return table

In [None]:
from sklearn.metrics import mean_absolute_error

# verbose=0로 만들어주는 함수
# (참조) https://stackoverflow.com/questions/11130156/suppress-stdout-stderr-print-from-python-functions
class suppress_stdout_stderr(object):
    '''
    A context manager for doing a "deep suppression" of stdout and stderr in
    Python, i.e. will suppress all print, even if the print originates in a
    compiled C/Fortran sub-function.

    Works at file-descriptor level: descriptors 1 and 2 are pointed at the
    null device on entry and restored on exit. Exceptions raised inside the
    block are not suppressed (``__exit__`` returns None).

    An instance is single-use: every descriptor opened in ``__init__`` is
    closed on exit, so create a fresh instance for each ``with`` statement.
    '''
    def __init__(self):
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = (os.dup(1), os.dup(2))

    def __enter__(self):
        # Point stdout and stderr at the null device.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)
        return self

    def __exit__(self, *_):
        # Re-assign the real stdout/stderr back to (1) and (2)
        os.dup2(self.save_fds[0], 1)
        os.dup2(self.save_fds[1], 2)
        # Close the null files AND the saved duplicates; leaving the
        # duplicates open leaks two descriptors per use.
        for fd in (*self.null_fds, *self.save_fds):
            os.close(fd)

<br>

## User Functions

In [None]:
# from lib.MyModel import *

<br>

## Initial Values

In [None]:
# Relative path to the DAT directory.
# NOTE(review): presumably the raw competition data lives here - confirm.
DAT_PATH = "../DAT/"

# Record the wall-clock time at which this run started and show it.
start_time = datetime.datetime.now()
print(start_time)

In [None]:
# TensorFlow-style device string; the trailing comment keeps the CPU alternative.
# NOTE(review): not referenced by any of the visible cells - confirm it is still needed.
DL_SETTING = '/device:GPU:0' #'/device:CPU:0'

<br>

---
# Modeling
---
- tf.data.Dataset 참조
    - [참조1](https://nodoudt.tistory.com/43)
    - [참조2](https://ericabae.medium.com/tensorflow-2-0-csv-%ED%8C%8C%EC%9D%BC-%ED%98%95%EC%8B%9D-%EB%8D%B0%EC%9D%B4%ED%84%B0-%EA%B0%80%EC%A0%B8%EC%98%A4%EA%B8%B0-eddaa88d3112)

<br>

## DL

In [None]:
import tensorflow as tf
# import tensorflow.compat.v1 as tf
from tensorflow.keras.layers import Input, Dense, RepeatVector, LSTM, GRU, TimeDistributed, Bidirectional, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.constraints import NonNeg
from tensorflow.keras.optimizers import Adam

# Hyperparameter Optimization
import optuna
from optuna.integration import TFKerasPruningCallback
from optuna.trial import TrialState

In [None]:
def get_dataset(file_path,batch_size,num_epochs,is_pack,shuffle,**kwargs):
    """Build a batched ``tf.data`` dataset from CSV file(s).

    Thin wrapper around ``tf.data.experimental.make_csv_dataset`` (called with
    ``ignore_errors=True``).

    Args:
        file_path: file path / pattern passed to ``make_csv_dataset``.
        batch_size: number of rows per batch.
        num_epochs: how many times the data is repeated.
        is_pack: if true, every ``(features, label)`` element is mapped so that
            the feature columns are stacked along the last axis into a single
            tensor. Requires the dataset to yield a label, e.g. by passing
            ``label_name=...`` through ``kwargs``.
        shuffle: if true, rows are shuffled by ``make_csv_dataset`` and the
            resulting batches are shuffled again with a buffer of 500 batches.
            If false, the file order is preserved.
        **kwargs: further keyword arguments for ``make_csv_dataset``.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    dataset = tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=batch_size,
        num_epochs=num_epochs,
        ignore_errors=True,
        # make_csv_dataset shuffles by default; forward the flag so that
        # shuffle=False really yields rows in file order.
        shuffle=shuffle,
        **kwargs
    )

    def pack(features, label):
        # Turn the {column: tensor} dict into one (batch, n_columns) tensor.
        return tf.stack(list(features.values()), axis=-1), label

    if is_pack:
        dataset = dataset.map(pack)

    if shuffle:
        # Operates on already-batched elements, i.e. shuffles whole batches.
        dataset = dataset.shuffle(500)

    return dataset

In [None]:
import glob
import os

def list_model_segments(model_dir='../MDL/DL'):
    """Return the sorted segment names of the ``model_<segment>.h5`` files in ``model_dir``.

    Only files matching ``model_*.h5`` are considered; the ``model_`` prefix and
    the ``.h5`` suffix are stripped from the file name. An empty list is
    returned when nothing matches.
    """
    prefix, suffix = 'model_', '.h5'
    pattern = os.path.join(glob.escape(model_dir), f'{prefix}*{suffix}')
    # glob returns matches in arbitrary order; sort so that positional lookups
    # such as segment[3] pick the same segment on every machine and run.
    return sorted(
        os.path.basename(path)[len(prefix):-len(suffix)]
        for path in glob.glob(pattern)
    )

segment = list_model_segments()
segment

In [None]:
# Pick one segment by its position in the `segment` list.
# NOTE(review): the index 3 is hard-coded - confirm which segment is intended.
seg = segment[3]

# Training data of the chosen segment: batches of 16, a single pass over the
# file, 'target' as label column, features packed into one tensor, shuffled.
train_dataset = get_dataset(
    file_path=f'../OUT/segment/{seg}/train_df.csv',
    batch_size=16,
    num_epochs=1,
    label_name='target',
    is_pack=True,
    shuffle=True,
)

# Test data of the same segment: no label column, features left as a dict
# (is_pack=False), shuffle flag off.
test_dataset = get_dataset(
    file_path=f'../OUT/segment/{seg}/test_df.csv',
    batch_size=16,
    num_epochs=1,
    is_pack=False,
    shuffle=False,
)

# model = Sequential()
# model.add(Dense(units=512,activation='elu'))
# model.add(Dropout(0.2))
# model.add(Dense(units=256,activation='elu'))
# model.add(Dropout(0.2))
# model.add(Dense(units=1,activation=None,kernel_constraint='non_neg'))
# model.compile(optimizer=Adam(learning_rate=0.01),loss='huber',metrics=['mse'])

# Load the model saved for this segment from its HDF5 file.
model = tf.keras.models.load_model(f'../MDL/DL/model_{seg}.h5')

In [None]:
train_df = pd.read_csv(f'../OUT/segment/{seg}/train_df.csv')
# test_df  = pd.read_csv(f'../OUT/segment/{seg}/test_df.csv')

true = train_df.target
# Predict from the frame itself rather than from the shuffled `train_dataset`:
# a shuffled pipeline returns predictions in random row order, so they would
# not line up with `true` and any error metric computed from the pair would be
# meaningless. The feature columns keep the CSV column order (label dropped).
# NOTE(review): assumes every feature column is numeric - confirm.
train_features = train_df.drop(columns='target').to_numpy(dtype='float32')
pred = model.predict(train_features).reshape(-1)

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error between the `true` targets and the `pred` predictions
# computed in the previous cell.
mean_absolute_error(y_true=true,y_pred=pred)

In [None]:
import datatable as dt

# Read ../OUT/train_fn_oh_noseg.csv with datatable's fread.
tr_tmp = dt.fread(f'../OUT/train_fn_oh_noseg.csv')

In [None]:
# Number of rows in the loaded table; the division by 1 makes it display as a float.
tr_tmp.shape[0]/1

In [4]:
# NOTE(review): looks like a back-of-the-envelope duration estimate - 65*256
# seconds converted to days (/3600/24), times 25. Confirm what the factors mean.
25*(65*256/3600/24)

4.814814814814814