In [1]:
import os
import sys
# Project source folders must be on sys.path before the local modules below are imported.
sys.path.insert(0, '..//src')
sys.path.insert(0, '..//src//data')
sys.path.insert(0, '..//src//features')
sys.path.insert(0, '..//src//models')

import global_func as gf
import data_preprocessing
import predict
import fit_model

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# --- Paths -------------------------------------------------------------------
# Build the parent-relative prefixes from the platform's own separator instead
# of hard-coding backslashes, so the notebook is not tied to Windows.
# On Windows these evaluate to exactly the strings used before
# ('..\\' and '..\\config\\'); elsewhere they become '../' and '../config/'.
# NOTE(review): the directory values stored in the config may themselves
# contain backslashes -- confirm before relying on this on non-Windows hosts.
PARENT_PREFIX = os.pardir + os.sep
config_dir = os.path.join(os.pardir, 'config', '')  # trailing '' keeps the final separator


def _dir_from_config(key):
    """Return the parent-relative directory read from config section 'DIR' under `key`.

    The value is obtained with ``gf.read_config`` (using the module-level
    ``config_dir``) and prefixed with ``PARENT_PREFIX``.
    """
    return PARENT_PREFIX + gf.read_config(config_dir=config_dir, section='DIR', key=key)


data_dir = _dir_from_config('DATA_PROCESSED')
models_dir = _dir_from_config('MODELS')
remodel_dir = _dir_from_config('DATA_REMODEL')


# --- Pipeline objects ----------------------------------------------------------
DataPrep = data_preprocessing.DataPreprocessing()

# NOTE(review): these helpers presumably unpickle the files; only load pickles
# produced by this project, since unpickling untrusted data can execute code.
model = gf.load_from_pkl(filename=f'{models_dir}sarimax.pkl')
scaler = gf.load_from_pkl(filename=f'{models_dir}scaler.pkl')

# The predictor is assembled from the stored model and scaler, then written
# back to disk. This save runs every time the cell is executed.
Predictor = predict.Predictor(model=model, scaler=scaler)
gf.save_as_pkl(obj=Predictor, filename=f'{models_dir}predictor.pkl', compress=6)

# Load Clean Data

In [None]:
# Raw frame plus the three "_05" splits, all read from the processed-data directory.
df_raw = gf.load_from_pkl(f'{data_dir}df_raw.pkl')

split_paths = (
    f'{data_dir}df_train_05.pkl',
    f'{data_dir}df_valid_05.pkl',
    f'{data_dir}df_test_05.pkl',
)
df_train, df_valid, df_test = (
    gf.load_from_pkl(filename=split_path) for split_path in split_paths
)

# Tag each split with a `name` attribute matching its variable name.
for split_label, split_frame in (
    ('df_train', df_train),
    ('df_valid', df_valid),
    ('df_test', df_test),
):
    split_frame.name = split_label

#gf.plotting_line_all(dataframe=df_train)

# Modelling

## Fitting Model

In [None]:
# Fit a new model on the training split.
# NOTE: this rebinds `model`. The `Predictor` built in the setup cell was
# constructed with the model loaded from sarimax.pkl and is not updated here.
model = fit_model.modelling(train_dataframe=df_train)

## Model Summary

In [None]:
# Print the summary of whatever `model` is currently bound to: the model loaded
# from sarimax.pkl in the setup cell, or the refit one if the fitting cell ran.
print(model.summary())

## Save to PKL

In [None]:
#gf.create_pkl(obj=model, pkl=r'..\models\pkl\sarimax(0,0,0)(2,1,0)(12).pkl')

# End to End Process

## Step 1

In [4]:
# Two hand-written observations (nine values each) used to exercise the
# end-to-end pipeline. In the recorded output the values map, in order, to
# lq45, jci, idx30, eido, spy, dom_b, dom_s, for_b, for_s.
# Missing entries use `np.nan`: the `np.NaN` alias was removed in NumPy 2.0.
# In the recorded output the missing entries show up as 0.0.
prior = [100.23, 200.23, 300.23, np.nan, 500, 440.23, 330.23, 202.23, np.nan]
current = [10100.669, 202.669, 303.669, 604.669, np.nan, 449.669, 339.669, 292.669, 119.669]

df = Predictor.parsing_data(prior=prior, current=current)
print(Predictor.input_dict)  # the predictor's input template (keys and placeholder values)
df

{'date': 'yyyymmdd', 'lq45': 0, 'jci': 0, 'idx30': 0, 'eido': 0, 'spy': 0, 'dom_b': 0, 'dom_s': 0, 'for_b': 0, 'for_s': 0}


Unnamed: 0_level_0,lq45,jci,idx30,eido,spy,dom_b,dom_s,for_b,for_s
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-11-17,100.23,200.23,300.23,0.0,500.0,440.23,330.23,202.23,0.0
2022-11-18,10100.669,202.669,303.669,604.669,0.0,449.669,339.669,292.669,119.669


## Step 2

In [5]:
# Maybe unnecessary: for this two-row input the recorded output is identical
# to the Step 1 frame, so the resampling changes nothing here.
df_2 = DataPrep.resampling(dataframe=df, interval='B', resampling_method='median', fillna_method='ffill')
df_2

Unnamed: 0_level_0,lq45,jci,idx30,eido,spy,dom_b,dom_s,for_b,for_s
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-11-17,100.23,200.23,300.23,0.0,500.0,440.23,330.23,202.23,0.0
2022-11-18,10100.669,202.669,303.669,604.669,0.0,449.669,339.669,292.669,119.669


## Step 3

In [6]:
# Enrich the resampled frame. In the recorded output this adds four columns:
# dom_total, dom_net, for_total and for_net.
df_3 = DataPrep.enriching(dataframe=df_2)
df_3

Unnamed: 0_level_0,lq45,jci,idx30,eido,spy,dom_b,dom_s,for_b,for_s,dom_total,dom_net,for_total,for_net
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-11-17,100.23,200.23,300.23,0.0,500.0,440.23,330.23,202.23,0.0,770.46,110.0,202.23,202.23
2022-11-18,10100.669,202.669,303.669,604.669,0.0,449.669,339.669,292.669,119.669,789.338,110.0,412.338,173.0


## Step 4

In [7]:
# Attach the stored seasonal values; the recorded output gains a `seasonal` column.
seasonal_pkl = f'{remodel_dir}value_for_seasonal.pkl'
df_seasonal = gf.load_from_pkl(seasonal_pkl)
df_4 = DataPrep.enriching_seasonal(
    dataframe=df_3,
    df_seasonal=df_seasonal,
)
df_4

Unnamed: 0_level_0,lq45,jci,idx30,eido,spy,dom_b,dom_s,for_b,for_s,dom_total,dom_net,for_total,for_net,seasonal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-11-17,100.23,200.23,300.23,0.0,500.0,440.23,330.23,202.23,0.0,770.46,110.0,202.23,202.23,1.009547
2022-11-18,10100.669,202.669,303.669,604.669,0.0,449.669,339.669,292.669,119.669,789.338,110.0,412.338,173.0,1.009547


## Step 5

In [8]:
# Stationarity transform. The method receives the *path* of the pickle holding
# the non-stationary column list, not the loaded list itself.
# In the recorded output only the 2022-11-18 row remains, and eido/spy come out
# as inf/-1.0. NOTE(review): this is consistent with a relative change computed
# against the 0.0 placeholders from Step 1 -- confirm in DataPreprocessing.
pkl = f'{remodel_dir}list_non_stationary_cols.pkl'
df_5 = DataPrep.stationary_transform(dataframe=df_4, non_stationary_cols_pkl=pkl)
df_5

Unnamed: 0_level_0,lq45,jci,idx30,eido,spy,dom_b,dom_s,for_b,for_s,dom_total,dom_net,for_total,for_net,seasonal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-11-18,99.774908,0.012181,0.011455,inf,-1.0,0.021441,0.028583,292.669,119.669,0.024502,110.0,412.338,173.0,1.009547


## Step 6

In [10]:
# Outlier treatment driven by the stored reference values. In the recorded
# output the extreme entries from Step 5 (e.g. lq45 = 99.77, eido = inf) are
# replaced by small finite values.
outlier_pkl = f'{remodel_dir}value_for_outlier.pkl'
df_value_for_outlier = gf.load_from_pkl(outlier_pkl)
df_6 = DataPrep.outlier_treatment_batch(
    dataframe=df_5,
    df_value_for_outlier=df_value_for_outlier,
)
df_6

Unnamed: 0_level_0,lq45,jci,idx30,eido,spy,dom_b,dom_s,for_b,for_s,dom_total,dom_net,for_total,for_net,seasonal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-11-18,0.013687,0.012181,0.011455,0.018044,-0.009033,0.021441,0.028583,3.96612,3.93384,0.024502,0.81014,7.8526,0.676,1.009547


## Step 7

In [12]:
# Scale the treated frame with the `scaler` object loaded from scaler.pkl in
# the setup cell.
df_7 = DataPrep.std_scaler_transform(dataframe=df_6, scaler=scaler)
df_7

Unnamed: 0_level_0,lq45,jci,idx30,eido,spy,dom_b,dom_s,for_b,for_s,dom_total,dom_net,for_total,for_net,seasonal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-11-18,1.421694,1.62087,1.159236,1.480886,-1.561916,0.052924,0.082377,1.779815,1.7237,0.07584,1.404022,1.803734,1.466343,0.561184


## Step 8

In [13]:
# Forecast from the scaled frame. The recorded output has four columns:
# lq45, pred_lq45, lq45_inv and pred_lq45_inv. lq45_inv equals the Step 6
# (pre-scaling) lq45 value. NOTE(review): the `_inv` columns are presumably
# inverse-scaled -- confirm in Predictor.
df_8 = Predictor.forecasting(dataframe=df_7, name='df_8')
df_8

Unnamed: 0_level_0,lq45,pred_lq45,lq45_inv,pred_lq45_inv
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-11-18,1.421694,1.194661,0.013687,0.011559


# Predicting

In [None]:
# Forecast the validation split first, then the test split (same order as before).
forecast_jobs = (
    (df_valid, 'pred_valid'),
    (df_test, 'pred_test'),
)
df_forecast_valid, df_forecast_test = (
    Predictor.forecasting(dataframe=split_frame, name=run_name)
    for split_frame, run_name in forecast_jobs
)

In [None]:
# One figure per split (test first, then validation) comparing the
# `lq45_inv` and `pred_lq45_inv` columns.
compare_cols = ['lq45_inv', 'pred_lq45_inv']
for forecast_frame in (df_forecast_test, df_forecast_valid):
    forecast_frame[compare_cols].plot(figsize=(15, 4))
plt.show()

## Predict Real

In [None]:
# Read the test split once; previously the same pickle was reloaded from disk
# on every iteration even though the file path never changes.
df_test_full = gf.load_from_pkl(filename=f'{data_dir}df_test_05.pkl')

# Forecast ten single-row windows, walking backwards from the end of the frame:
# [-2:-1], [-3:-2], ..., [-11:-10]. The windows never overlap.
# NOTE(review): the very last row is never included (the first window stops
# before it) -- confirm that this is intended.
for i in range(1, 11):
    i_to = -1 * i
    i_from = i_to - 1
    df_real = df_test_full[i_from:i_to]
    df_real_pred = Predictor.forecasting(dataframe=df_real, name='pred_real')

    print(df_real_pred)