In [6]:
import os

import numpy as np
import pandas as pd

from scipy.stats import pearsonr
from sklearn.metrics import *
from sklearn.metrics import mean_absolute_error

from model.utils.tools import StandardScaler, MinMaxScaler, load_data_timeindex, load_data_DFM, set_lag_missing, repeat_label_row


In [4]:
from datetime import datetime as dt

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

print("Python set up modules loaded")
print("Python version:\n", os.sys.version, "\nPlatform: ", os.sys.platform)
print("Base directory path:", os.getcwd())
print("Data/time:", dt.now())

# import statsmodels as sma
# import statsmodels.api as sm
# from statsmodels.tsa.stattools import adfuller
# from statsmodels.tsa.x13 import x13_arima_select_order, _find_x12

# print("STATSMODELS info (statsmodels._version.version)", sma._version.version)
# # v0.13.0 만 json version 정보 있음. v0.14.0 부터 없음.

import matplotlib.pyplot as plt
# plt.rc('font', family='NanumBarunGothic')
%matplotlib inline
import seaborn as sns

Python set up modules loaded
Python version:
 3.8.5 (tags/v3.8.5:580fbb0, Jul 20 2020, 15:57:54) [MSC v.1924 64 bit (AMD64)] 
Platform:  win32
Base directory path: d:\2023\SVU_SCT_FIN\SVU_SCT_FIN_SP\SVU_SCT_FIN\Model\BIVA
Data/time: 2023-08-04 12:13:40.141819


In [7]:
target = 'A1' # GDP grow ratio
seq_len = 6
flag = "train"

root = "D:/2023/SVU_SCT_FIN/SVU_SCT_FIN_SP/SVU_SCT_FIN/Model/BIVA/Base_data/"
file = "dataset_03_S.xlsx"
path = os.path.join(root,file)
df_Q, df_Q_trans, df_M, df_M_trans, var_info = load_data_timeindex(path)

period = {'M': ['2000-01','2023-01'], 'Q':['2000-03','2023-03']}
df_M = df_M.loc[period['M'][0]:period['M'][1]]
df_Q = df_Q.loc[period['Q'][0]:period['Q'][1]]

cols_M = list(df_M.columns)
cols_Q = list(df_Q.columns)
cols_Q.remove(target)
df_Q = df_Q[cols_Q + [target]]


type_map = {'train': 0, 'val': 1, 'test': 2}
set_type = type_map[flag]

num_train = int(len(df_M) * 0.8) 
num_test = int(len(df_M) * 0.1)
num_vali = len(df_M) - num_train - num_test
border1s = [0, num_train - seq_len, len(df_M) - num_test - seq_len]
border2s = [num_train, num_train + num_vali, len(df_M)]
border1 = border1s[set_type]
border2 = border2s[set_type]

border1s_Q = [0, (num_train - seq_len)//3, (len(df_M) - num_test - seq_len)//3]
border2s_Q = [num_train//3, (num_train + num_vali)//3, len(df_M)//3]
border1_Q = border1s_Q[set_type]
border2_Q = border2s_Q[set_type]

df_data = df_M[cols_M]
df_data_t = df_Q[[target]]

scaler_m = MinMaxScaler()
scaler_q = MinMaxScaler()

train_data = df_data[border1s[0]:border2s[0]]
df_data_cols = df_data.columns
df_data_index = df_data.index

train_data_t = df_data_t[border1s_Q[0]:border2s_Q[0]]
df_data_t_cols = df_data_t.columns
df_data_t_index = df_data_t.index

scaler_m.fit(train_data.values)
data = scaler_m.transform(df_data.values)
scaler_q.fit(train_data_t.values)
data_t = scaler_q.transform(df_data_t.values)

Q_max = scaler_q.max_num
Q_min = scaler_q.min_num
print(f"Q_max : {Q_max}")
print(f"Q_min : {Q_min}")

Q_max : -3.3
Q_min : 3.0


In [27]:
def Metric(pred, true_y) #, mean, std):

        #prepare
        result = pred[:]
        label = true_y[:]
        print(f"result.shape, label.shape: {result.shape}, {label.shape}")
        print(f"result[0]: {result[0]}")

        # result_scale = (result*std) + mean
        # label_scale = (label*std) + mean

        #MAE
        # MAE = mean_absolute_error(label, result)
        sumout = 0
        for id in range(result.shape[0]):
          out = mean_absolute_error(label[id],result[id])
          sumout += out
        MAE = sumout/(result.shape[0])
        print(f"MAE : {MAE}")

        #MSE
        sumout = 0
        for id in range(result.shape[0]):
          out = mean_squared_error(label[id],result[id])
          sumout += out
        MSE =  sumout/(result.shape[0])
        print(f"MSE : {MSE}")

        # RMSE (Root Mean Squared Error)
        sumout = 0
        for id in range(result.shape[0]):
          out = mean_squared_error(label[id],result[id])
          sumout += np.sqrt(out)
        RMSE =  sumout/(result.shape[0])
        print(f"RMSE : {RMSE}")

        #MAPE (Mean Absolute Percentage Error)
        sumout = 0
        for id in range(result.shape[0]):
          out = np.mean(np.abs((label[id] - result[id]) / label[id])) * 100 
          sumout += out
        N_MAPE = sumout/(result.shape[0])
        print(f"norm_MAPE : {N_MAPE}")

        # #MAPE_scale (Mean Absolute Percentage Error)
        # sumout = 0
        # for id in range(result_scale.shape[0]):
        #   out = np.mean(np.abs((label_scale[id] - result_scale[id]) / label_scale[id])) * 100 
        #   sumout += out
        # MAPE = sumout/(result_scale.shape[0])
        # print(f"denorm_MAPE : {MAPE}")
        return MAE, MSE, RMSE, N_MAPE #, MAPE

In [28]:
pred = np.load("C:/Users/PC/Downloads/BIVA_pred.npy")
true = np.load("C:/Users/PC/Downloads/BIVA_trues.npy")
print(f"pred.shape: {pred.shape}")
print(f"true.shape: {true.shape}")

MAE, MSE, RMSE, norm_MAPE, MAPE = Metric(pred, true, Q_mean, Q_std)


pred.shape: (28, 1, 1)
true.shape: (28, 1, 1)
result.shape, label.shape: (28, 1, 1), (28, 1, 1)
result[0]: [[-0.3949244]]
MAE : 0.5552071183919907
MSE : 0.42724806055360076
RMSE : 0.5552071183919907
norm_MAPE : 145.88428501571929
denorm_MAPE : 168.35953038043837


In [55]:
result_scale = (pred*Q_std) + Q_mean
label_scale = (true*Q_std) + Q_mean
label_scale = label_scale.reshape([28]).astype(np.float32)
result_scale = result_scale.reshape([28]).astype(np.float32)
#result_scale
np.sum(np.abs((label_scale - result_scale)/label_scale))/28



1.6835952486310686

In [1]:
# result file load
result_root = 'D:/2023/SVU_SCT_FIN/SVU_SCT_FIN_SP/SVU_SCT_FIN/Model/BIVA/exp/BAK_01/(BIVA)_result/BIVA-no1-dt20230802-seq6-pred1-bt1-lr0.15-mv6-v3x3/'
file_inputs = 'BIVA_inputs.npy'
file_imputed = 'BIVA_imputation.npy'
file_recons = 'BIVA_recons.npy'
file_pred = 'BIVA_pred.npy'
file_true = 'BIVA_trues.npy'

inputs = np.load(os.path.join(result_root,file_inputs))
imputed = np.load(os.path.join(result_root,file_imputed))
recons = np.load(os.path.join(result_root,file_recons))
pred = np.load(os.path.join(result_root,file_pred))
true = np.load(os.path.join(result_root,file_true))

NameError: name 'np' is not defined

In [None]:
I = imputed[0][:,0]
I = pd.DataFrame(I)
R = recons[0][:,0]
R = pd.DataFrame(R)

df_r = pd.concat([I,R],axis=1)
I.plot()

In [None]:
GT = true.squeeze()
GT = pd.DataFrame(GT)
PRED = pred.squeeze()
PRED = pd.DataFrame(PRED)
df = pd.concat([GT,PRED],axis=1)
df.plot()

In [None]:
df = pd.DataFrame(inputs[1])
plt.figure(figsize=(15,5))
sns.heatmap(df.T.isna().transpose(),
            cmap="YlGnBu",
            cbar=False,
            # cbar_kws={'label': 'Missing Data'}
            )
plt.show()