# Import Libraries

In [None]:
!pip install onnx onnxruntime

In [None]:
import pandas as pd
import numpy as np
import os
import argparse

from tqdm.auto import tqdm
from glob import glob

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

import onnx
import onnxruntime as ort

# Load ONNX Model

In [None]:
def get_model(onnx_path):
    """Validate an ONNX file and open an ONNX Runtime session for it.

    Args:
        onnx_path: Path to the serialized ``.onnx`` model file.

    Returns:
        The ``onnxruntime.InferenceSession`` created from ``onnx_path``.

    Raises:
        onnx.checker.ValidationError: If the parsed model fails the ONNX
            structural check.
    """
    # Parse the file and verify the graph before handing it to the runtime,
    # so a malformed export is reported by the checker instead of surfacing
    # later as an opaque runtime failure. The proto is kept under its own
    # name rather than being overwritten by the session object.
    model_proto = onnx.load(onnx_path)
    onnx.checker.check_model(model_proto)

    # Create the ONNX Runtime session that is used for inference.
    session = ort.InferenceSession(onnx_path)
    return session

# Parameter Setting

In [None]:
# Get Total Data
# Read the complete table from a fixed absolute path (the /content/drive
# prefix suggests a Colab Drive mount -- adjust when running elsewhere).
data_path = "/content/drive/MyDrive/DKU/Ubiosis/total_data.csv"
data_df = pd.read_csv(data_path)

In [None]:
# Get ONNX Model
# Open the exported model through get_model(); `model` is the session object
# that the prediction cell calls `.run()` on.
onnx_path = "/content/drive/MyDrive/DKU/Ubiosis/onnx_model/tabnet_model_230702.onnx"
model = get_model(onnx_path)

# Data Pre-Processing

In [None]:
def prepare_dataset(data_df, c_mode="all", cat_col=None, scale_list=None):
    """Clean, label-encode and slice the raw table into a model-ready frame.

    The caller's frame is not modified: missing-value rows are dropped and
    categorical columns are encoded on a private copy.

    The table is sliced purely by position:
    column 0 -> ``radius_df``, columns 1-6000 -> ``cis1_df``,
    columns 6001-12000 -> ``cis2_df``, columns 12001 onward -> ``shear_df``.

    Args:
        data_df: Raw ``pandas.DataFrame`` laid out as described above. The
            trailing block must contain the columns
            "1", "2", "5", "10", "50", "100", "150", "300", "1000".
        c_mode: ``"all"`` keeps radius + cis1 + cis2 + targets, ``"only2"``
            keeps radius + cis2 + targets; any other value yields an empty
            frame.
        cat_col: Names of the columns to label-encode. ``None`` (default)
            means no column is encoded.
        scale_list: Divisors applied positionally to the trailing shear
            columns, one per column in file order. ``None`` (default) means
            nine ones, i.e. no scaling.

    Returns:
        tuple: ``(data, cat_dims)`` where ``data`` is the concatenated frame
        whose last nine columns are the scaled targets in the order listed
        above, and ``cat_dims`` maps each encoded column name to its number
        of distinct classes.
    """
    # Resolve defaults here instead of using mutable default arguments.
    cat_col = [] if cat_col is None else list(cat_col)
    scale_list = [1] * 9 if scale_list is None else list(scale_list)

    # Drop rows with missing values. Work on a copy so the caller's frame
    # keeps its rows and its original (un-encoded) categorical values.
    data_df = data_df.dropna(axis=0).copy()

    # Label-encode the categorical columns and record their cardinality.
    cat_dims = {}
    for cat in tqdm(cat_col):
        l_enc = LabelEncoder()
        data_df[cat] = l_enc.fit_transform(data_df[cat].values)
        cat_dims[cat] = len(l_enc.classes_)

    # Split the table into its positional sections.
    radius_df = data_df.iloc[:, :1]
    cis1_df = data_df.iloc[:, 1:6001]
    cis2_df = data_df.iloc[:, 6001:12001]
    shear_df = data_df.iloc[:, 12001:]

    # Scale each shear column by its divisor (matched by position), then
    # reorder the targets into ascending label order.
    shear_df = shear_df.div(scale_list, axis=1)
    re_cols = ["1", "2", "5", "10", "50", "100", "150", "300", "1000"]
    re_shear_df = shear_df[re_cols]

    if c_mode == "all":
        data = pd.concat([radius_df, cis1_df, cis2_df, re_shear_df], axis=1)
    elif c_mode == "only2":
        data = pd.concat([radius_df, cis2_df, re_shear_df], axis=1)
    else:
        # Unknown mode: keep the original behaviour of returning no data.
        data = pd.DataFrame()

    return data, cat_dims

# "all" keeps both cis blocks as features; "only2" would keep only the second.
c_mode = "all"
# Column(s) that prepare_dataset label-encodes.
cat_col = ["RADIUS"]
# Divisors applied positionally to the trailing shear columns of the CSV;
# the order must match the column order in the file.
scale_list = [10,10,10,10,10,10,10,15,20]

data, cat_dims = prepare_dataset(data_df, c_mode, cat_col, scale_list)
data.tail(3)

## Data Split

In [None]:
# Hold out 20% of the rows for testing, then 20% of what remains for
# validation (both splits use the same fixed seed).
train, test = train_test_split(data, test_size=0.2, random_state=530)
train, valid = train_test_split(train, test_size=0.2, random_state=530)


def _features_and_targets(frame):
    """Return (features, targets) arrays; the targets are the last 9 columns."""
    ordered = frame.reset_index(drop=True)
    return ordered.iloc[:, :-9].to_numpy(), ordered.iloc[:, -9:].to_numpy()


X_train, y_train = _features_and_targets(train)
X_valid, y_valid = _features_and_targets(valid)
X_test, y_test = _features_and_targets(test)
# Only the test features are cast to float32; they are what the prediction
# cell feeds to the ONNX session.
X_test = X_test.astype(np.float32)

In [None]:
# Number of rows in the held-out test split.
len(y_test)

# ONNX Predict

In [None]:
# Predict
# Run the ONNX session one test sample at a time and collect one row of
# nine rounded predictions per sample.
cis_data = X_test.copy()

y_preds = []
for sample in tqdm(cis_data):
    # Use a dedicated name for the single-sample batch: the original loop
    # assigned it to `data`, which overwrote the prepared DataFrame returned
    # by prepare_dataset and broke any re-run of the split cell.
    batch = np.expand_dims(sample, axis=0)
    outputs = model.run(None, {'input': batch})
    # Output positions 0..8 were labelled 1000, 300, 150, 100, 50, 10, 5, 2, 1
    # in the original cell; reading them from index 8 down to 0 puts each row
    # in the 1, 2, 5, ..., 1000 column order used by the result frames.
    y_pred = [round(value, 1) for value in outputs[0][0][8::-1]]
    y_preds.append(y_pred)

In [None]:
# Peek at the first five prediction rows.
y_preds[:5]

In [None]:
# Reorder the divisors in place, largest first. NOTE: this mutates the same
# list object that was passed to prepare_dataset, so re-running the
# preprocessing cell after this one would apply the divisors in this new order.
# unscale_values below reads this global.
scale_list.sort(reverse=True)
def unscale_values(y_list, scales=None):
    """Multiply scaled target values back by their per-column factors.

    The factors are broadcast along the LAST axis of ``y_list``, so a single
    row of values, a 2-D batch of rows, or a batch wrapped in extra leading
    axes are all unscaled column by column. (Pairing with ``zip`` instead
    walks the first axis, which multiplies a whole wrapped batch by the first
    factor only.)

    Args:
        y_list: Array-like whose last axis has one entry per factor.
        scales: Optional sequence of factors. When omitted, the module-level
            ``scale_list`` is used.

    Returns:
        list: The unscaled values split along the first axis of ``y_list``
        (a list of scalars for 1-D input, a list of arrays otherwise).
    """
    factors = np.asarray(scale_list if scales is None else scales)
    unscale_list = list(np.asarray(y_list) * factors)
    return unscale_list

# un_y_pred = unscale_values(y_preds)
# Wrap y_test in a leading axis of length 1; the unscaled block is read back
# as un_y_test[0] when the result frames are built.
# NOTE: y_test is rebound here, so re-running this cell adds another axis.
y_test = np.expand_dims(y_test, axis=0)
un_y_test = unscale_values(y_test)
# Prints the length of the returned list (its first axis).
print(len(un_y_test))

In [None]:
# Display the unscaled ground-truth values.
un_y_test

In [None]:
# Column labels for the nine targets, in ascending order.
re_cols = ["1","2","5","10","50","100","150","300","1000"]

# Put ground truth and predictions into frames with identical columns so
# they can be compared column by column.
y_real_df = pd.DataFrame(un_y_test[0], columns=re_cols)
y_pred_df = pd.DataFrame(y_preds, columns=re_cols)

In [None]:
# Last three ground-truth rows.
y_real_df.tail(3)

In [None]:
# Last three predicted rows.
y_pred_df.tail(3)

In [None]:
def pearson_correlation_coefficient(X, Y):
    """
    Compute the Pearson correlation coefficient between two variables.

    Inputs are converted to float64 NumPy arrays, so plain lists are accepted
    as well as arrays.

    :param X: values of the first variable (1-D array-like)
    :param Y: values of the second variable (1-D array-like, same shape as X)
    :return: the Pearson correlation coefficient, or NaN when it is undefined
             because either input has zero variance (or is empty)
    :raises ValueError: if X and Y do not have the same shape
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if X.shape != Y.shape:
        raise ValueError(
            f"X and Y must have the same shape, got {X.shape} and {Y.shape}"
        )

    # Deviations of each variable from its own mean.
    deviation_X = X - np.mean(X) if X.size else X
    deviation_Y = Y - np.mean(Y) if Y.size else Y

    # Numerator: sum of the products of paired deviations.
    numerator = np.sum(deviation_X * deviation_Y)

    # Denominator: square root of the product of the summed squared deviations.
    denominator = np.sqrt(np.sum(deviation_X ** 2) * np.sum(deviation_Y ** 2))

    # A constant (or empty) input has no defined correlation; return NaN
    # explicitly instead of dividing zero by zero.
    if denominator == 0:
        return np.nan

    pearson_coefficient = numerator / denominator

    return pearson_coefficient

In [None]:
# One accumulator per metric; each receives one value per target column.
mae_list, mse_list, rmse_list = [], [], []
mape_list, r2_list, pc_list = [], [], []

for col in re_cols:
    print(f"{col}")
    real_v = list(y_real_df[col].values)
    pred_v = list(y_pred_df[col].values)

    # MSE is computed once and reused for RMSE.
    mse = mean_squared_error(real_v, pred_v)

    mae_list.append(mean_absolute_error(real_v, pred_v))
    mse_list.append(mse)
    rmse_list.append(np.sqrt(mse))
    mape_list.append(mean_absolute_percentage_error(real_v, pred_v))
    r2_list.append(r2_score(real_v, pred_v))
    pc_list.append(pearson_correlation_coefficient(real_v, pred_v))

# Summary table: one row per target column, one column per metric.
ev_df = pd.DataFrame(
    {
        "MAE": mae_list,
        "MSE": mse_list,
        "RMSE": rmse_list,
        "MAPE": mape_list,
        "R2": r2_list,
        "Pearson": pc_list,
    },
    index=re_cols,
)

In [None]:
# Show the metric table for all nine target columns.
ev_df.head(9)