# Import Libraries

In [2]:
!pip install onnx onnxruntime

Collecting onnx
  Downloading onnx-1.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.6/14.6 MB[0m [31m98.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting onnxruntime
  Downloading onnxruntime-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.9/5.9 MB[0m [31m120.0 MB/s[0m eta [36m0:00:00[0m
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx, humanfriendl

In [3]:
import pandas as pd
import numpy as np
import os
import argparse

from tqdm.auto import tqdm
from glob import glob

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

import onnx
import onnxruntime as ort

# Load ONNX Model

In [4]:
def get_model(onnx_path):
    # 6. Create ONNX Session
    # ONNX 모델을 로드합니다.
    model = onnx.load(onnx_path)

    # ONNX 런타임 세션을 생성합니다.
    model = ort.InferenceSession(onnx_path)
    return model

# Parameter Setting

In [None]:
# Get Total Data
data_path = "/content/drive/MyDrive/DKU/Ubiosis/total_data.csv"
data_df = pd.read_csv(data_path)

In [5]:
# Get ONNX Model
onnx_path = "/content/drive/MyDrive/DKU/Ubiosis/onnx_model/tabnet_model_230702.onnx"
model = get_model(onnx_path)

# Data Pre-Processing

In [33]:
def prepare_dataset(data_df, c_mode = "all", cat_col=[], scale_list = [1,1,1,1,1,1,1,1,1]):
    # 결측치 제거
    data_df.dropna(axis=0,inplace=True)

    # Radius Label Encoding
    cat_dims={}
    for i in tqdm(range(len(cat_col))):
        l_enc = LabelEncoder()
        cat = cat_col[i]
        data_df[cat] = l_enc.fit_transform(data_df[cat].values)
        cat_dims[cat] = len(l_enc.classes_)

    # 데이터 정보 별 데이터프레임 분할
    radius_df = data_df.iloc[:,:1]
    cis1_df = data_df.iloc[:,1:6001]
    cis2_df = data_df.iloc[:,6001:12001]
    shear_df = data_df.iloc[:,12001:]
    shear_df = shear_df.div(scale_list, axis=1)
    re_cols = ["1","2","5","10","50","100","150","300","1000"]
    re_shear_df = shear_df[re_cols]

    if c_mode == "all":
        data = pd.concat([radius_df, cis1_df, cis2_df,re_shear_df],axis=1)
    elif c_mode == "only2":
        data = pd.concat([radius_df, cis2_df,re_shear_df],axis=1)
    else:
        data = pd.DataFrame()

    return data, cat_dims

c_mode = "all"
cat_col = ["RADIUS"]
scale_list = [10,10,10,10,10,10,10,15,20]

data, cat_dims = prepare_dataset(data_df, c_mode, cat_col, scale_list)
data.tail(3)

  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,RADIUS,CIS_1_1,CIS_1_2,CIS_1_3,CIS_1_4,CIS_1_5,CIS_1_6,CIS_1_7,CIS_1_8,CIS_1_9,...,CIS_2_6000,1,2,5,10,50,100,150,300,1000
79333,10,596.0,596.0,596.0,596.0,596.0,596.0,596.0,596.0,596.0,...,523.0,1.525,1.3,1.17,0.85,0.51,0.44,0.41,0.37,0.31
79334,6,630.0,630.0,630.0,630.0,630.0,630.0,630.0,630.0,630.0,...,517.0,0.255,0.28,0.35,0.32,0.27,0.27,0.26,0.26,0.26
79335,6,594.0,594.0,594.0,594.0,595.0,595.0,595.0,595.0,595.0,...,540.0,1.325,1.14,1.04,0.77,0.47,0.41,0.39,0.36,0.35


## Data Split

In [43]:
train, test = train_test_split(data, test_size=0.2, random_state=530)
train, valid = train_test_split(train, test_size=0.2, random_state=530)

X_train = train.iloc[:,:-9].reset_index(drop=True).to_numpy()
y_train = train.iloc[:,-9:].reset_index(drop=True).to_numpy()
X_valid = valid.iloc[:,:-9].reset_index(drop=True).to_numpy()
y_valid = valid.iloc[:,-9:].reset_index(drop=True).to_numpy()
X_test = test.iloc[:,:-9].reset_index(drop=True).to_numpy().astype(np.float32)
y_test = test.iloc[:,-9:].reset_index(drop=True).to_numpy()

In [44]:
len(y_test)

15868

# ONNX Predict

In [22]:
# Predict
cis_data = X_test.copy()

y_preds = []
for i in tqdm(range(len(cis_data))):
    data = np.expand_dims(cis_data[i], axis=0)
    input_data = {'input': data}
    outputs = model.run(None, input_data)
    shear_1000 = round(outputs[0][0][0], 1)
    shear_300 = round(outputs[0][0][1], 1)
    shear_150 = round(outputs[0][0][2], 1)
    shear_100 = round(outputs[0][0][3], 1)
    shear_50 = round(outputs[0][0][4], 1)
    shear_10 = round(outputs[0][0][5], 1)
    shear_5 = round(outputs[0][0][6], 1)
    shear_2 = round(outputs[0][0][7], 1)
    shear_1 = round(outputs[0][0][8], 1)
    y_pred = [shear_1,shear_2,shear_5,shear_10,shear_50,shear_100,shear_150,shear_300,shear_1000]
    y_preds.append(y_pred)

  0%|          | 0/15868 [00:00<?, ?it/s]

In [35]:
y_preds[:5]

[[28.4, 18.5, 11.3, 8.7, 5.2, 4.5, 4.5, 3.5, 3.0],
 [32.1, 20.5, 12.6, 9.4, 5.6, 4.8, 4.5, 4.1, 3.7],
 [27.0, 17.4, 10.8, 8.1, 4.9, 4.2, 3.9, 3.6, 3.2],
 [19.0, 12.5, 8.0, 6.1, 3.8, 3.4, 3.2, 2.9, 2.6],
 [30.9, 19.8, 12.2, 9.0, 5.4, 4.7, 4.4, 4.0, 3.6]]

In [45]:
scale_list.sort(reverse=True)
def unscale_values(y_list):
    unscale_list = [x * y for x,y in zip(y_list,scale_list)]
    return unscale_list

# un_y_pred = unscale_values(y_preds)
y_test = np.expand_dims(y_test, axis=0)
un_y_test = unscale_values(y_test)
print(len(un_y_test))

1


In [46]:
un_y_test

[array([[36.09999847, 30.39999898, 27.        , ...,  9.19999981,
          8.19999981,  7.5999999 ],
        [36.29999924, 30.53333282, 27.20000076, ...,  9.19999981,
          8.39999962,  7.4000001 ],
        [28.79999924, 24.80000051, 22.79999924, ...,  8.60000038,
          7.80000019,  7.19999981],
        ...,
        [27.20000076, 23.19999949, 21.20000076, ...,  7.5999999 ,
          7.        ,  6.        ],
        [36.59999847, 30.93333435, 27.79999924, ...,  9.60000038,
          8.60000038,  7.5999999 ],
        [35.40000153, 30.        , 27.        , ...,  9.39999962,
          8.60000038,  7.5999999 ]])]

In [47]:
# scale_list.sort(reverse=True)
# def unscale_values(y_list):
#     unscale_list = [x * y for x,y in zip(y_list,scale_list)]
#     return unscale_list

# # un_y_pred = unscale_values(y_preds)
# un_y_test = unscale_values(y_test)

re_cols = ["1","2","5","10","50","100","150","300","1000"]

y_real_df = pd.DataFrame(columns=re_cols, data=un_y_test[0])
y_pred_df = pd.DataFrame(columns=re_cols, data=y_preds)

In [48]:
y_real_df.tail(3)

Unnamed: 0,1,2,5,10,50,100,150,300,1000
15865,27.200001,23.199999,21.200001,15.6,9.4,8.2,7.6,7.0,6.0
15866,36.599998,30.933334,27.799999,20.200001,11.8,10.2,9.6,8.6,7.6
15867,35.400002,30.0,27.0,19.6,11.6,10.0,9.4,8.6,7.6


In [49]:
y_pred_df.tail(3)

Unnamed: 0,1,2,5,10,50,100,150,300,1000
15865,26.700001,17.299999,10.8,8.0,4.9,4.3,4.0,3.7,3.3
15866,61.099998,37.799999,22.0,15.5,8.7,6.7,5.8,4.7,3.6
15867,32.700001,20.9,12.8,9.6,5.8,5.0,4.6,4.2,3.8


In [50]:
def pearson_correlation_coefficient(X, Y):
    """
    두 변수 X와 Y 간의 피어슨 상관계수를 계산하는 함수

    :param X: 첫 번째 변수의 값들을 담은 1차원 NumPy 배열
    :param Y: 두 번째 변수의 값들을 담은 1차원 NumPy 배열
    :return: 피어슨 상관계수
    """
    # 변수들의 평균 계산
    mean_X = np.mean(X)
    mean_Y = np.mean(Y)

    # 각 변수들의 편차 계산
    deviation_X = X - mean_X
    deviation_Y = Y - mean_Y

    # 피어슨 상관계수의 분자 계산
    numerator = np.sum(deviation_X * deviation_Y)

    # 피어슨 상관계수의 분모 계산
    denominator = np.sqrt(np.sum(deviation_X ** 2) * np.sum(deviation_Y ** 2))

    # 피어슨 상관계수 계산
    pearson_coefficient = numerator / denominator

    return pearson_coefficient

In [51]:
mae_list = []
mse_list = []
rmse_list = []
mape_list = []
r2_list = []
pc_list = []

for i in range(len(re_cols)):
    print(f"{re_cols[i]}")
    real_v = list(y_real_df[re_cols[i]].values)
    pred_v = list(y_pred_df[re_cols[i]].values)

    mae = mean_absolute_error(real_v, pred_v)
    mse = mean_squared_error(real_v, pred_v)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(real_v, pred_v)
    r2_scores = r2_score(real_v, pred_v)
    pc_scores = pearson_correlation_coefficient(real_v, pred_v)

    mae_list.append(mae)
    mse_list.append(mse)
    rmse_list.append(rmse)
    mape_list.append(mape)
    r2_list.append(r2_scores)
    pc_list.append(pc_scores)

ev_df = pd.DataFrame(columns=["MAE","MSE","RMSE","MAPE","R2","Pearson"])
ev_df["MAE"]=mae_list
ev_df["MSE"]=mse_list
ev_df["RMSE"]=rmse_list
ev_df["MAPE"]=mape_list
ev_df["R2"]=r2_list
ev_df["Pearson"]=pc_list
ev_df.index = re_cols

1
2
5
10
50
100
150
300
1000


In [52]:
ev_df.head(9)

Unnamed: 0,MAE,MSE,RMSE,MAPE,R2,Pearson
1,4.918137,70.98208,8.425086,0.156993,0.38514,0.671353
2,8.737245,109.45291,10.461974,0.306575,-0.535245,0.687073
5,13.01883,207.470543,14.403838,0.517475,-2.725143,0.631238
10,9.285297,98.72634,9.936113,0.508027,-3.784403,0.710998
50,5.529708,33.756497,5.810034,0.508241,-5.544008,0.706716
100,4.80005,24.984379,4.998438,0.50973,-6.446328,0.681981
150,4.450567,21.447361,4.63113,0.504636,-6.77951,0.670724
300,4.136211,18.310437,4.27907,0.516833,-7.210586,0.663118
1000,3.736703,15.02378,3.876052,0.521234,-6.857471,0.61656
