# Import Libraries

In [None]:
# Mount Google Drive so files under /content/drive are reachable (requires Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')
import random
random.seed(530)

from glob import glob
from tqdm.auto import tqdm

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense

# Data Load

In [None]:
# Read the full dataset CSV from the mounted Drive folder into a DataFrame.
# NOTE(review): the path is hard-coded to one Drive layout; adjust it when running elsewhere.
data_path = "/content/drive/MyDrive/DKU/Ubiosis/total_data.csv"
data_df = pd.read_csv(data_path)

In [None]:
# Remove every row that contains at least one missing value, then preview the last rows.
data_df = data_df.dropna(axis="index")
data_df.tail(3)

Unnamed: 0,RADIUS,CIS_1_1,CIS_1_2,CIS_1_3,CIS_1_4,CIS_1_5,CIS_1_6,CIS_1_7,CIS_1_8,CIS_1_9,...,CIS_2_6000,1000,300,150,100,50,10,5,2,1
79333,3.2,596.0,596.0,596.0,596.0,596.0,596.0,596.0,596.0,596.0,...,523.0,3.1,3.7,4.1,4.4,5.1,8.5,11.7,19.5,30.5
79334,3.16,630.0,630.0,630.0,630.0,630.0,630.0,630.0,630.0,630.0,...,517.0,2.6,2.6,2.6,2.7,2.7,3.2,3.5,4.2,5.1
79335,3.16,594.0,594.0,594.0,594.0,595.0,595.0,595.0,595.0,595.0,...,540.0,3.5,3.6,3.9,4.1,4.7,7.7,10.4,17.1,26.5


In [None]:
# Split the frame into one DataFrame per kind of data.
# Column layout by position: [0] RADIUS | [1, 6001) CIS_1 | [6001, 12001) CIS_2 | [12001, end) shear.
cis1_start, cis2_start, shear_start = 1, 6001, 12001
radius_df = data_df.iloc[:, :cis1_start]
cis1_df = data_df.iloc[:, cis1_start:cis2_start]
cis2_df = data_df.iloc[:, cis2_start:shear_start]
shear_df = data_df.iloc[:, shear_start:]

In [None]:
# One-hot encode RADIUS against the fixed grid of 19 values 3.10, 3.11, ..., 3.28.
ohe_cols = [(310 + i) / 100 for i in range(19)]

# The encoder works on 2-D input, so both arrays are reshaped to a single column.
ohe_target = np.array(ohe_cols).reshape(-1, 1)
ohe_value = np.array(radius_df["RADIUS"]).reshape(-1, 1)

# Fit on the full grid (not on the data) so every grid value gets its own column.
ohe = OneHotEncoder().fit(ohe_target)

# transform() yields a sparse matrix; densify it before wrapping in a DataFrame.
ohe_labels = ohe.transform(ohe_value)
ohe_targets = ohe_labels.toarray()

ohe_df = pd.DataFrame(data=ohe_targets, columns=ohe_cols)
ohe_df.head(3)

Unnamed: 0,3.10,3.11,3.12,3.13,3.14,3.15,3.16,3.17,3.18,3.19,3.20,3.21,3.22,3.23,3.24,3.25,3.26,3.27,3.28
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Shear Rate Scaling: divide each shear column by its own factor (one factor per column,
# in column order).
# NOTE: this rebinds shear_df, so running the cell a second time scales the values again.
scale_list = [10, 10, 10, 10, 10, 10, 10, 15, 20]
shear_df = shear_df.divide(scale_list, axis="columns")
shear_df.head(3)

Unnamed: 0,1000,300,150,100,50,10,5,2,1
0,0.31,0.35,0.38,0.41,0.47,0.76,1.03,1.113333,1.29
1,0.45,0.49,0.53,0.56,0.63,1.1,1.55,1.766667,2.125
2,0.34,0.37,0.4,0.43,0.49,0.81,1.1,1.206667,1.41


In [None]:
# Assemble the modelling table: one-hot radius | CIS_1 | CIS_2 | scaled shear targets.
# ohe_df is built from a bare NumPy array and therefore carries a fresh 0..n-1 index,
# while the slices of data_df keep their original row labels (with gaps wherever dropna
# removed a row). pd.concat(axis=1) aligns on index labels, so the labels are dropped
# first and the parts are joined purely by position; otherwise rows would be shifted
# against each other and padded with NaN.
data = pd.concat(
    [part.reset_index(drop=True) for part in (ohe_df, cis1_df, cis2_df, shear_df)],
    axis=1,
)
data.head(3)

Unnamed: 0,3.1,3.11,3.12,3.13,3.14,3.15,3.16,3.17,3.18,3.19,...,CIS_2_6000,1000,300,150,100,50,10,5,2,1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,522.0,0.31,0.35,0.38,0.41,0.47,0.76,1.03,1.113333,1.29
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,513.0,0.45,0.49,0.53,0.56,0.63,1.1,1.55,1.766667,2.125
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,526.0,0.34,0.37,0.4,0.43,0.49,0.81,1.1,1.206667,1.41


# Dataset Split
- 학습용, 검증용 및 시험용 데이터셋으로 분할

In [None]:
# Hold out 20% of the rows for testing, then 20% of what remains for validation.
split_opts = {"test_size": 0.2, "random_state": 530}
train, test = train_test_split(data, **split_opts)
train, valid = train_test_split(train, **split_opts)

# The trailing 9 columns are the targets; every column before them is a model input.
# Indices are reset so each split is numbered from 0 again.
X_train, y_train = (train.iloc[:, :-9].reset_index(drop=True),
                    train.iloc[:, -9:].reset_index(drop=True))
X_valid, y_valid = (valid.iloc[:, :-9].reset_index(drop=True),
                    valid.iloc[:, -9:].reset_index(drop=True))
X_test, y_test = (test.iloc[:, :-9].reset_index(drop=True),
                  test.iloc[:, -9:].reset_index(drop=True))

In [None]:
# Unbind the large intermediate frames now that the train/valid/test splits exist.
del data, data_df

# Model Define

In [None]:
def get_model(n_inputs, n_outputs):
    """Build and compile the fully connected regression network.

    The network is a stack of ReLU Dense layers of decreasing width
    (5096 -> 2048 -> 1024 -> 512 -> 256 -> 128 -> 64) followed by a linear
    output layer. It is compiled with MSE loss, the Adam optimizer and
    MAE / MAPE / MSE / RMSE as tracked metrics.

    :param n_inputs: number of input features per sample
    :param n_outputs: number of regression targets per sample
    :return: the compiled Keras ``Sequential`` model
    """
    model = Sequential()
    # Only the first layer declares the input width; the following layers infer it,
    # so the stray `input_dim` arguments on hidden layers have been removed.
    # NOTE(review): 5096 may be a typo for 4096 - kept so the architecture is unchanged.
    model.add(Dense(5096, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(2048, kernel_initializer='he_uniform', activation='relu'))
    # NOTE(review): every other hidden layer omits kernel_initializer and therefore uses
    # the Dense default instead of he_uniform - confirm this alternation is intended.
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(512, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(n_outputs, activation="linear"))  # linear output for regression
    # `learning_rate` replaces the deprecated `lr` alias. `epsilon=None` and `decay=0.0`
    # are dropped because newer Keras releases reject or ignore them; leaving them out
    # keeps the optimizer defaults (no decay, default epsilon).
    opt = optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(loss='mse', optimizer=opt,
                  metrics=[tf.keras.metrics.MeanAbsoluteError(),tf.keras.metrics.MeanAbsolutePercentageError(),
                           tf.keras.metrics.MeanSquaredError(),tf.keras.metrics.RootMeanSquaredError()])

    return model

# Model Train

In [None]:
# Size the network from the data: one input per feature column, one output per target column.
n_inputs, n_outputs = X_train.shape[1], y_train.shape[1]
model = get_model(n_inputs, n_outputs)

In [None]:
# Create the EarlyStopping callback: stop once val_loss has not improved for 20 epochs
# and roll the model back to the best-scoring epoch. Without restore_best_weights the
# model would keep the weights of the final epoch, i.e. 20 epochs past the best one.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=20, mode='min', restore_best_weights=True)
# Keep the returned History so the loss/metric curves can be inspected after training.
history = model.fit(X_train, y_train,
                    validation_data=(X_valid, y_valid),
                    validation_batch_size=32,
                    verbose=1, epochs=100, batch_size=32, callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
  82/1587 [>.............................] - ETA: 8s - loss: 0.0191 - mean_absolute_error: 0.0647 - mean_absolute_percentage_error: 7.4108 - mean_squared_error: 0.0191 - root_mean_squared_error: 0.1382

# Model Test

In [None]:
# Run the model on the held-out test features; predictions are still in scaled units.
y_pred = model.predict(X_test)

NameError: ignored

In [None]:
y_pred[:10]

array([[0.36885038, 0.3955201 , 0.45167863, 0.4927889 , 0.5859499 ,
        0.9201636 , 1.3083764 , 1.4865295 , 1.7158341 ],
       [0.39241424, 0.42278278, 0.47642517, 0.511397  , 0.61683416,
        0.9943842 , 1.4054543 , 1.610058  , 1.8683647 ],
       [0.3397042 , 0.36406696, 0.4217    , 0.4594723 , 0.53924847,
        0.8297638 , 1.1761876 , 1.3199054 , 1.5135673 ],
       [0.2854449 , 0.3129977 , 0.34823883, 0.37008536, 0.43457246,
        0.6490721 , 0.8753942 , 0.95036   , 1.0669858 ],
       [0.37328115, 0.3992461 , 0.46057844, 0.503986  , 0.59868336,
        0.93960327, 1.341972  , 1.5269881 , 1.7680799 ],
       [0.3625051 , 0.3943509 , 0.44292402, 0.47689593, 0.5695696 ,
        0.90331024, 1.2682582 , 1.4381152 , 1.6572243 ],
       [0.3863174 , 0.42296398, 0.4704795 , 0.50741065, 0.6110163 ,
        0.98237365, 1.3859912 , 1.5816613 , 1.8348936 ],
       [0.35472742, 0.38685882, 0.4308957 , 0.4547993 , 0.54777575,
        0.8748736 , 1.2132565 , 1.3768731 , 1.5792906 ],


In [None]:
# Sanity check: features, targets and predictions should all have the same number of rows.
print(f"X Test Length : {len(X_test)}")
print(f"Y Test Length : {len(y_test)}")
print(f"Y Pred Length : {len(y_pred)}")

X Test Length : 23801
Y Test Length : 23801
Y Pred Length : 23801


In [None]:
def unscale_values(scaled_list, scales=None):
    """Undo the per-column scaling and return the rows in their original units.

    Each row is multiplied column-wise by the matching scale factor. The input is
    never modified: the work is done on a copy, so calling this repeatedly on the
    same array always yields the same result.

    :param scaled_list: 2-D array-like of scaled values, one row per sample
    :param scales: per-column factors to multiply by; defaults to the module-level
        ``scale_list`` that was used for scaling
    :return: list of 1-D NumPy arrays (one per row) holding the unscaled values
    """
    factors_src = scale_list if scales is None else scales
    # np.array copies by default, so the caller's buffer is never written to.
    values = np.array(scaled_list)
    if values.size == 0:
        return []
    # Cast the factors to the input dtype so e.g. float32 predictions stay float32.
    factors = np.asarray(factors_src, dtype=values.dtype)
    # Only the leading len(factors) columns are scaled; any extra columns are left as-is.
    values[..., :len(factors)] *= factors
    return list(values)

# Undo the shear scaling on both the predictions and the ground-truth targets.
un_y_pred = unscale_values(y_pred)
un_y_test = unscale_values(y_test.values)

In [None]:
un_y_pred[:3]

[array([ 3.6885037,  3.955201 ,  4.5167866,  4.927889 ,  5.859499 ,
         9.201635 , 13.083764 , 22.297941 , 34.31668  ], dtype=float32),
 array([ 3.9241424,  4.227828 ,  4.7642517,  5.11397  ,  6.1683416,
         9.943842 , 14.054543 , 24.15087  , 37.367294 ], dtype=float32),
 array([ 3.3970418,  3.6406696,  4.217    ,  4.5947227,  5.3924847,
         8.297638 , 11.761876 , 19.79858  , 30.271347 ], dtype=float32)]

In [None]:
un_y_test[:3]

[array([ 3.79999995,  4.0999999 ,  4.5999999 ,  4.9000001 ,  5.69999981,
         9.80000019, 13.5       , 22.79999924, 36.09999847]),
 array([ 3.70000005,  4.19999981,  4.5999999 ,  4.9000001 ,  5.69999981,
         9.80000019, 13.60000038, 22.89999962, 36.29999924]),
 array([ 3.5999999 ,  3.9000001 ,  4.30000019,  4.5       ,  5.19999981,
         8.39999962, 11.39999962, 18.60000038, 28.79999924])]

In [None]:
# Target column labels, in the same order as the shear columns.
col_list = ["1000","300", "150", "100", "50", "10", "5", "2", "1"]
# Wrap the unscaled ground-truth rows in a labelled DataFrame.
y_real_df = pd.DataFrame(columns=col_list, data=un_y_test)

In [None]:
y_real_df.head(3)

Unnamed: 0,1000,300,150,100,50,10,5,2,1
0,3.8,4.1,4.6,4.9,5.7,9.8,13.5,22.799999,36.099998
1,3.7,4.2,4.6,4.9,5.7,9.8,13.6,22.9,36.299999
2,3.6,3.9,4.3,4.5,5.2,8.4,11.4,18.6,28.799999


In [None]:
# Wrap the unscaled predictions in a DataFrame with the same column labels as y_real_df.
y_pred_df = pd.DataFrame(columns=col_list, data=un_y_pred)

In [None]:
y_pred_df.head(3)

Unnamed: 0,1000,300,150,100,50,10,5,2,1
0,3.688504,3.955201,4.516787,4.927889,5.859499,9.201635,13.083764,22.297941,34.316681
1,3.924142,4.227828,4.764252,5.11397,6.168342,9.943842,14.054543,24.150869,37.367294
2,3.397042,3.64067,4.217,4.594723,5.392485,8.297638,11.761876,19.79858,30.271347


### Model Evaluation

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

def pearson_correlation_coefficient(X, Y):
    """
    Compute the Pearson correlation coefficient between two variables X and Y.

    :param X: 1-D NumPy array holding the values of the first variable
    :param Y: 1-D NumPy array holding the values of the second variable
    :return: the Pearson correlation coefficient
    """
    # Centre both variables on their means.
    centered_x = X - np.mean(X)
    centered_y = Y - np.mean(Y)

    # Numerator: sum of the products of the paired deviations.
    cross_sum = np.sum(centered_x * centered_y)

    # Denominator: square root of the product of the two sums of squared deviations.
    sq_sum_x = np.sum(centered_x ** 2)
    sq_sum_y = np.sum(centered_y ** 2)

    return cross_sum / np.sqrt(sq_sum_x * sq_sum_y)

In [None]:
# Report the regression metrics separately for every target column, on unscaled values.
for i, column in enumerate(col_list):
    print(f"{col_list[i]}")
    real_v = list(y_real_df[column].values)
    pred_v = list(y_pred_df[column].values)

    # Error magnitudes.
    mse = mean_squared_error(real_v, pred_v)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(real_v, pred_v)
    mape = mean_absolute_percentage_error(real_v, pred_v)
    # Goodness of fit and linear association.
    r2_scores = r2_score(real_v, pred_v)
    pearson_scores = pearson_correlation_coefficient(real_v, pred_v)

    print(f"MAE : {mae}")
    print(f"MSE : {mse}")
    print(f"RMSE : {rmse}")
    print(f"MAPE : {mape}")
    print(f"R2 : {r2_scores}")
    print(f"Pearson : {pearson_scores}")
    print()

1000
MAE : 0.24662127818767177
MSE : 0.12435009626573916
RMSE : 0.3526330901457479
MAPE : 0.06589363706363653
R2 : 0.7321564919490262
Pearson : 0.8743972146131833

300
MAE : 0.29766817025980835
MSE : 0.1780299018959677
RMSE : 0.4219358978517563
MAPE : 0.06965193578367493
R2 : 0.6708503938009835
Pearson : 0.8872040634761839

150
MAE : 0.27856711973478243
MSE : 0.170610724516426
RMSE : 0.41305051085360733
MAPE : 0.060128577308613915
R2 : 0.7448464075229119
Pearson : 0.8836581522134888

100
MAE : 0.29434763111540674
MSE : 0.21359540498131496
RMSE : 0.4621638291572751
MAPE : 0.059201123475283286
R2 : 0.7376185848634275
Pearson : 0.8706485323999705

50
MAE : 0.36663050510007733
MSE : 0.3679141745025895
RMSE : 0.6065592918277565
MAPE : 0.06465454025088344
R2 : 0.707156062390442
Pearson : 0.8414811001927093

10
MAE : 0.7318571742194715
MSE : 1.8106384935721145
RMSE : 1.345599678051431
MAPE : 0.0742357721909899
R2 : 0.6421578891852728
Pearson : 0.8358154241449524

5
MAE : 1.0124468885546527
MS

# Save Model

In [None]:
import tf2onnx

# model.save('DNN_method1.h5') # save the model
# re_model = tf.keras.models.load_model('DNN_method1.h5')

# Describe the model input for the converter. The network is a single-input Dense stack
# that takes one flat feature vector per sample, so the signature is exactly one
# (batch, n_features) tensor; the feature width is read from the model itself so it
# cannot drift from the trained architecture.
input_signature = [
    tf.TensorSpec(shape=(None, model.input_shape[-1]), dtype=tf.float32, name="input"),
]
# Convert the Keras model to ONNX with the tf2onnx conversion function.
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=input_signature)

# Write the serialized ONNX model to a file.
with open("DNN_method1.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())