# Import Libraries

In [4]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import random
random.seed(530)

from glob import glob
from tqdm.auto import tqdm

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import tensorflow as tf
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Input, Concatenate

# Data Load

In [2]:
data_path = "/Volumes/T7/Ubiosis/total_data.csv"
data_df = pd.read_csv(data_path)

## Data Pre-Processing

In [3]:
# 결측치 제거
data_df.dropna(axis=0,inplace=True)
data_df.head(3)

Unnamed: 0,RADIUS,CIS_1_1,CIS_1_2,CIS_1_3,CIS_1_4,CIS_1_5,CIS_1_6,CIS_1_7,CIS_1_8,CIS_1_9,...,CIS_2_6000,1000,300,150,100,50,10,5,2,1
0,3.18,602.0,602.0,602.0,602.0,602.0,602.0,602.0,602.0,602.0,...,522.0,3.1,3.5,3.8,4.1,4.7,7.6,10.3,16.700001,25.799999
1,3.2,597.0,597.0,597.0,597.0,597.0,597.0,597.0,597.0,597.0,...,513.0,4.5,4.9,5.3,5.6,6.3,11.0,15.5,26.5,42.5
2,3.16,599.0,599.0,599.0,599.0,599.0,599.0,599.0,600.0,600.0,...,526.0,3.4,3.7,4.0,4.3,4.9,8.1,11.0,18.1,28.200001


In [5]:
# 데이터 정보 별 데이터프레임 분할
radius_df = data_df.iloc[:,:1]
cis1_df = data_df.iloc[:,1:6001]
cis2_df = data_df.iloc[:,6001:12001]
shear_df = data_df.iloc[:,12001:]

In [7]:
# one-hot encoding
ohe_cols = []
for i in range(0,19):
    ohe_cols.append(((310+i)/100))

ohe_target = np.array(ohe_cols).reshape(-1,1)
ohe_value = np.array(radius_df["RADIUS"]).reshape(-1,1)

ohe = OneHotEncoder()
ohe.fit(ohe_target)

ohe_labels = ohe.transform(ohe_value)
ohe_targets = ohe_labels.toarray()

ohe_df = pd.DataFrame(columns=ohe_cols,data=ohe_targets)
ohe_df.head(3)

Unnamed: 0,3.10,3.11,3.12,3.13,3.14,3.15,3.16,3.17,3.18,3.19,3.20,3.21,3.22,3.23,3.24,3.25,3.26,3.27,3.28
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Shear Rate Scaling
scale_list = [10,10,10,10,10,10,10,15,20]
shear_df = shear_df.div(scale_list, axis=1)

shear_df.head(3)

Unnamed: 0,1000,300,150,100,50,10,5,2,1
0,0.31,0.35,0.38,0.41,0.47,0.76,1.03,1.113333,1.29
1,0.45,0.49,0.53,0.56,0.63,1.1,1.55,1.766667,2.125
2,0.34,0.37,0.4,0.43,0.49,0.81,1.1,1.206667,1.41


In [9]:
re_cols = ["1","2","5","10","50","100","150","300","1000"]
re_shear_df = shear_df[re_cols]
re_shear_df.tail(3)

Unnamed: 0,1,2,5,10,50,100,150,300,1000
79333,1.525,1.3,1.17,0.85,0.51,0.44,0.41,0.37,0.31
79334,0.255,0.28,0.35,0.32,0.27,0.27,0.26,0.26,0.26
79335,1.325,1.14,1.04,0.77,0.47,0.41,0.39,0.36,0.35


In [10]:
data = pd.concat([radius_df, cis1_df, cis2_df, re_shear_df],axis=1)
data.tail(3)

Unnamed: 0,RADIUS,CIS_1_1,CIS_1_2,CIS_1_3,CIS_1_4,CIS_1_5,CIS_1_6,CIS_1_7,CIS_1_8,CIS_1_9,...,CIS_2_6000,1,2,5,10,50,100,150,300,1000
79333,3.2,596.0,596.0,596.0,596.0,596.0,596.0,596.0,596.0,596.0,...,523.0,1.525,1.3,1.17,0.85,0.51,0.44,0.41,0.37,0.31
79334,3.16,630.0,630.0,630.0,630.0,630.0,630.0,630.0,630.0,630.0,...,517.0,0.255,0.28,0.35,0.32,0.27,0.27,0.26,0.26,0.26
79335,3.16,594.0,594.0,594.0,594.0,595.0,595.0,595.0,595.0,595.0,...,540.0,1.325,1.14,1.04,0.77,0.47,0.41,0.39,0.36,0.35


## Split Dataset

In [11]:
train, test = train_test_split(data, test_size=0.2, random_state=530)

X_train = train.iloc[:,:-9].reset_index(drop=True)
y_train = train.iloc[:,-9:].reset_index(drop=True)
X_test = test.iloc[:,:-9].reset_index(drop=True)
y_test = test.iloc[:,-9:].reset_index(drop=True)

In [12]:
# 데이터 정보 별 데이터프레임 분할
tr_radius_df = X_train.iloc[:,:1]
tr_cis1_df = X_train.iloc[:,1:6001]
tr_cis2_df = X_train.iloc[:,6001:]

te_radius_df = X_test.iloc[:,:1]
te_cis1_df = X_test.iloc[:,1:6001]
te_cis2_df = X_test.iloc[:,6001:]

# Define Model

In [14]:
def create_model(input_shape,extra_shape,output_shape):
    input_seq1 = tf.keras.layers.Input(shape=(input_shape,))
    input_seq2 = tf.keras.layers.Input(shape=(input_shape,))
    input_extra = tf.keras.layers.Input(shape=(extra_shape,))

    # 시퀀스 1에 대한 처리
    seq1_layer = tf.keras.layers.Dense(1024, activation='relu')(input_seq1)
    seq1_layer = tf.keras.layers.Dense(512, activation='relu')(seq1_layer)
    seq1_layer = tf.keras.layers.Dense(256, activation='relu')(seq1_layer)
    seq1_layer = tf.keras.layers.Dense(128, activation='relu')(seq1_layer)

    # 시퀀스 2에 대한 처리
    seq2_layer = tf.keras.layers.Dense(1024, activation='relu')(input_seq2)
    seq2_layer = tf.keras.layers.Dense(512, activation='relu')(seq2_layer)
    seq2_layer = tf.keras.layers.Dense(256, activation='relu')(seq2_layer)
    seq2_layer = tf.keras.layers.Dense(128, activation='relu')(seq2_layer)

    # 추가 데이터에 대한 처리
    extra_layer = tf.keras.layers.Dense(16, activation='relu')(input_extra)
    extra_layer = tf.keras.layers.Dense(32, activation='relu')(extra_layer)
    extra_layer = tf.keras.layers.Dense(64, activation='relu')(extra_layer)
    extra_layer = tf.keras.layers.Dense(128, activation='relu')(extra_layer)

    # 시퀀스와 추가 데이터 결합
    combined = tf.keras.layers.Concatenate()([seq1_layer, seq2_layer, extra_layer])
    # 결합된 시퀀스와 추가 데이터 처리
    layer = tf.keras.layers.Dense(128, activation='relu')(combined)
    layer = tf.keras.layers.Dense(64, activation='relu')(layer)
    layer = tf.keras.layers.Dense(32, activation='relu')(layer)

    # 출력 레이어 (9개의 클래스에 대한 확률 출력)
    output = tf.keras.layers.Dense(output_shape, activation='relu')(layer)

    model = tf.keras.Model(inputs=[input_seq1, input_seq2, input_extra], outputs=output)
    return model

# 모델 생성
model = create_model(tr_cis1_df.shape[1],tr_radius_df.shape[1],y_train.shape[1])

# 모델 컴파일
opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(optimizer=opt, loss='mae',
              metrics=[tf.keras.metrics.MeanSquaredError(),tf.keras.metrics.RootMeanSquaredError(),
                           tf.keras.metrics.MeanAbsoluteError(),tf.keras.metrics.MeanAbsolutePercentageError()])

Metal device set to: Apple M2

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



In [15]:
# 입력 데이터 준비
input_data = [tr_cis1_df, tr_cis2_df, tr_radius_df]  # 시퀀스 1, 시퀀스 2, 추가 데이터를 리스트로 묶어 입력 데이터로 사용

# 모델 학습
model.fit(input_data, y_train, epochs=100, batch_size=32,
          validation_split=0.2,validation_batch_size=32)

Epoch 1/100


2023-07-10 14:55:16.718386: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/100

# Model Result

In [8]:
test_input_data = [X_test_cis1, X_test_cis2, test_radius_val]

y_pred = model.predict(test_input_data)



In [11]:
y_pred[:5]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [9]:
print(f"X Test Length : {len(input_data)}")
print(f"Y Test Length : {len(y_train)}")

X Test Length : 3
Y Test Length : 6942


In [10]:
print(f"X Test Length : {len(test_input_data)}")
print(f"Y Test Length : {len(y_test)}")
print(f"Y Pred Length : {len(y_pred)}")

X Test Length : 3
Y Test Length : 2976
Y Pred Length : 2976


# Evaluation

In [None]:
mae_df, mse_df, rmse_df, mape_df, mpe_df = evaluation.get_evalution(test_input_data, y_test, y_pred)

In [13]:
mae_df.head(3)

Unnamed: 0,1000,300,150,100,50,10,5,2,1,Total
0,0.36,0.4,0.44,0.47,0.55,0.93,1.27,1.42,1.68,7.52
1,0.58,0.62,0.66,0.69,0.76,1.35,1.91,2.2,2.655,11.425
2,0.34,0.37,0.41,0.44,0.5,0.83,1.13,1.24,1.45,6.71


In [14]:
print(f"1000 mae : {sum(mae_df['1000'].values)/len(mae_df['1000'])}")
print(f"300 mae : {sum(mae_df['300'].values)/len(mae_df['300'])}")
print(f"150 mae : {sum(mae_df['150'].values)/len(mae_df['150'])}")
print(f"100 mae : {sum(mae_df['100'].values)/len(mae_df['100'])}")
print(f"50 mae : {sum(mae_df['50'].values)/len(mae_df['50'])}")
print(f"10 mae : {sum(mae_df['10'].values)/len(mae_df['10'])}")
print(f"5 mae : {sum(mae_df['5'].values)/len(mae_df['5'])}")
print(f"2 mae : {sum(mae_df['2'].values)/len(mae_df['2'])}")
print(f"1 mae : {sum(mae_df['1'].values)/len(mae_df['1'])}")
print(f"Total mae : {sum(mae_df['Total'].values)/len(mae_df['Total'])}")

1000 mae : 0.4266666730244954
300 mae : 0.46333332856496173
150 mae : 0.503333330154419
100 mae : 0.5333333333333333
50 mae : 0.6033333301544189
10 mae : 1.0366666793823243
5 mae : 1.4366666793823242
2 mae : 1.619999991522895
1 mae : 1.9283332824707033
Total mae : 8.551666627989876


In [15]:
mse_df.head(3)

Unnamed: 0,1000,300,150,100,50,10,5,2,1,Total
0,0.1296,0.16,0.1936,0.2209,0.3025,0.8649,1.6129,2.0164,2.8224,8.3232
1,0.3364,0.3844,0.4356,0.4761,0.5776,1.8225,3.6481,4.84,7.049025,19.569725
2,0.1156,0.1369,0.1681,0.1936,0.25,0.6889,1.2769,1.5376,2.1025,6.4701


In [16]:
print(f"1000 mse : {sum(mse_df['1000'].values)/len(mse_df['1000'])}")
print(f"300 mse : {sum(mse_df['300'].values)/len(mse_df['300'])}")
print(f"150 mse : {sum(mse_df['150'].values)/len(mse_df['150'])}")
print(f"100 mse : {sum(mse_df['100'].values)/len(mse_df['100'])}")
print(f"50 mse : {sum(mse_df['50'].values)/len(mse_df['50'])}")
print(f"10 mse : {sum(mse_df['10'].values)/len(mse_df['10'])}")
print(f"5 mse : {sum(mse_df['5'].values)/len(mse_df['5'])}")
print(f"2 mse : {sum(mse_df['2'].values)/len(mse_df['2'])}")
print(f"1 mse : {sum(mse_df['1'].values)/len(mse_df['1'])}")
print(f"Total mse : {sum(mse_df['Total'].values)/len(mse_df['Total'])}")

1000 mse : 0.19386667391459167
300 mse : 0.22709999329249078
150 mse : 0.26576666266123466
100 mse : 0.29686666787465427
50 mse : 0.37669999516805014
10 mse : 1.1254333557128908
5 mse : 2.1793000467936205
2 mse : 2.7979999728732654
1 mse : 3.9913081128438357
Total mse : 11.454341481134634


In [17]:
rmse_df.head(3)

Unnamed: 0,1000,300,150,100,50,10,5,2,1,Total
0,0.36,0.4,0.44,0.47,0.55,0.93,1.27,1.42,1.68,7.52
1,0.58,0.62,0.66,0.69,0.76,1.35,1.91,2.2,2.655,11.425
2,0.34,0.37,0.41,0.44,0.5,0.83,1.13,1.24,1.45,6.71


In [18]:
print(f"1000 rmse : {sum(rmse_df['1000'].values)/len(rmse_df['1000'])}")
print(f"300 rmse : {sum(rmse_df['300'].values)/len(rmse_df['300'])}")
print(f"150 rmse : {sum(rmse_df['150'].values)/len(rmse_df['150'])}")
print(f"100 rmse : {sum(rmse_df['100'].values)/len(rmse_df['100'])}")
print(f"50 rmse : {sum(rmse_df['50'].values)/len(rmse_df['50'])}")
print(f"10 rmse : {sum(rmse_df['10'].values)/len(rmse_df['10'])}")
print(f"5 rmse : {sum(rmse_df['5'].values)/len(rmse_df['5'])}")
print(f"2 rmse : {sum(rmse_df['2'].values)/len(rmse_df['2'])}")
print(f"1 rmse : {sum(rmse_df['1'].values)/len(rmse_df['1'])}")
print(f"Total rmse : {sum(rmse_df['Total'].values)/len(rmse_df['Total'])}")

1000 rmse : 0.4266666730244954
300 rmse : 0.46333332856496173
150 rmse : 0.503333330154419
100 rmse : 0.5333333333333333
50 rmse : 0.6033333301544189
10 rmse : 1.0366666793823243
5 rmse : 1.4366666793823242
2 rmse : 1.619999991522895
1 rmse : 1.9283332824707033
Total rmse : 8.551666627989876
