# Import Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import random
random.seed(530)

from glob import glob
from tqdm.auto import tqdm

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import tensorflow as tf
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Input, Concatenate

# Data Load

In [None]:
# Path of the dataset CSV; the whole file is read into a single DataFrame.
data_path = "/Volumes/T7/Ubiosis/total_data.csv"
data_df = pd.read_csv(filepath_or_buffer=data_path)

## Data Pre-Processing

In [None]:
# Remove missing values: drop every row containing at least one NaN (data_df is modified in place).
data_df.dropna(axis="index", how="any", inplace=True)
data_df.head(n=3)

In [None]:
# Split the frame by column position into one DataFrame per kind of information:
#   column 0             -> radius_df
#   columns 1..6000      -> cis1_df
#   columns 6001..12000  -> cis2_df
#   columns 12001..end   -> shear_df
radius_df, cis1_df, cis2_df, shear_df = (
    data_df.iloc[:, column_span]
    for column_span in (
        slice(None, 1),
        slice(1, 6001),
        slice(6001, 12001),
        slice(12001, None),
    )
)

In [None]:
# One-hot encoding of the "RADIUS" column.
# The candidate categories are the 19 values (310 + k) / 100 for k = 0..18.
ohe_cols = [(310 + step) / 100 for step in range(19)]

# Both the candidate list and the observed radii are reshaped to a single column (n, 1).
ohe_target = np.array(ohe_cols).reshape((-1, 1))
ohe_value = np.array(radius_df["RADIUS"]).reshape((-1, 1))

# The encoder is fitted on the fixed candidate list, not on the observed values.
ohe = OneHotEncoder().fit(ohe_target)

# Encode the observed radii, then convert the result to a dense array.
ohe_labels = ohe.transform(ohe_value)
ohe_targets = ohe_labels.toarray()

# One column per candidate radius value.
ohe_df = pd.DataFrame(data=ohe_targets, columns=ohe_cols)
ohe_df.head(n=3)

In [None]:
# Shear Rate Scaling
# Each shear column is divided by the divisor at the same position:
# the first seven columns by 10, the eighth by 15 and the ninth by 20.
scale_list = [10] * 7 + [15, 20]
shear_df = shear_df.div(scale_list, axis="columns")

shear_df.head(n=3)

In [None]:
# Select the nine shear columns by label, in exactly this order.
re_cols = ["1", "2", "5", "10", "50", "100", "150", "300", "1000"]
re_shear_df = shear_df.loc[:, re_cols]
re_shear_df.tail(n=3)

In [None]:
# Reassemble one table column-wise: radius, cis1, cis2, then the selected shear columns.
data = pd.concat(objs=[radius_df, cis1_df, cis2_df, re_shear_df], axis="columns")
data.tail(n=3)

## Split Dataset

In [None]:
# Hold out 20% of the rows as the test set, with a fixed random_state.
train, test = train_test_split(data, random_state=530, test_size=0.2)

# The last nine columns are the targets (y); all preceding columns are the inputs (X).
# Each part gets a fresh 0-based index.
X_train, y_train = (
    part.reset_index(drop=True) for part in (train.iloc[:, :-9], train.iloc[:, -9:])
)
X_test, y_test = (
    part.reset_index(drop=True) for part in (test.iloc[:, :-9], test.iloc[:, -9:])
)

In [None]:
# Split the input features by column position, once for the training split and once
# for the test split: column 0 is the radius, columns 1..6000 are cis1 and the
# remaining columns are cis2.
tr_radius_df, tr_cis1_df, tr_cis2_df = (
    X_train.iloc[:, column_span]
    for column_span in (slice(None, 1), slice(1, 6001), slice(6001, None))
)

te_radius_df, te_cis1_df, te_cis2_df = (
    X_test.iloc[:, column_span]
    for column_span in (slice(None, 1), slice(1, 6001), slice(6001, None))
)

# Define Model

In [None]:
def create_model(input_shape, extra_shape, output_shape):
    """Build an uncompiled three-input, fully connected Keras model.

    Args:
        input_shape: Number of features of each of the two sequence inputs
            (both sequence inputs use the same width).
        extra_shape: Number of features of the extra input.
        output_shape: Number of units in the output layer.

    Returns:
        A ``tf.keras.Model`` taking ``[sequence 1, sequence 2, extra]`` as
        inputs and producing ``output_shape`` ReLU-activated outputs.
    """
    layers = tf.keras.layers

    def dense_stack(tensor, widths):
        """Apply one ReLU Dense layer per entry of ``widths``, in order."""
        for width in widths:
            tensor = layers.Dense(width, activation='relu')(tensor)
        return tensor

    # Input placeholders: two sequences of equal width plus the extra data.
    input_seq1 = layers.Input(shape=(input_shape,))
    input_seq2 = layers.Input(shape=(input_shape,))
    input_extra = layers.Input(shape=(extra_shape,))

    # Each sequence branch narrows 1024 -> 128; the branches do not share weights.
    seq1_layer = dense_stack(input_seq1, (1024, 512, 256, 128))
    seq2_layer = dense_stack(input_seq2, (1024, 512, 256, 128))

    # The extra-data branch widens 16 -> 128 so it ends at the same width as the sequence branches.
    extra_layer = dense_stack(input_extra, (16, 32, 64, 128))

    # Merge the three branches and process the combined features.
    combined = layers.Concatenate()([seq1_layer, seq2_layer, extra_layer])
    layer = dense_stack(combined, (128, 64, 32))

    # Output layer: `output_shape` units with ReLU activation.
    output = layers.Dense(output_shape, activation='relu')(layer)

    return tf.keras.Model(inputs=[input_seq1, input_seq2, input_extra], outputs=output)

# Create the model: the sequence inputs use the cis1 column count, the extra input uses
# the radius column count, and the output has one unit per target column.
model = create_model(tr_cis1_df.shape[1],tr_radius_df.shape[1],y_train.shape[1])

# Compile the model.
# The optimizer is taken from tf.keras so it comes from the same Keras implementation as the
# model itself. `learning_rate` replaces the deprecated `lr` keyword; `decay=0.0` and
# `epsilon=None` are dropped because newer Keras optimizers reject or mishandle them, and the
# defaults (no decay, epsilon=1e-7) give the same configuration.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
# MAE is the training loss; MSE, RMSE, MAE and MAPE are tracked as additional metrics.
model.compile(optimizer=opt, loss='mae',
              metrics=[tf.keras.metrics.MeanSquaredError(),tf.keras.metrics.RootMeanSquaredError(),
                           tf.keras.metrics.MeanAbsoluteError(),tf.keras.metrics.MeanAbsolutePercentageError()])

In [None]:
# Prepare the input data: sequence 1, sequence 2 and the extra data are bundled into one
# list, in the same order as the model's inputs.
input_data = [tr_cis1_df, tr_cis2_df, tr_radius_df]

# Train the model, with 20% of the training data used for validation.
model.fit(
    x=input_data,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    validation_batch_size=32,
)

# Model Result

In [None]:
# Assemble the test inputs in the same order used for training:
# sequence 1 (cis1), sequence 2 (cis2), then the extra radius input.
# The te_* frames from the test split are used because the names referenced here before
# (X_test_cis1, X_test_cis2, test_radius_val) are not defined anywhere in the visible notebook.
test_input_data = [te_cis1_df, te_cis2_df, te_radius_df]

# Predict the target columns for every test sample.
y_pred = model.predict(test_input_data)

In [None]:
# Preview the first five entries of the predictions.
y_pred[:5]

In [None]:
# Report the number of training samples. input_data is a list of three input frames, so the
# sample count is taken from its first element; len(input_data) would just be the list length.
print(f"X Train Length : {len(input_data[0])}")
print(f"Y Train Length : {len(y_train)}")

In [None]:
# Report the number of test samples. test_input_data is a list of input frames, so the
# sample count is taken from its first element; len(test_input_data) would just be the list length.
print(f"X Test Length : {len(test_input_data[0])}")
print(f"Y Test Length : {len(y_test)}")
print(f"Y Pred Length : {len(y_pred)}")

# Evaluation

In [None]:
# Build the evaluation frames from the test inputs, the true targets and the predictions.
# NOTE(review): `evaluation` is not imported in the visible source — confirm it is imported
# (or defined) before this cell runs. Judging by the names they are bound to, the five results
# are presumably MAE, MSE, RMSE, MAPE and MPE frames — TODO confirm against the helper.
mae_df, mse_df, rmse_df, mape_df, mpe_df = evaluation.get_evalution(test_input_data, y_test, y_pred)

In [None]:
# Preview the first three rows of the MAE frame.
mae_df.head(3)

In [None]:
# Print the arithmetic mean (sum / count) of each listed MAE column, one line per column.
for col in ("1000", "300", "150", "100", "50", "10", "5", "2", "1", "Total"):
    print(f"{col} mae : {sum(mae_df[col].values)/len(mae_df[col])}")

In [None]:
# Preview the first three rows of the MSE frame.
mse_df.head(3)

In [None]:
# Print the arithmetic mean (sum / count) of each listed MSE column, one line per column.
for col in ("1000", "300", "150", "100", "50", "10", "5", "2", "1", "Total"):
    print(f"{col} mse : {sum(mse_df[col].values)/len(mse_df[col])}")

In [None]:
# Preview the first three rows of the RMSE frame.
rmse_df.head(3)

In [None]:
# Print the arithmetic mean (sum / count) of each listed RMSE column, one line per column.
for col in ("1000", "300", "150", "100", "50", "10", "5", "2", "1", "Total"):
    print(f"{col} rmse : {sum(rmse_df[col].values)/len(rmse_df[col])}")