# 02. TCN Modeling
- TCN: Temporal Convolutional Network
- reference: [Keras TCN](https://github.com/philipperemy/keras-tcn)

In [None]:
import os
import glob

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score, recall_score, precision_score, f1_score

import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv1D
from tensorflow.keras import Input, Model

from tcn import TCN, tcn_full_summary

### TCN의 기본적인 학습 파이프라인
- TensorFlow의 Keras API와 같은 방식으로 작동하며, Keras와 연동하여 사용하는 것이 가능하다.

In [None]:
# Read the training table, then separate the target column from the features.
# 'GeneId' is excluded from the feature matrix together with the target.
data = pd.read_csv('../data/train/train_TCN.csv')
y = data['Prediction']
X = data.drop(columns=['GeneId', 'Prediction'])

# 70/30 shuffled hold-out split with a fixed seed.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=.3, random_state=42, shuffle=True
)

# Scaling: the min/max statistics are learned from the training split only and
# then applied to both splits, so the validation rows do not influence them.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_valid = scaler.transform(X_valid)

print(*(part.shape for part in (X_train, X_valid, y_train, y_valid)))

With normalization, without activation in the TCN layers

In [None]:
# Build the "no activation" variant: three stacked TCN layers with
# activation=None, followed by a small dense classification head.

# Seed TensorFlow's global RNG before any layer is created so that weight
# initialisation is repeatable after a kernel restart (as far as the seeded
# TensorFlow ops allow). 42 is the same seed value the train/valid split uses.
tf.random.set_seed(42)

# NOTE: despite its name, `input_length` is passed to Conv1D as the number of
# filters (output channels); it is not a sequence length.
input_length = 500
batch_size, timesteps, input_dim = None, 5, 1

input_layer = Input(batch_shape=(batch_size, timesteps, input_dim))
dilated_factor1 = Conv1D(input_length, kernel_size=3, dilation_rate=2, padding='causal')(input_layer)

# Arguments shared by every TCN layer of this model. `dilations` is written
# out per call so each layer receives its own list object.
tcn_kwargs = dict(kernel_size=3, nb_stacks=1, padding='causal', activation=None)

# The first two TCN layers return the full sequence so they can be stacked;
# the last one uses return_sequences=False so the dense head gets one vector
# per sample.
TCN_NET_1 = TCN(nb_filters=128, dilations=[2, 4], return_sequences=True, **tcn_kwargs)(dilated_factor1)
TCN_NET_2 = TCN(nb_filters=64, dilations=[2, 4], return_sequences=True, **tcn_kwargs)(TCN_NET_1)
TCN_NET_3 = TCN(nb_filters=32, dilations=[2, 4], return_sequences=False, **tcn_kwargs)(TCN_NET_2)

# Dense head ending in a single sigmoid unit (binary classification).
Dense_layer_1 = Dense(200, activation='relu')(TCN_NET_3)
Dense_layer_2 = Dense(100, activation='relu')(Dense_layer_1)
output_layer = Dense(1, activation='sigmoid')(Dense_layer_2)

model_noRelu = Model(inputs=[input_layer], outputs=[output_layer])
model_noRelu.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

In [None]:
# Train the no-activation model. The returned History object holds the
# per-epoch training and validation metrics.
BATCH_SIZE, EPOCHS = 8, 25

history_noRelu = model_noRelu.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)

In [None]:
# Per-epoch validation metrics recorded by Keras while training model_noRelu.
# `val_loss` is kept in the frame for inspection; only `val_acc` is plotted.
result = pd.DataFrame(dict(val_loss=history_noRelu.history['val_loss'], val_acc=history_noRelu.history['val_acc']))

# Explicit figure/axes so the plot carries its own title and axis labels.
fig, ax = plt.subplots(figsize=(18, 6))
sns.pointplot(x=result.index, y=result['val_acc'], color='red', ax=ax)
ax.set_title('Accuracy of Validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')  # Keras reports accuracy as a fraction in [0, 1]
plt.show()

In [None]:
# Predicted scores for the validation set, flattened to a 1-D array.
temp = model_noRelu.predict(X_valid).flatten()

# Threshold once (strictly greater than 0.5 -> class 1) and reuse the labels
# for every label-based metric instead of re-deriving them per metric.
pred_labels = (temp > 0.5).astype(int)

print(roc_auc_score(y_valid, temp))  # AUC is computed on the raw scores
print(recall_score(y_valid, pred_labels))
print(precision_score(y_valid, pred_labels))
print(f1_score(y_valid, pred_labels))

With normalization, with activation (ReLU) in the TCN layers

In [None]:
# Build the "with activation" variant: three stacked TCN layers with
# activation='relu', followed by a small dense classification head.

# Seed TensorFlow's global RNG before any layer is created so that weight
# initialisation is repeatable after a kernel restart (as far as the seeded
# TensorFlow ops allow). 42 is the same seed value the train/valid split uses.
tf.random.set_seed(42)

# NOTE: despite its name, `input_length` is passed to Conv1D as the number of
# filters (output channels); it is not a sequence length.
input_length = 500
batch_size, timesteps, input_dim = None, 5, 1

input_layer = Input(batch_shape=(batch_size, timesteps, input_dim))
dilated_factor1 = Conv1D(input_length, kernel_size=3, dilation_rate=2, padding='causal')(input_layer)

# Arguments shared by every TCN layer of this model. `dilations` is written
# out per call so each layer receives its own list object.
tcn_kwargs = dict(kernel_size=3, nb_stacks=1, padding='causal', activation='relu')

# The first two TCN layers return the full sequence so they can be stacked;
# the last one uses return_sequences=False so the dense head gets one vector
# per sample.
TCN_NET_1 = TCN(nb_filters=128, dilations=[2, 4], return_sequences=True, **tcn_kwargs)(dilated_factor1)
TCN_NET_2 = TCN(nb_filters=64, dilations=[2, 4], return_sequences=True, **tcn_kwargs)(TCN_NET_1)
TCN_NET_3 = TCN(nb_filters=32, dilations=[2, 4], return_sequences=False, **tcn_kwargs)(TCN_NET_2)

# Dense head ending in a single sigmoid unit (binary classification).
Dense_layer_1 = Dense(200, activation='relu')(TCN_NET_3)
Dense_layer_2 = Dense(100, activation='relu')(Dense_layer_1)
output_layer = Dense(1, activation='sigmoid')(Dense_layer_2)

model_yesRelu = Model(inputs=[input_layer], outputs=[output_layer])
model_yesRelu.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

In [None]:
# Train the ReLU model. The returned History object holds the per-epoch
# training and validation metrics.
BATCH_SIZE, EPOCHS = 8, 13

history_yesRelu = model_yesRelu.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)

In [None]:
# Predicted scores for the validation set, flattened to a 1-D array.
temp = model_yesRelu.predict(X_valid).flatten()

# Threshold once (strictly greater than 0.5 -> class 1) and reuse the labels
# for every label-based metric instead of re-deriving them per metric.
pred_labels = (temp > 0.5).astype(int)

print(roc_auc_score(y_valid, temp))  # AUC is computed on the raw scores
print(recall_score(y_valid, pred_labels))
print(precision_score(y_valid, pred_labels))
print(f1_score(y_valid, pred_labels))

In [None]:
# NOTE(review): this calls fit() again on `model_yesRelu` without rebuilding
# it, so if the model was already trained it continues from its current
# weights, and `history_yesRelu` is overwritten with this run's metrics only.
# Confirm that the extra training is intended.
BATCH_SIZE, EPOCHS = 8, 13

history_yesRelu = model_yesRelu.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
)

In [None]:
# Predicted scores for the validation set, flattened to a 1-D array.
temp = model_yesRelu.predict(X_valid).flatten()

# Threshold once (strictly greater than 0.5 -> class 1) and reuse the labels
# for every label-based metric instead of re-deriving them per metric.
pred_labels = (temp > 0.5).astype(int)

print(roc_auc_score(y_valid, temp))  # AUC is computed on the raw scores
print(recall_score(y_valid, pred_labels))
print(precision_score(y_valid, pred_labels))
print(f1_score(y_valid, pred_labels))

In [None]:
# Per-epoch validation metrics from the most recent fit() of model_yesRelu.
# `val_loss` is kept in the frame for inspection; only `val_acc` is plotted.
result = pd.DataFrame(dict(val_loss=history_yesRelu.history['val_loss'], val_acc=history_yesRelu.history['val_acc']))

# Explicit figure/axes so the labels are attached to this plot.
fig, ax = plt.subplots(figsize=(14, 6))
sns.pointplot(x=result.index, y=result['val_acc'], ax=ax)
ax.set_title('Accuracy of Validation')
# Keras reports accuracy as a fraction in [0, 1], not a percentage, so the
# axis label no longer claims "%".
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
plt.show()

In [None]:
# NOTE(review): this cell does not compute predictions itself; it reuses
# `temp` as left by whichever earlier cell assigned it last, and repeats the
# same four metrics.
scores = temp.flatten()

# Threshold once (strictly greater than 0.5 -> class 1) and reuse the labels.
pred_labels = (scores > 0.5).astype(int)

print(roc_auc_score(y_valid, scores))  # AUC is computed on the raw scores
print(recall_score(y_valid, pred_labels))
print(precision_score(y_valid, pred_labels))
print(f1_score(y_valid, pred_labels))