In [1]:
from tensorflow import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_recall_fscore_support as score

__author__ = "Chenxiao Zeng"
__email__ = "zeng.544@osu.edu"

'''
The Training set is generated by taking 0:40 and 0:40 region and 
combine with the samples with extreme features in 0:200 and 0:200 region.
'''

def read_df():
    """ 
    Read the data frame file and remove the first row for duplicate engry 
    """
    df = pd.read_csv("/users/PCON0003/osu10644/h4RG_10_noise/tensorflow_dl/df_train_test_subset.csv")
    df = df.iloc[:, 1:]
    return df

def train_test():
    """ 
    Split the reading files into train and test sets 
    """
    df = read_df()
    X = df.iloc[:, :56]
    y = df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=1)
    return X_train, X_test, y_train, y_test.astype(int)

def normalize():
    """ 
    Normalize the time series into range 0 to 1 using MinMaxScaler in sklern
    """
    X_train, X_test, y_train, y_test = train_test()
    scaler=MinMaxScaler()
    X_train=pd.DataFrame(scaler.fit_transform(X_train.T).T,columns=X_train.columns)
    X_test=pd.DataFrame(scaler.fit_transform(X_test.T).T,columns=X_test.columns)
    return X_train.values, X_test.values, y_train.values, y_test.values

def plot_TimeSeries(y, name, x= np.arange(56).reshape(-1, 1), path="./", color='black', linestyle='-', linewidth=3):
    """
    Plot the series given the y values of the time series
    """
    fig = plt.figure(figsize=(12, 18))
    gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1]) 
    gs.update(wspace=0.025, hspace=0) 
    ax0 = plt.subplot(gs[0])
    ax0.set_yscale("linear")
    ax0.set_xscale("linear")
    ax0.set_ylabel("Amplitude")
    ax0.set_xlabel("Time Frame")
    line, = ax0.plot(x, y, color = color, linestyle=linestyle, linewidth=linewidth)
    plt.savefig(path + name + ".pdf",dpi=300, bbox_inches='tight')
    return

def vis_class():
    """
    Visualize example in each class
    """
    X_train, X_test, y_train, y_test = normalize()
    classes = np.unique(np.concatenate((y_train, y_test), axis=0))
    # print(X_train[:5])
    plt.figure()
    for c in classes:
        c_x_train = X_train[y_train == c]
        plt.plot(c_x_train[0], label="class" + str(c))
    plt.legend(loc="best")
    plt.show()
    return

def reshape_x():
    """
    Reshape the X input so that the deep learning neural nework can process
    """
    X_train, X_test, y_train, y_test = normalize()
    X_train_ = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_ = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
    return X_train_, X_test_, y_train, y_test

def make_model(input_shape, num_classes, filters, kernel_size):
    """
    Construct the Convolutional Neural Network model, default filters = 64, kernal_size = 3
    """
    input_layer = keras.layers.Input(input_shape)
    
    conv1 = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding="same")(input_layer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.ReLU()(conv1)

    conv2 = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding="same")(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.ReLU()(conv2)

    conv3 = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding="same")(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.ReLU()(conv3)

    gap = keras.layers.GlobalAveragePooling1D()(conv2)

    output_layer = keras.layers.Dense(num_classes, activation="softmax")(gap)
    return keras.models.Model(inputs=input_layer, outputs=output_layer)

def evaluate_model(X_train, X_test, y_train, y_test, n_kernel):
    """
    Evaluate the model, default epochs = 500, batch_size = 32, verbose = 0
    """
    verbose, epochs, batch_size = 0, 500, 32
    num_classes = len(np.unique(y_train))
    model = make_model(X_train.shape[1:], num_classes, 64, 3)
    keras.utils.plot_model(model, show_shapes=True)
    callbacks = [
        keras.callbacks.ModelCheckpoint(
            "best_model.h5", save_best_only=True, monitor="val_loss"
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
        ),
        keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
    ]
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )
    history = model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        callbacks=callbacks,
        validation_split=0.2,
        verbose=verbose,
    )
    model = keras.models.load_model("best_model.h5")
    y_pred = model.predict(X_test)
    test_loss, test_acc = model.evaluate(X_test, y_test)
    return test_loss, test_acc, history, y_pred

In [2]:
X_train, X_test, y_train, y_test = reshape_x()
test_loss, test_acc, history, y_pred = evaluate_model(X_train, X_test, y_train, y_test, n_kernel=0)

2022-07-27 14:07:03.207927: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-27 14:07:03.213270: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2400000000 Hz
2022-07-27 14:07:03.213366: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5594442e69f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-07-27 14:07:03.213378: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version


('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')
Epoch 00161: early stopping


In [3]:
y_pred_arg = np.argmax(y_pred, axis=1)
precision, recall, fscore, support = score(y_test, y_pred_arg)
mess_length = 12
type_length = 16
print('precision:'.ljust(mess_length), "".join(["type%d: %.3f".ljust(type_length) % (idx, val) for idx, val in enumerate(precision)]))
print('recall:'.ljust(mess_length), "".join(["type%d: %.3f".ljust(type_length) % (idx, val) for idx, val in enumerate(recall)]))
print('fscore:'.ljust(mess_length), "".join(["type%d: %.3f".ljust(type_length) % (idx, val) for idx, val in enumerate(fscore)]))
print('support:'.ljust(mess_length), "".join(["type%d: %.1f".ljust(16) % (idx, val) for idx, val in enumerate(support)]))

precision:   type0: 0.930    type1: 0.778    type2: 0.500    type3: 1.000    
recall:      type0: 0.993    type1: 0.304    type2: 0.167    type3: 0.909    
fscore:      type0: 0.960    type1: 0.438    type2: 0.250    type3: 0.952    
support:     type0: 279.0    type1: 23.0    type2: 6.0    type3: 11.0    
