In [None]:
# import module

import matplotlib.pyplot as plt
import numpy as np
from numpy.random import permutation
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense,Activation, LSTM, Dropout, TimeDistributed, Flatten
from keras.models import load_model
from keras import optimizers
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.callbacks import EarlyStopping

In [None]:


# Read every dataset split from its CSV file: x_* files hold the inputs, y_* files the targets.
x_train_load, y_train_load = (
    pd.read_csv(csv_name, delimiter=',')
    for csv_name in ('x_training_origin.csv', 'y_training_origin.csv')
)
x_val_load, y_val_load = (
    pd.read_csv(csv_name, delimiter=',')
    for csv_name in ('x_validation_origin.csv', 'y_validation_origin.csv')
)
x_test_load, y_test_load = (
    pd.read_csv(csv_name, delimiter=',')
    for csv_name in ('x_2018.csv', 'y_2018.csv')
)
x_2019_load, y_2019_load = (
    pd.read_csv(csv_name, delimiter=',')
    for csv_name in ('x_2019.csv', 'y_2019.csv')
)

# Row count of each split, plus the column count of the training inputs.
numTrainSize, numVar = x_train_load.shape
numValidatSize = len(x_val_load)
numTestSize = len(x_test_load)
num2019Size = len(x_2019_load)

# Report how much data was loaded (train + validation + 2018 rows are summed in the first line).
print(
    "There are " + str(numTrainSize + numValidatSize + numTestSize)
    + " training data with " + str(numVar)
    + " variables, which equals to 4 data information and 21 variables of 1 year for total 5 continuous years."
)
print("There are " + str(num2019Size) + " data used for prediction.")

In [None]:
# The five per-year '*_TB' columns are excluded from the model inputs.
column = ['FirstYear_TB', 'SecondYear_TB', 'ThirdYear_TB', 'FourthYear_TB', 'FifthYear_TB']

# Build the feature frames with non-mutating drops instead of drop(..., inplace=True):
# an in-place drop makes this cell fail with KeyError when it is run a second time,
# because the columns are already gone from the loaded frames. The x_*_load frames
# are therefore left exactly as they were read from disk.
#
# The training/validation inputs additionally lose their first 4 columns
# (presumably the "4 data information" columns mentioned in the load summary -- TODO confirm).
x_train_sep = x_train_load.drop(columns=column).iloc[:, 4:]
x_val_sep = x_val_load.drop(columns=column).iloc[:, 4:]
# The 2018 and 2019 inputs keep all of their remaining columns.
x_test_sep = x_test_load.drop(columns=column)
x_2019_sep = x_2019_load.drop(columns=column)

In [None]:
# Min-max scale the input features of every split, then reshape them to (samples, 5, 20).

def _min_max_scale(frame):
    """Rescale each column of `frame` as (value - column min) / (column max - column min)."""
    lowest = frame.min()
    return (frame - lowest) / (frame.max() - lowest)

# NOTE(review): every split is scaled with its own min/max rather than shared statistics.
x_train_norm = _min_max_scale(x_train_sep)
x_val_norm = _min_max_scale(x_val_sep)
x_test_norm = _min_max_scale(x_test_sep)
x_2019_norm = _min_max_scale(x_2019_sep)

# Each row of 100 scaled values becomes a 5 x 20 block
# (presumably 5 years x 20 variables per year -- TODO confirm).
x_train_reshape = x_train_norm.values.reshape(numTrainSize, 5, 20)
x_val_reshape = x_val_norm.values.reshape(numValidatSize, 5, 20)
x_test_reshape = x_test_norm.values.reshape(numTestSize, 5, 20)
x_2019_reshape = x_2019_norm.values.reshape(num2019Size, 5, 20)

In [None]:
# Split off the target columns

# Training/validation targets: every column after the first three, taken as an independent copy.
y_train = y_train_load.iloc[:, 3:].copy()
y_val = y_val_load.iloc[:, 3:].copy()
# The 2018 and 2019 target files are used whole; these names refer to the loaded frames themselves.
y_test, y_2019 = y_test_load, y_2019_load

In [None]:
# Classify: turn every target value into the index of its interval (each interval is 5 HR).

def _to_interval_class(frame, width=5):
    """Return floor(value / width) for every entry of `frame` as a new array.

    The result is computed into a fresh array instead of being written back into
    `frame.values` row by row. Writing in place can modify the DataFrame the array
    belongs to, so re-running the cell would divide the already-converted classes
    by `width` again; it also fails when `.values` is read-only (pandas Copy-on-Write).

    The result keeps the dtype of the source values, which is what assigning the
    floored rows back into the original array produced.
    """
    raw = frame.values
    return np.floor(raw / width).astype(raw.dtype)

y_train_TY = _to_interval_class(y_train)
y_val_TY = _to_interval_class(y_val)
y_test_TY = _to_interval_class(y_test)
y_2019_TY = _to_interval_class(y_2019)

In [None]:
# y_train distribution

bins = np.arange(0, 13, 1)  # 13 edges -> 12 unit-wide bins
fig, ax = plt.subplots()
ax.hist(y_train_TY, bins=bins, alpha=0.8)
# Title and axis labels so the figure can be read without the surrounding code.
ax.set(title="y_train class distribution", xlabel="class index", ylabel="count")
plt.show()

In [None]:
# y_val distribution

bins = np.arange(0, 13, 1)  # 13 edges -> 12 unit-wide bins
fig, ax = plt.subplots()
ax.hist(y_val_TY, bins=bins, alpha=0.8)
# Title and axis labels so the figure can be read without the surrounding code.
ax.set(title="y_val class distribution", xlabel="class index", ylabel="count")
plt.show()

In [None]:
# y_test distribution

bins = np.arange(0, 13, 1)  # 13 edges -> 12 unit-wide bins
fig, ax = plt.subplots()
ax.hist(y_test_TY, bins=bins, alpha=0.8)
# Title and axis labels so the figure can be read without the surrounding code.
ax.set(title="y_test class distribution", xlabel="class index", ylabel="count")
plt.show()

In [None]:
# y_2019 distribution

bins = np.arange(0, 13, 1)  # 13 edges -> 12 unit-wide bins
fig, ax = plt.subplots()
ax.hist(y_2019_TY, bins=bins, alpha=0.8)
# Title and axis labels so the figure can be read without the surrounding code.
ax.set(title="y_2019 class distribution", xlabel="class index", ylabel="count")
plt.show()

In [None]:
# One-hot encode the class indices of every split, using 12 classes.

y_train_cat, y_val_cat, y_test_cat, y_2019_cat = (
    np_utils.to_categorical(class_ids, 12)
    for class_ids in (y_train_TY, y_val_TY, y_test_TY, y_2019_TY)
)

In [None]:
# Merge the training and validation splits into one training set; the validation
# samples are appended after the training samples along the first axis.
# Note: y_train is rebound here -- it held the target DataFrame before this cell.

x_train = np.concatenate([x_train_reshape, x_val_reshape], axis=0)
y_train = np.concatenate([y_train_cat, y_val_cat], axis=0)

In [None]:
# Analysis: load the ten saved models, model_1.h5 through model_10.h5, in order.

(
    model_1, model_2, model_3, model_4, model_5,
    model_6, model_7, model_8, model_9, model_10,
) = (load_model('model_%d.h5' % model_no) for model_no in range(1, 11))

In [None]:
# Predict the 2019 class index with each of the ten loaded models.

def _predict_class_ids(model, inputs):
    """Return the predicted class index for every sample in `inputs`.

    Stands in for `Sequential.predict_classes`, which newer Keras/TensorFlow
    releases no longer provide. It applies the same rule to the output of
    `model.predict`: argmax over the last axis when the model emits more than
    one score per sample, otherwise a 0.5 threshold on the single score.
    """
    scores = model.predict(inputs)
    if scores.shape[-1] > 1:
        return scores.argmax(axis=-1)
    return (scores > 0.5).astype('int32')

result_2019_model_1 = _predict_class_ids(model_1, x_2019_reshape)
result_2019_model_2 = _predict_class_ids(model_2, x_2019_reshape)
result_2019_model_3 = _predict_class_ids(model_3, x_2019_reshape)
result_2019_model_4 = _predict_class_ids(model_4, x_2019_reshape)
result_2019_model_5 = _predict_class_ids(model_5, x_2019_reshape)
result_2019_model_6 = _predict_class_ids(model_6, x_2019_reshape)
result_2019_model_7 = _predict_class_ids(model_7, x_2019_reshape)
result_2019_model_8 = _predict_class_ids(model_8, x_2019_reshape)
result_2019_model_9 = _predict_class_ids(model_9, x_2019_reshape)
result_2019_model_10 = _predict_class_ids(model_10, x_2019_reshape)

In [None]:
# Model 1 confusion matrix: rows are the true 2019 classes, columns the predicted classes.

pd.crosstab(
    index=y_2019_TY.ravel(),
    columns=result_2019_model_1,
    rownames=["answer"],
    colnames=["predict"],
)

In [None]:
# Share of 2019 samples that model 1 puts in exactly the right class, one class
# too high, or one class too low.
n_samples = len(y_2019_TY)
# Predicted class minus true class for every sample.
class_gap = result_2019_model_1 - y_2019_TY.reshape(-1)

n_exact = np.count_nonzero(class_gap == 0)    # prediction equals the answer
n_over = np.count_nonzero(class_gap == 1)     # prediction is the answer + 1
n_under = np.count_nonzero(class_gap == -1)   # prediction is the answer - 1

# Printed labels, in order: accuracy, overestimate rate, underestimate rate, total of the three.
print("準確率：%.4f" % (n_exact / n_samples))
print("高估率：%.4f" % (n_over / n_samples))
print("低估率：%.4f" % (n_under / n_samples))
print("總和：%.4f" % ((n_exact + n_over + n_under) / n_samples))