In [None]:
# Import packages

import pandas as pd
import numpy as np
import sys
from keras.models import load_model
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [None]:
# Define function

def look_back(X, a, n_stations=29, target_col=7):
    """Build look-back windows and matching targets from block-stacked rows.

    ``X`` is read as consecutive blocks of ``n_stations`` rows (presumably one
    block per day with one row per station -- confirm against the data file).
    For every row ``i`` the window collects rows ``i``, ``i + n_stations``,
    ..., ``i + (a - 1) * n_stations``, i.e. the same position within ``a``
    consecutive blocks. The target is column ``target_col`` of the row that
    follows the window, ``a`` blocks after row ``i``.

    Parameters
    ----------
    X : array-like, shape (n_rows, n_features)
        Input rows; ``n_rows`` must be a multiple of ``n_stations``.
    a : int
        Number of look-back steps (blocks) per window.
    n_stations : int, default 29
        Number of rows per block.
    target_col : int, default 7
        Column of ``X`` used as the prediction target.

    Returns
    -------
    X_lb : numpy.ndarray, float64,
        shape (n_rows // n_stations - a, n_stations, a, n_features)
    Y_lb : numpy.ndarray,
        shape (n_rows // n_stations - a, n_stations, 1)

    Raises
    ------
    ValueError
        If ``n_rows`` is not a multiple of ``n_stations`` or there are fewer
        than ``a`` blocks of rows.
    """
    X = np.asarray(X)
    n_rows = len(X)
    if n_rows % n_stations != 0:
        raise ValueError(
            'len(X)={} is not a multiple of n_stations={}'.format(n_rows, n_stations)
        )

    n_windows = n_rows - n_stations * a
    if n_windows < 0:
        raise ValueError(
            'need at least {} rows for a={} look-back steps, got {}'.format(
                n_stations * a, a, n_rows
            )
        )

    n_blocks = n_rows // n_stations - a
    n_features = X.shape[1]

    # Fill one lag at a time: lag j of window i is row i + j * n_stations, so
    # each lag is a contiguous slice of X shifted by j blocks.
    X_lb = np.zeros((n_windows, a, n_features))
    for j in range(a):
        start = j * n_stations
        X_lb[:, j] = X[start:start + n_windows]
    X_lb = X_lb.reshape(n_blocks, n_stations, a, n_features)

    # Targets start right after the first full window of `a` blocks.
    Y_lb = X[a * n_stations:, target_col].reshape(n_blocks, n_stations, 1)
    return X_lb, Y_lb

def division(data):
    """Split ``data`` in order into train / validation / test parts.

    The first 60% of the items form the training part, the next 20% the
    validation part and the remainder the test part. Boundaries are truncated
    to integers and no shuffling is performed.

    Returns
    -------
    tuple
        ``(data_train, data_val, data_test)`` slices of ``data``.
    """
    n_items = len(data)
    train_end = int(n_items * 0.6)
    val_end = int(n_items * 0.8)
    return data[:train_end], data[train_end:val_end], data[val_end:]

In [None]:
# Names of the water-quality monitoring stations.
# The list position is used as the station index (axis 1) when per-station
# series are sliced out of the observed and predicted arrays.

stations = [
    'Goryeong',
    'Naju',
    'Nam River',
    'Neungseo',
    'Dalcheon',
    'Lake Daecheong',
    'Dogae',
    'Lake Dongbok',
    'Bokhacheon',
    'Sinam',
    'Andong Dam downstream',
    'Yangpyeong',
    'Yeoju',
    'Lake Okjeong',
    'Lake Yongdam',
    'Yongbong',
    'Uchi',
    'Yugucheon',
    'Lake Uiam',
    'Jang-gye',
    'Jeokpo',
    'Lake Juam',
    'Jiseokcheon',
    'Cheongam',
    'Chilgok',
    'Lake Tamjin',
    'Poongyang',
    'Hyeondo',
    'Hoesang',
]

In [None]:
# Build the train / validation / test windows for the chosen time step (5).

df = pd.read_csv('./data.csv', encoding='ms949')

ts = 5

# Row-wise chronological split; boundaries are expressed as multiples of 29
# rows (29 stations in `stations` -- presumably one row per station per day).
train = df[:641 * 29]
validation = df[len(train):1006 * 29]
test = df[len(train) + len(validation):]


def _windowed(frame):
    """Return look-back inputs and 4-D targets for one split of the data."""
    inputs, targets = look_back(frame.values, ts)
    # Append a trailing axis so targets are (n, 29, 1, 1).
    return inputs, targets.reshape(len(targets), 29, 1, 1)


X_train, Y_train = _windowed(train)
X_val, Y_val = _windowed(validation)
X_test, Y_test = _windowed(test)

In [None]:
# Load the saved Keras model from ./CNN_5.h5 and predict on the test windows.
# NOTE(review): the file name suggests a CNN trained with a look-back of 5,
# matching `ts = 5` used to build X_test -- confirm.
model = load_model('./CNN_5.h5')
# Later cells index y_predicted as [:, station, :, :], so the prediction is
# expected to be 4-D with the station on axis 1.
y_predicted = model.predict(X_test)

In [None]:
# Observed and predicted series for each of the 29 water-quality monitoring
# stations, stored as module attributes named 'real_<station>' and
# 'pred_<station>'. Some station names contain spaces, so these attributes
# must be read back with getattr(mod, ...).

mod = sys.modules[__name__]

for idx, station in enumerate(stations):
    # Axis 1 is the station axis; flatten each slice to a column vector.
    setattr(mod, 'real_{}'.format(station), Y_test[:, idx, :, :].reshape(-1, 1))
    setattr(mod, 'pred_{}'.format(station), y_predicted[:, idx, :, :].reshape(-1, 1))

In [None]:
# Per-station evaluation: RMSE, R^2 of an OLS fit of observed on predicted,
# and an observed-vs-predicted line plot saved as ./plot_<station>.png.
rmse_lst = []
r2_lst = []

for name in stations:
    # Flatten the (n, 1) column vectors to 1-D so the DataFrame columns are
    # plain numeric columns rather than columns of 1-element arrays.
    y_real_station = getattr(mod, 'real_{}'.format(name)).ravel()
    y_pred_station = getattr(mod, 'pred_{}'.format(name)).ravel()

    rr = pd.DataFrame({'Observed': y_real_station, 'Predicted': y_pred_station})
    reg = sm.OLS.from_formula("Observed ~ Predicted", rr).fit()

    rmse = round(sqrt(mean_squared_error(y_real_station, y_pred_station)), 3)
    r2 = round(reg.rsquared, 3)

    rmse_lst.append(rmse)
    r2_lst.append(r2)

    fig, ax = plt.subplots(figsize=(10, 7))
    # `stations` already holds the display names; the previous lookup into an
    # undefined `eng_stations` list raised NameError.
    ax.set_title('{}'.format(name))
    ax.plot(y_real_station, label='Observed')
    ax.plot(y_pred_station, label='Predict')
    ax.set_ylabel('Chlorophyll-a', fontsize=20)
    ax.set_xlabel('Days', fontsize=20)
    ax.legend()
    fig.savefig('./plot_{}.png'.format(name))
    plt.show()
    # Release the figure so 29 figures do not accumulate in memory.
    plt.close(fig)