In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.regression import r2_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import AdaBoostRegressor
# import GridSearchCV
import xlrd
import math
import matplotlib.pyplot as plt
import logging
import os
import sys
import pandas as pd
from sklearn import svm
# from queue import Queue
from threading import Thread
from multiprocessing import Process, Queue
import os
import datetime

# Set the TensorFlow C++ log verbosity before TensorFlow is imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Default is 0: print all log messages
# Set to 1: additionally suppress INFO messages
# Set to 2: additionally suppress WARNING messages
# Set to 3: additionally suppress ERROR messages

np.random.seed(1337)  # for reproducibility
logging.basicConfig(level=logging.INFO)


def train_model(learning_rate_rbm, learning_rate, batch_size, x_train, y_train, x_test, message_queue, model_name,
                is_fit, is_predict):
    """Load or create a DBN regressor, optionally fit it, persist it, and optionally predict.

    The function is written to be used as a ``multiprocessing.Process`` target:
    it communicates its prediction through ``message_queue`` and returns nothing.

    Args:
        learning_rate_rbm: Forwarded to ``SupervisedDBNRegression(learning_rate_rbm=...)``.
        learning_rate: Forwarded to ``SupervisedDBNRegression(learning_rate=...)``.
        batch_size: Forwarded to ``SupervisedDBNRegression(batch_size=...)``.
        x_train: Training features passed to ``fit`` when ``is_fit`` is true.
        y_train: Training targets passed to ``fit`` when ``is_fit`` is true.
        x_test: Features passed to ``predict`` when ``is_predict`` is true.
        message_queue: Queue that receives the raw ``predict`` result.
        model_name: Path of the persisted model. If it exists the model is
            loaded from it (the hyper-parameters above are then not applied
            here); otherwise a new model is constructed.
        is_fit: Whether to (continue to) fit the model on the training data.
        is_predict: Whether to predict on ``x_test`` and publish the result.
    """
    # The DBN package is expected under ./models/deep-belief-network relative
    # to the current working directory; make it importable exactly once.
    path_DBN = os.path.join(os.getcwd(), "models", "deep-belief-network")
    if path_DBN not in sys.path:
        sys.path.append(path_DBN)
    from dbn.tensorflow import SupervisedDBNRegression

    model_exists = os.path.exists(model_name)
    if model_exists:
        # Resume from the persisted state instead of building a network that
        # would be thrown away immediately.
        regressor_DBN = SupervisedDBNRegression.load(model_name)
    else:
        regressor_DBN = SupervisedDBNRegression(hidden_layers_structure=[10000, 2000, 1000, 500, 200, 100, 20],
                                                learning_rate_rbm=learning_rate_rbm,
                                                learning_rate=learning_rate,
                                                n_epochs_rbm=5,
                                                n_iter_backprop=2,
                                                batch_size=batch_size,
                                                activation_function='sigmoid',
                                                verbose=False)

    if is_fit:
        regressor_DBN.fit(x_train, y_train)

    # Re-writing an unchanged model is pure overhead, so only persist when the
    # model was just fitted or when no file exists yet.
    if is_fit or not model_exists:
        regressor_DBN.save(model_name)

    if is_predict:
        pred = regressor_DBN.predict(x_test)
        message_queue.put(pred)


def _run_training_process(learning_rate_rbm, learning_rate, batch_size, x_train, y_train, x_test, model_name,
                          is_fit, is_predict):
    """Run ``train_model`` in a child process and return its scalar prediction (or ``None``)."""
    message_queue = Queue()
    worker = Process(target=train_model, args=(
        learning_rate_rbm, learning_rate, batch_size, x_train, y_train, x_test,
        message_queue, model_name, is_fit, is_predict))
    worker.start()

    prediction = None
    if is_predict:
        # Read the result BEFORE joining: a child that has put data on a queue
        # may not terminate until that data has been consumed.
        # NOTE: as before, this blocks indefinitely if the child dies without
        # publishing a result.
        prediction = message_queue.get()[0][0]
    worker.join()
    return prediction


def train_model_func(learning_rate_rbm, learning_rate, batch_size, feature, label, path_out_png, path_out_txt,
                     train_deep, start_predict):
    """Compare a once-trained DBN against an incrementally re-trained DBN.

    Two models are maintained on disk:

    * the "tradition" model is fitted a single time on the first
      ``start_predict + train_deep`` samples;
    * the "online" model is fitted at every step ``i`` on the sliding window
      ``[i - train_deep, i)``.

    From ``i = start_predict + train_deep`` onwards both models predict sample
    ``i``. After each prediction the running RMSE of both models is appended
    to ``path_out_txt`` and printed, and a plot of predictions vs. true values
    is written to ``path_out_png``. Every fit/predict runs in its own child
    process via ``train_model``.

    Args:
        learning_rate_rbm: Forwarded to ``train_model``.
        learning_rate: Forwarded to ``train_model``.
        batch_size: Forwarded to ``train_model``.
        feature: Indexable collection of feature rows (``feature[i]`` is one sample).
        label: Targets; must support ``.shape[0]`` and positional slicing.
        path_out_png: Destination of the comparison plot (overwritten each step).
        path_out_txt: Text file that receives one result line per prediction.
        train_deep: Size of the sliding window used by the online model.
        start_predict: Number of additional warm-up steps before predicting.
    """
    print("Training model...")

    corrects = []
    predictions_tradition = []
    predictions_online = []
    model_name_tradition = 'model_tradition.pkl'
    model_name_online = 'model_online.pkl'

    # Start from scratch: stale model files would otherwise be loaded by train_model.
    for stale_model in (model_name_tradition, model_name_online):
        if os.path.exists(stale_model):
            os.remove(stale_model)

    # The baseline training set does not depend on the loop index.
    first_predict = start_predict + train_deep
    x_train_tradition = np.array(feature[0:first_predict])
    y_train_tradition = np.array(label[0:first_predict])
    is_fit_tradition = True

    for i in range(train_deep, label.shape[0]):
        x_train_online = np.array(feature[i - train_deep:i])
        y_train_online = np.array(label[i - train_deep:i])
        x_test = np.array(feature[i])
        y_test = np.array(label[i:i + 1])[0]
        is_predict = i >= first_predict

        # The baseline model is fitted only on the first iteration. Afterwards
        # a child process is needed only when a prediction is requested.
        prediction_tradition = None
        if is_fit_tradition or is_predict:
            prediction_tradition = _run_training_process(
                learning_rate_rbm, learning_rate, batch_size, x_train_tradition, y_train_tradition, x_test,
                model_name_tradition, is_fit_tradition, is_predict)
        is_fit_tradition = False

        # The online model is re-fitted on the latest window at every step.
        prediction_online = _run_training_process(
            learning_rate_rbm, learning_rate, batch_size, x_train_online, y_train_online, x_test,
            model_name_online, True, is_predict)

        if not is_predict:
            print("i=", i)
            continue

        corrects.append(y_test)
        predictions_tradition.append(prediction_tradition)
        predictions_online.append(prediction_online)

        # Running RMSE over all predictions made so far.
        RMSE_tradition = math.sqrt(mean_squared_error(corrects, predictions_tradition))
        RMSE_online = math.sqrt(mean_squared_error(corrects, predictions_online))

        with open(path_out_txt, 'a') as f:
            # One record per line; the trailing newline keeps records separable.
            f.write("pred_trad:%f\tpred_online:%f\tcorrect:%.2f\t\tRMSE_pred:%.15f\tRMSE_online:%.15f\n" % (
                prediction_tradition, prediction_online, y_test, RMSE_tradition, RMSE_online,))

        print("i=%d\t\tpred_trad:%f\tpred_online:%f\tcorrect:%.2f\t\tRMSE_pred:%.15f\tRMSE_online:%.15f" % (
            i, prediction_tradition, prediction_online, y_test, RMSE_tradition, RMSE_online,))

        # Redraw the full comparison plot with everything predicted so far.
        x_range = range(len(corrects))
        plt.clf()
        plt.ylabel('pred_num')
        plt.xlabel('hours')
        plt.title("DBN_tradition VS. DBN_online")
        plt.plot(x_range, predictions_tradition, ls='-.', lw=2, marker='x', label="DBN_tradition")
        plt.plot(x_range, predictions_online, ls='--', lw=2, marker='v', label="DBN_online")
        plt.plot(x_range, corrects, ls='-', lw=2, marker='o', label="correct")
        plt.legend(loc='best')
        plt.savefig(path_out_png)

    if not corrects:
        # Too few samples to reach the first prediction step; the summary
        # metrics are undefined on empty inputs.
        print('Done.\nNo predictions were made: fewer than start_predict + train_deep + 1 samples.')
        return

    print('Done.\nDBN_pred:\tR-squared: %f\nMSE: %f' % (
        r2_score(corrects, predictions_tradition), mean_squared_error(corrects, predictions_tradition)))
    print('DBN_online:\tR-squared: %f\nMSE: %f' % (
        r2_score(corrects, predictions_online), mean_squared_error(corrects, predictions_online)))


def main(data, target, path_out_png, path_out_txt, learning_rate_rbm=0.001, learning_rate=0.001, batch_size=2,
         train_deep=1, step=4, start_predict=100):
    """Build sliding-window features from ``data`` and run the model comparison.

    Each feature row is the flattened content of ``step`` consecutive rows of
    ``data``; the matching label is the element of ``target`` at the position
    of the window's last row.

    Args:
        data: Table of input values supporting row slicing, ``.values`` and
            ``.shape`` (a pandas DataFrame in the script entry point).
        target: Target values supporting positional slicing and ``.shape``.
        path_out_png: Forwarded to ``train_model_func``.
        path_out_txt: Forwarded to ``train_model_func``.
        learning_rate_rbm: Forwarded to ``train_model_func``.
        learning_rate: Forwarded to ``train_model_func``.
        batch_size: Forwarded to ``train_model_func``.
        train_deep: Forwarded to ``train_model_func``.
        step: Number of consecutive rows combined into one feature vector.
        start_predict: Forwarded to ``train_model_func``; defaults to the
            previously hard-coded value of 100.
    """
    # Collect all windows first and concatenate once; appending to an array
    # inside the loop copies the whole buffer on every iteration.
    windows = [np.ravel(data[start - step:start].values) for start in range(step, data.shape[0] + 1)]
    # The leading empty float array mirrors the original np.append seed, so the
    # resulting dtype (and the empty-input case) stay the same.
    feature = np.concatenate([np.array([])] + windows)

    label = target[step - 1:]
    feature = feature.reshape(label.shape[0], feature.size // label.shape[0])

    train_model_func(learning_rate_rbm=learning_rate_rbm, learning_rate=learning_rate, batch_size=batch_size,
                     feature=feature, label=label, path_out_png=path_out_png, path_out_txt=path_out_txt,
                     train_deep=train_deep, start_predict=start_predict)


if __name__ == "__main__":
    # Script entry point: load the air-quality CSV, normalise the inputs and
    # run the DBN comparison.
    path_out_png = "out/out.png"
    path_out_txt = "out/out.txt"

    # Make sure the output directories exist before anything is written.
    for out_path in (path_out_png, path_out_txt):
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # Truncate the results file; train_model_func appends to it.
    with open(path_out_txt, 'w'):
        pass

    path_data = "data/airdata.csv"
    data = pd.read_csv(path_data, sep=",")

    # The target drops its first row and the inputs drop their last row, so
    # both have the same length and input row t is paired with pm25 at t + 1.
    target = data["pm25"]
    target = target.drop([0])

    data = data.drop([data.shape[0] - 1])
    data = data.drop(["date"], axis=1)

    # Min-max scale every input column to [0, 1].
    for col_name in data.columns:
        col_min = data[col_name].min()
        col_range = data[col_name].max() - col_min
        if col_range == 0:
            # A constant column would divide by zero and fill the features
            # with NaN; map it to 0 instead.
            data[col_name] = 0.0
        else:
            data[col_name] = (data[col_name] - col_min) / col_range

    sys.exit(main(data=data, target=target, path_out_png=path_out_png, path_out_txt=path_out_txt))


Training model...
i= 1
i= 2
i= 3
i= 4
i= 5
i= 6
i= 7
i= 8
i= 9
i= 10
i= 11
i= 12
i= 13
i= 14
i= 15
i= 16
i= 17
i= 18
i= 19
i= 20
i= 21
i= 22
i= 23
i= 24
i= 25
i= 26
i= 27
i= 28
i= 29
i= 30
i= 31
i= 32
i= 33
i= 34
i= 35
i= 36
i= 37
i= 38
i= 39
i= 40
i= 41
i= 42
i= 43
i= 44
i= 45
i= 46
i= 47
i= 48
i= 49
i= 50
i=51		pred_trad:43.611095	pred_online:55.610909	correct:39.00		RMSE_pred:4.611095428466797	RMSE_online:16.610908508300781
i=52		pred_trad:43.611092	pred_online:39.087379	correct:32.00		RMSE_pred:8.834015211492918	RMSE_online:12.770145438058332
i=53		pred_trad:43.611092	pred_online:32.589001	correct:27.00		RMSE_pred:12.000111306634077	RMSE_online:10.914671439003722
i=54		pred_trad:43.611092	pred_online:28.055779	correct:25.00		RMSE_pred:13.949737857491655	RMSE_online:9.575071019660404
i=55		pred_trad:43.611092	pred_online:26.774399	correct:39.00		RMSE_pred:12.646287289489507	RMSE_online:10.160642349151619
i=56		pred_trad:43.611092	pred_online:38.950958	correct:44.00		RMSE_pred:11.545