导入必要的Python库：

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import pickle
import time
from Data import Data

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from transformer import Transformer
from tensorflow.keras.layers import Input

设置**可视化参数**：

In [None]:
# Default every figure to a (20, 6) canvas with axis grid lines switched off.
mpl.rcParams.update({
    'figure.figsize': (20, 6),
    'axes.grid': False,
})

初始化**随机种子**：

In [None]:
# Fix TensorFlow's global random seed. Only TensorFlow is seeded in this
# cell; NumPy and Python's `random` module are left untouched.
tf.random.set_seed(2020)

设置默认**浮点数类型**：

In [None]:
# Make float32 the default floating-point type used by Keras.
tf.keras.backend.set_floatx('float32')

载入数据：

In [None]:
# NOTE: an earlier, now-disabled loader built the frame from every `*.pickle`
# file in the working directory (dropping the first 3000 entries of each).
# The CSV-based pipeline below is the one in use.

# Read the two-column CSV (column names supplied here) and drop every row
# that has a missing value.
origin_data = pd.read_csv(
    'BusTrackerData/1.csv',
    names=['date', 'freq'],
).dropna(axis=0, how='any')

# Parse the timestamps, then index the frame by them.
origin_data['date'] = pd.to_datetime(origin_data['date'])
origin_data = origin_data.set_index('date')

# Append each entry of the pickled list as its own `fourier<i>` column
# (presumably precomputed Fourier components, judging by the file name).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('BusTrackerData/1_fourier.pkl', 'rb') as f:
    fourier_list = pickle.load(f)
for idx, component in enumerate(fourier_list):
    origin_data['fourier{}'.format(idx)] = component

# Last expression of the cell: displays the first five rows in the notebook.
origin_data.head(5)

数据可视化：

In [None]:
# Draw every column of the frame in its own subplot, then render the figure.
origin_data.plot(subplots=True)
plt.show()

选择**预测目标**：

In [None]:
# List every column with its positional index so a target can be picked.
for i, column in enumerate(origin_data.columns):
    print('{}: {}'.format(i, column))

# The target column index is fixed to the first column. To choose it
# interactively instead, read an index in 0..len(origin_data.columns) - 1
# with input() and assign it here.
target = 0

# Validate explicitly rather than with `assert`, which is stripped when
# Python runs with -O and would let an out-of-range index through.
if not 0 <= target < len(origin_data.columns):
    raise ValueError(
        'target must be in [0, {}), got {}'.format(len(origin_data.columns), target)
    )
print('Your choice:', origin_data.columns[target])

拆分**训练数据**和**验证数据**：

In [None]:
# Constants handed to the project-local `Data` helper; their exact semantics
# are defined by that class.
TRAIN_SPLIT = 7000
UNI_DATA = True
HISTORY_SIZE = 30
TARGET_SIZE = 0
STEP = 1
SINGLE_STEP = True

data = Data(
    origin_data.values,
    target,
    TRAIN_SPLIT,
    HISTORY_SIZE,
    TARGET_SIZE,
    UNI_DATA,
    STEP,
    SINGLE_STEP,
)


def _with_trailing_axis(labels):
    """Reshape `labels` to rank 3: keep the first two dims, infer the last."""
    return tf.reshape(labels, [labels.shape[0], labels.shape[1], -1])


# Give both label sets the same rank-3 layout.
data.y_train = _with_trailing_axis(data.y_train)
data.y_val = _with_trailing_axis(data.y_val)

# Report the shapes of the training and validation arrays.
print(data.x_train.shape, data.x_train_target.shape, data.y_train.shape)
print(data.x_val.shape, data.x_val_target.shape, data.y_val.shape)

In [None]:
# Instantiate the project-local Transformer with an empty dict as its only argument.
Model = Transformer({})

# Two symbolic inputs (batch dimension implicit): one of shape
# (HISTORY_SIZE, 1) and one of shape (STEP, 1).
history_input = Input([HISTORY_SIZE, 1])
step_input = Input([STEP, 1])
inputs = (history_input, step_input)

# NOTE(review): `training=True` is fixed here at graph-construction time. If
# the Transformer contains dropout or similar layers, they may stay in
# training mode during evaluation as well; confirm this is intended.
outputs = Model(inputs, training=True, predict_seq_length=STEP)

# Wrap the traced computation in a Keras functional model named 'Transformer'.
model = tf.keras.Model(inputs, outputs, name='Transformer')

# Last expression of the cell: renders the architecture diagram with shapes.
tf.keras.utils.plot_model(model, show_shapes=True)

定义损失函数：

In [None]:
# Shared mean-squared-error loss object (also used by `evaluate`).
mse = tf.keras.losses.MeanSquaredError()


def model_loss(y_pred, y):
    """Return the mean squared error between predictions and targets.

    Args:
        y_pred: Model output.
        y: Ground-truth values, shaped compatibly with `y_pred`.

    Returns:
        The loss tensor produced by the shared `mse` object.
    """
    # Keras losses are called as loss(y_true, y_pred). The previous call had
    # the two swapped; MSE is symmetric, but the reversed order made Keras
    # cast the predictions to the label dtype instead of the other way round.
    return mse(y, y_pred)

设置**优化器**：

In [None]:
# Adam optimizer created with its default arguments.
model_optimizer = tf.keras.optimizers.Adam()

定义**训练步**：

In [None]:
@tf.function
def train_step(x, x_target, y):
    """Run one optimisation step on a single batch and return its loss.

    Args:
        x: Batch fed to the model's first input (cast to float32).
        x_target: Accepted so the signature matches the dataset's
            `(x, x_target, y)` tuples; not used by this function.
        y: Batch of ground-truth targets. It is also fed to the model's
            second input (cast to float32).

    Returns:
        The loss of this batch as computed by `model_loss`.
    """
    # NOTE(review): the ground truth `y` is fed as the second model input
    # while `x_target` is ignored; confirm this is intended and not label
    # leakage.
    model_inputs = [tf.cast(x, tf.float32), tf.cast(y, tf.float32)]

    # One tape is enough: there is a single model and a single loss.
    with tf.GradientTape() as tape:
        y_pred = model(model_inputs, training=True)
        loss = model_loss(y_pred, y)

    # Differentiate and update after leaving the tape context so that the
    # backward pass and optimizer ops are not themselves recorded.
    gradients = tape.gradient(loss, model.trainable_variables)
    model_optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss

定义**评估函数**：

In [None]:
def evaluate(model, data):
    """Compute the mean squared error of `model` over every batch in `data`.

    Args:
        model: Keras model called with a two-element input list.
        data: Iterable yielding `(x, x_target, y)` batches.

    Returns:
        The MSE over all samples, as returned by the shared `mse` loss object.

    Raises:
        ValueError: If `data` yields no batches.
    """
    true_batches, pred_batches = [], []
    for x, x_target, y in data:
        # NOTE(review): as in train_step, `y` is fed as the second model
        # input and `x_target` is unused; confirm this is intended.
        batch_inputs = [tf.cast(x, tf.float32), tf.cast(y, tf.float32)]
        true_batches.append(y)
        pred_batches.append(model(batch_inputs, training=False))

    if not true_batches:
        raise ValueError('evaluate() received a dataset with no batches')

    # Join whole batches along the sample axis rather than unpacking every
    # batch into per-sample tensors and re-stacking them.
    y_true = tf.concat(true_batches, axis=0)
    y_pred = tf.concat(pred_batches, axis=0)
    return mse(y_true, y_pred)

定义**训练循环**：

In [None]:
def train(data_train, data_val, epochs, steps_per_epoch):
    """Train the global `model` and record per-epoch losses.

    Each epoch runs at most `steps_per_epoch` calls to `train_step` on batches
    drawn from `data_train.repeat()`, then evaluates the model on the full
    training and validation datasets and prints a one-line summary.

    Args:
        data_train: Training dataset yielding `(x, x_target, y)` batches; it
            must provide `repeat()`.
        data_val: Validation dataset yielding `(x, x_target, y)` batches.
        epochs: Number of epochs to run.
        steps_per_epoch: Maximum number of training steps per epoch.

    Returns:
        Dict with keys 'train_loss' and 'val_loss', each a list holding one
        `evaluate` result per epoch.
    """
    loss_history = {'train_loss': [], 'val_loss': []}
    for epoch in range(epochs):
        start = time.time()

        # NaN placeholder: keeps the summary line printable when no training
        # step runs (a `None` here would crash the `{:.6f}` format).
        gen_loss = float('nan')
        for step, (x, x_target, y) in enumerate(data_train.repeat()):
            # The repeated dataset is consumed without end, so stop explicitly.
            if step >= steps_per_epoch:
                break
            gen_loss = train_step(x, x_target, y)

        train_loss = evaluate(model, data_train)
        val_loss = evaluate(model, data_val)

        loss_history['train_loss'].append(train_loss)
        loss_history['val_loss'].append(val_loss)

        print('Time for epoch {} is {:.3f} sec. gen_loss: {:.6f}, train_loss: {:.6f}, val_loss: {:.6f}'.format(
            epoch + 1, time.time() - start, gen_loss, train_loss, val_loss
        ))

    return loss_history

封装**训练数据集**和**验证数据集**：

In [None]:
# Both constants are passed to the project-local `data.dataset()` helper.
# BUFFER_SIZE is presumably the shuffle-buffer size — TODO confirm in `Data`.
BUFFER_SIZE = 10000
BATCH_SIZE = 64
# Build the training and validation datasets consumed by train() / evaluate().
data_train, data_val = data.dataset(BUFFER_SIZE, BATCH_SIZE)

模型**训练**：

In [None]:
EPOCHS = 50  # number of epochs passed to train()
STEPS_PER_EPOCH = 50  # maximum training steps per epoch
# Train the model and keep the per-epoch loss history for plotting.
train_history = train(data_train, data_val, EPOCHS, STEPS_PER_EPOCH)

模型**评估**：

In [None]:
# Final MSE of the trained model over the validation dataset.
final_loss = evaluate(model, data_val)
print('Evaluation MSE LOSS:', final_loss.numpy())
# NOTE(review): the disabled save call below refers to `generator`, a name
# that is not defined in the visible code; update it before re-enabling.
# generator.save('Model/Alibaba/'+ 'LSTM-'+str(TARGET_SIZE)+'.h5')

**损失函数**变化趋势：

In [None]:
def plot_train_history(history, title):
    """Plot the training and validation loss curves on a single figure.

    Args:
        history: Mapping with 'train_loss' and 'val_loss' sequences, one
            entry per epoch.
        title: Title shown above the plot.
    """
    # (values, matplotlib format string, legend label) for each curve.
    curves = (
        (history['train_loss'], 'b', 'Training loss'),
        (history['val_loss'], 'r', 'Validation loss'),
    )
    # Both curves share an x axis sized by the training-loss series.
    epoch_axis = range(len(curves[0][0]))

    plt.figure()
    for losses, fmt, label in curves:
        plt.plot(epoch_axis, losses, fmt, label=label)
    plt.title(title)
    plt.legend()
    plt.show()

# Show the loss curves collected during training.
plot_train_history(train_history, 'Training and validation loss')

结果展示：

In [None]:
# The model was built with two inputs, so it has to be called with both, in
# the same way evaluate() calls it; passing `data.x_val` alone does not match
# the model's input structure.
# NOTE(review): as in evaluate(), the ground truth is fed as the second
# input; confirm this is intended.
y_predict = model(
    [tf.cast(data.x_val, tf.float32), tf.cast(data.y_val, tf.float32)],
    training=False,
)

# `data.y_val` was reshaped to rank 3 earlier and matplotlib cannot plot
# arrays above 2-D, so collapse everything after the sample axis first.
actual_2d = tf.reshape(data.y_val, [data.y_val.shape[0], -1]).numpy()
predicted_2d = tf.reshape(y_predict, [y_predict.shape[0], -1]).numpy()

plt.plot(actual_2d, 'b-', label='actual')
plt.plot(predicted_2d, 'r--', label='predict')
plt.legend()
plt.show()