导入必要的Python库：

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import pickle
import time
from Data import Data

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

设置**可视化参数**：

In [None]:
# Default figure size of 20x6 for every plot, with axis grid lines disabled.
mpl.rcParams.update({
    'figure.figsize': (20, 6),
    'axes.grid': False,
})

初始化**随机种子**：

In [None]:
# Seed TensorFlow's global random generator with a fixed value.
tf.random.set_seed(seed=2020)

设置默认**浮点数类型**：

In [None]:
# Make float64 the default float type used by Keras.
tf.keras.backend.set_floatx(value='float64')

载入数据：

In [None]:
# Alibaba dataset (disabled): builds one column per '*.pickle' file found in
# the working directory, dropping the first 3000 entries of each series.
# data = dict()
# for filename in os.listdir('.'):
#     if filename.endswith('.pickle'):
#         with open(filename, 'rb') as f:
#             data[filename[filename.rindex('_') + 1:filename.index('.')]] = pickle.load(f)[3000:]
# data = pd.DataFrame(data)
# data.head(5)

# BusTracker dataset (active): the CSV is read with explicit column names,
# rows containing any missing value are dropped, the 'date' column is parsed
# to datetimes and then becomes the index.
# Alternative input file: 'SDSS/DTW/149.csv'
data = (
    pd.read_csv('BusTrackerData/1.csv', names=['date', 'freq'])
    .dropna(axis=0, how='any')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# SDSS dataset (disabled): same pipeline as above on a different file.
# data = pd.read_csv('SDSS/DTW149.csv',names=['date', 'freq'])
# data = data.dropna(axis=0,how='any')
# data['date'] = pd.to_datetime(data['date'])
# data = data.set_index('date')

# Show the first rows as the cell output.
data.head(5)

数据可视化：

In [None]:
# Line plot of the frame with one subplot per column, then render it.
data.plot.line(subplots=True)
plt.show()

选择**预测目标**：

In [None]:
# List the available columns together with their positional index.
for i, column in enumerate(data.columns):
    print('{}: {}'.format(i, column))

# Positional index of the column to forecast. Use the commented line instead
# to pick it interactively.
# target = int(input('target (0~{}):'.format(len(data.columns) - 1)))
target = 0

# Validate with a real exception: `assert` statements are removed when Python
# runs with -O, which would let an out-of-range index slip through.
if not 0 <= target < len(data.columns):
    raise ValueError(
        'target must be in [0, {}), got {}'.format(len(data.columns), target)
    )
print('Your choice:', data.columns[target])

拆分**训练数据**和**验证数据**：

In [None]:
# Split point handed to Data (presumably the number of leading rows used for
# training - confirm against the Data class). Values used per dataset:
# TRAIN_SPLIT = 800
TRAIN_SPLIT = 7000  # Bustracker
# TRAIN_SPLIT = 4000  # SDSS

# Windowing configuration forwarded to Data.
UNI_DATA = True
HISTORY_SIZE = 30
TARGET_SIZE = 5
STEP = 1
SINGLE_STEP = True

# NOTE: from here on `data` no longer names the DataFrame but the Data
# wrapper built from its raw values.
data = Data(
    data.values,
    target,
    TRAIN_SPLIT,
    HISTORY_SIZE,
    TARGET_SIZE,
    UNI_DATA,
    STEP,
    SINGLE_STEP,
)
print(data.x_train.shape, data.x_train_target.shape, data.y_train.shape)
# print(data.x_train[0])
# print(data.y_train[0])

定义**Generator模型**：

In [None]:
# Generator: a 16-unit LSTM that returns only its final output, followed by a
# Dense layer without activation. The Dense layer emits one value when
# SINGLE_STEP is set and TARGET_SIZE values otherwise.
generator = tf.keras.models.Sequential()
generator.add(
    tf.keras.layers.LSTM(
        units=16,
        return_sequences=False,
        input_shape=data.x_train.shape[-2:],
    )
)
# generator.add(tf.keras.layers.Flatten())
generator.add(tf.keras.layers.Dense(1 if SINGLE_STEP else TARGET_SIZE))

# Render the architecture diagram as the cell output.
tf.keras.utils.plot_model(generator, show_shapes=True)

定义**Discriminator模型**：

In [None]:
def make_discriminator(data):
    """Build the discriminator that scores a (history, target) pair.

    The target vector is reshaped into a single extra step, appended to the
    history along axis 1, run through a 32-unit LSTM and mapped by a Dense
    layer to one value. No activation is applied to that value.

    Args:
        data: Object exposing ``x_train_target``; the last two entries of its
            shape define the shape of the 'history' input.

    Returns:
        A ``tf.keras.Model`` with inputs ``[history, target]`` and a single
        output.
    """
    # Width of the target vector; reads the module-level configuration.
    target_width = 1 if SINGLE_STEP else TARGET_SIZE

    history_in = tf.keras.Input(shape=data.x_train_target.shape[-2:], name='history')
    target_in = tf.keras.Input(shape=(target_width,), name='target')

    # Present the target as one additional step of shape (1, target_width).
    target_step = tf.keras.layers.Reshape((1, target_width))(target_in)

    # NOTE(review): concatenating on axis 1 requires the history's last
    # dimension to equal target_width - confirm this holds when SINGLE_STEP
    # is False.
    joined = tf.keras.layers.Concatenate(axis=1)([history_in, target_step])
    encoded = tf.keras.layers.LSTM(32, return_sequences=False)(joined)
    score = tf.keras.layers.Dense(1)(encoded)

    return tf.keras.Model(inputs=[history_in, target_in], outputs=[score])


discriminator = make_discriminator(data)

# Render the architecture diagram as the cell output.
tf.keras.utils.plot_model(discriminator, show_shapes=True)

定义**损失函数**：

In [None]:
# Binary cross-entropy shared by both losses; configured to take raw logits.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)


def generator_loss(fake_output, y, y_pred):
    """Return the generator's adversarial loss.

    The loss is the cross-entropy between an all-ones label tensor and the
    discriminator's output on generated samples.

    Args:
        fake_output: Discriminator output for generated targets.
        y: Unused; kept for the (disabled) variant that adds an MSE term.
        y_pred: Unused; see ``y``.

    Returns:
        The cross-entropy loss value.
    """
    # Disabled variant: add tf.keras.losses.MeanSquaredError()(y, y_pred).
    wanted_labels = tf.ones_like(fake_output)
    return cross_entropy(wanted_labels, fake_output)

def discriminator_loss(real_output, fake_output, y, y_pred):
    """Return the discriminator's loss on one real and one generated batch.

    Real outputs are compared against all-ones labels and generated outputs
    against all-zeros labels; the two cross-entropy terms are added.

    Args:
        real_output: Discriminator output for ground-truth targets.
        fake_output: Discriminator output for generated targets.
        y: Unused; kept for the (disabled) variant that adds an MSE term.
        y_pred: Unused; see ``y``.

    Returns:
        Sum of the real-sample and generated-sample cross-entropy losses.
    """
    # Disabled variant: add tf.keras.losses.MeanSquaredError()(y, y_pred).
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

设置**优化器**：

In [None]:
# One Adam optimizer per network. `learning_rate` is the supported keyword;
# the `lr` alias is deprecated and rejected by newer Keras releases.
# The discriminator's learning rate is ten times the generator's.
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

定义**训练步**：

In [None]:
@tf.function
def train_step(x, x_target, y, genTrain):
    """Run one adversarial update on a single batch.

    Args:
        x: Input batch fed to the generator.
        x_target: History batch passed to the discriminator together with a
            target (real or generated).
        y: Ground-truth target batch.
        genTrain: Flag; when falsy the generator update is skipped and only
            the discriminator is updated.

    Returns:
        Tuple ``(gen_loss, disc_loss)`` from the forward pass of this step,
        i.e. computed before any weights were changed.
    """
    # Only the forward pass and the losses need to be recorded by the tapes.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        y_pred = generator(x, training=True)

        real_output = discriminator([x_target, y], training=True)
        fake_output = discriminator([x_target, y_pred], training=True)

        gen_loss = generator_loss(fake_output, y, y_pred)
        disc_loss = discriminator_loss(real_output, fake_output, y, y_pred)

    # Gradients and optimizer updates happen after leaving the tape contexts,
    # so those operations are not recorded on a still-active tape. The update
    # order (generator first, then discriminator) is unchanged.
    if genTrain:
        gen_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
        generator_optimizer.apply_gradients(zip(gen_gradients, generator.trainable_variables))

    disc_gradients = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
    discriminator_optimizer.apply_gradients(zip(disc_gradients, discriminator.trainable_variables))

    return gen_loss, disc_loss

定义**评估函数**：

In [None]:
# Mean-squared-error loss object used for evaluation.
mse = tf.keras.losses.MeanSquaredError()


def evaluate(model, data):
    """Return the mean squared error of ``model`` over all batches in ``data``.

    Args:
        model: Callable mapping an input batch ``x`` to predictions.
        data: Iterable of ``(x, x_target, y)`` batches; ``x_target`` is not
            used here.

    Returns:
        The value computed by the module-level ``mse`` loss on all targets
        and all predictions.
    """
    # Collect whole batches instead of unpacking every sample into its own
    # tensor; joining them once along the batch axis gives the same data.
    y_true_batches, y_pred_batches = [], []
    for x, _, y in data:
        y_true_batches.append(y)
        y_pred_batches.append(model(x))

    if not y_true_batches:
        # Nothing to concatenate: hand the empty lists to the loss, as the
        # per-sample implementation did for an empty dataset.
        return mse(y_true_batches, y_pred_batches)

    return mse(
        tf.concat(y_true_batches, axis=0),
        tf.concat(y_pred_batches, axis=0),
    )

定义**训练循环**：

In [None]:
# def train(data_train, data_val, epochs, steps_per_epoch):
#     loss_history = {'train_loss': [], 'val_loss': []}
#     final_val_loss = 1000
#     pp = 0
#     for epoch in range(epochs):
#         start = time.time()

#         gen_loss, disc_loss, train_loss, val_loss = None, None, None, None
#         for step, (x, x_target, y) in enumerate(data_train.repeat()):
#             if step >= steps_per_epoch:
#                 break
#             if step % 6 == 0:
#                 genTrain = True
#             else:
#                 genTrain = True
#             gen_loss, disc_loss = train_step(x, x_target, y, genTrain)
#         train_loss = evaluate(generator, data_train)
#         val_loss = evaluate(generator, data_val)
        
#         if val_loss < final_val_loss:
#             final_val_loss = val_loss
#             generator.save('Model/BusTracker/'+ 'GAN-'+str(TARGET_SIZE)+'.h5')
#             print('new model save', pp)
#             pp = pp + 1
            
#         loss_history['train_loss'].append(train_loss)
#         loss_history['val_loss'].append(val_loss)

#         print('Time for epoch {} is {:.3f} sec. gen_loss: {:.6f}, disc_loss: {:.6f}, train_loss: {:.6f}, val_loss: {:.6f}'.format(
#             epoch + 1, time.time() - start, gen_loss, disc_loss, train_loss, val_loss
#         ))

#     return loss_history

def train(data_train, data_val, epochs, steps_per_epoch):
    """Train the GAN and record the generator's MSE after every epoch.

    Each epoch runs ``steps_per_epoch`` calls to ``train_step`` on batches
    drawn from ``data_train.repeat()``, then evaluates the generator on
    ``data_train`` and ``data_val`` and prints a one-line summary.

    Args:
        data_train: Training dataset yielding ``(x, x_target, y)`` batches.
        data_val: Validation dataset with the same batch structure.
        epochs: Number of epochs to run.
        steps_per_epoch: Number of training steps per epoch.

    Returns:
        Dict with keys ``'train_loss'`` and ``'val_loss'``; each maps to a
        list holding one evaluation result per epoch.
    """
    loss_history = {'train_loss': [], 'val_loss': []}
    for epoch in range(epochs):
        start = time.time()

        gen_loss, disc_loss = None, None
        for step, (x, x_target, y) in enumerate(data_train.repeat()):
            if step >= steps_per_epoch:
                break
            # NOTE(review): this used to be `if step % 6 == 0: ... else: ...`
            # with the flag set to True in both arms, so the generator has
            # always been updated on every step. If alternating updates were
            # intended, reinstate the condition with a real False arm.
            genTrain = True
            gen_loss, disc_loss = train_step(x, x_target, y, genTrain)

        train_loss = evaluate(generator, data_train)
        val_loss = evaluate(generator, data_val)

        loss_history['train_loss'].append(train_loss)
        loss_history['val_loss'].append(val_loss)

        print('Time for epoch {} is {:.3f} sec. gen_loss: {:.6f}, disc_loss: {:.6f}, train_loss: {:.6f}, val_loss: {:.6f}'.format(
            epoch + 1, time.time() - start, gen_loss, disc_loss, train_loss, val_loss
        ))

    return loss_history

封装**训练数据集**和**验证数据集**：

In [None]:
# Arguments for Data.dataset (BUFFER_SIZE is presumably the shuffle buffer
# size - confirm against the Data class).
BUFFER_SIZE, BATCH_SIZE = 10000, 64
data_train, data_val = data.dataset(BUFFER_SIZE, BATCH_SIZE)

模型**训练**：

In [None]:
# Training schedule: 50 epochs of 50 steps each.
EPOCHS, STEPS_PER_EPOCH = 50, 50
train_history = train(
    data_train,
    data_val,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
)

模型**评估**：

In [None]:
# Final validation MSE of the trained generator.
final_loss = evaluate(generator, data_val)
print('Evaluation MSE LOSS:', final_loss.numpy())

# Make sure the output directory exists so saving does not depend on it
# having been created by hand beforehand.
os.makedirs('Model/BusTracker', exist_ok=True)
generator.save('Model/BusTracker/' + 'GAN_Attention-' + str(TARGET_SIZE) + '.h5')

**损失函数**变化趋势：

In [None]:
def plot_train_history(history, title):
    """Plot training and validation loss per epoch in a new figure.

    Args:
        history: Mapping with ``'train_loss'`` and ``'val_loss'`` sequences.
        title: Title placed above the plot.
    """
    # Look both series up before any figure is created.
    curves = [
        (history['train_loss'], 'b', 'Training loss'),
        (history['val_loss'], 'r', 'Validation loss'),
    ]
    # The x axis is the epoch index, sized by the training-loss series.
    x_axis = range(len(curves[0][0]))

    plt.figure()
    for values, fmt, label in curves:
        plt.plot(x_axis, values, fmt, label=label)
    plt.title(title)
    plt.legend()
    plt.show()


plot_train_history(train_history, 'Training and validation loss')

结果展示：

In [None]:
# Overlay the generator's predictions on the validation inputs (dashed red)
# with the validation targets (solid blue).
y_predict = generator(data.x_val)
plt.plot(data.y_val, color='b', linestyle='-', label='actual')
plt.plot(y_predict, color='r', linestyle='--', label='predict')
plt.legend()
plt.show()