In [10]:
import pandas as pd


In [11]:
import keras.backend as K
import numpy as np
from matplotlib import pyplot as plt

def get_activations(model, inputs, print_shape_only=False, layer_name=None):
    """
    Evaluate the outputs of a model's layers on ``inputs`` and print them.

    Adapted from https://github.com/philipperemy/keras-visualize-activations
    (documentation is available at that address).

    :param model: model exposing ``input`` and ``layers``.
    :param inputs: data fed to the model input.
    :param print_shape_only: when True only the shape of each activation is printed;
        otherwise both the shape and the values are printed.
    :param layer_name: when None every layer is evaluated; otherwise only the layers
        whose ``name`` equals this value.
    :return: list holding one activation array per evaluated layer.
    """
    print('----- activations -----')
    model_input = model.input

    # Output tensors to evaluate: every layer, or only the layer(s) matching layer_name.
    selected = [
        layer.output
        for layer in model.layers
        if layer_name is None or layer.name == layer_name
    ]

    # One backend function per output tensor; the learning-phase flag is a second input.
    evaluators = [
        K.function([model_input, K.learning_phase()], [tensor]) for tensor in selected
    ]
    # The learning-phase input is fed the value 1. on every call.
    computed = [evaluate([inputs, 1.])[0] for evaluate in evaluators]

    for values in computed:
        if print_shape_only:
            print(values.shape)
        else:
            print('shape为', values.shape)
            print(values)
    return list(computed)


def get_data(n, input_dim, attention_column=1):
    """
    Generate a toy data set in which exactly one feature column carries the target.

    All features are drawn from a standard normal distribution; column
    ``attention_column`` is then overwritten with the 0/1 target, so a network should
    learn that target = x[attention_column] and focus its attention on that column.

    :param n: the number of samples to generate.
    :param input_dim: the number of features per sample.
    :param attention_column: index of the column that is set equal to the target.
    :return: x: model inputs of shape (n, input_dim), y: targets of shape (n, 1)
    """
    features = np.random.standard_normal(size=(n, input_dim))
    labels = np.random.randint(low=0, high=2, size=(n, 1))
    # Flatten the (n, 1) labels so they fit the selected (n,) column.
    features[:, attention_column] = labels.ravel()
    return features, labels


def get_data_recurrent(n, time_steps, input_dim, attention_column=10):
    """
    Generate a toy sequence data set in which exactly one time step carries the target.

    All values are drawn from a standard normal distribution; every feature of time
    step ``attention_column`` is then overwritten with the 0/1 target, so a network
    should learn to focus its attention on that time step.
    ``attention_column`` indexes the time axis, so with the default of 10 the series
    needs more than 10 time steps.

    :param n: the number of samples to generate.
    :param time_steps: the number of time steps of each series.
    :param input_dim: the number of features of each time step.
    :param attention_column: index of the time step that is set equal to the target.
    :return: x: model inputs of shape (n, time_steps, input_dim), y: targets of shape (n, 1)
    """
    series = np.random.standard_normal(size=(n, time_steps, input_dim))
    labels = np.random.randint(low=0, high=2, size=(n, 1))
    # Copy each sample's label into all input_dim features of the chosen time step.
    series[:, attention_column, :] = np.repeat(labels, input_dim, axis=1)
    return series, labels


def get_data_recurrent2(n, time_steps, input_dim, attention_dim=5):
    """
    Generate a toy sequence data set in which exactly one feature carries the target.

    Example: with input_dim = 10 and time_steps = 6 every sample is a 6 x 10 matrix
    of standard-normal values whose feature ``attention_dim`` equals the 0/1 target
    at every one of the 6 time steps.

    :param n: the number of samples to generate.
    :param time_steps: the number of time steps of each series.
    :param input_dim: the number of features of each time step.
    :param attention_dim: index of the feature that is set equal to the target.
    :return: x: model inputs of shape (n, time_steps, input_dim), y: targets of shape (n, 1)
    """
    series = np.random.standard_normal(size=(n, time_steps, input_dim))
    labels = np.random.randint(low=0, high=2, size=(n, 1))
    # The (n, 1) labels broadcast across all time steps of the selected feature.
    series[:, :, attention_dim] = labels
    return series, labels
     

In [14]:
# 本导入顺序可以看到类型
import tensorflow as tf
import keras
import tensorflow_docs
import datetime
import os
import pandas as pd
import numpy as np


from attention_utils import get_activations

from sklearnex import patch_sklearn
from matplotlib import axis, pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import ensemble, metrics


from keras import layers
from keras import models

# from keras.layers import (
#     Input,
#     Dense,
#     LSTM,
#     Conv1D,
#     Dropout,
#     Bidirectional,
#     Multiply,
#     Lambda,
#     RepeatVector,
#     Permute,
# )


def merging(inputs, axis):
    """
    Concatenate a list of tensors along ``axis`` using ``layers.concatenate``.

    :param inputs: the tensors to join.
    :param axis: the axis along which to concatenate.
    :return: the concatenated tensor.
    """
    merged = layers.concatenate(inputs, axis=axis)
    return merged


# from attention_utils import get_activations
# from keras.layers.core import *
# from keras.layers.merging.concatenate import concatenate
# from keras.layers.merging.add import add
# from keras.layers.merging.multiply import multiply
# from keras.layers.merging.subtract import subtract
# from keras.layers.merging.average import average
# from keras.layers.merging.maximum import maximum
# from keras.layers.merging.minimum import minimum
# from keras.layers.merging.dot import dot

# Switch read by attention_3d_block: when True, the attention weights are averaged
# over axis 1 and the averaged vector is repeated before being applied to the inputs.
SINGLE_ATTENTION_VECTOR = False


def attention_3d_block(inputs):
    """
    Re-weight a 3-D sequence tensor with softmax attention weights.

    A softmax-activated Dense layer produces one weight per element of ``inputs``;
    the weights are multiplied element-wise with ``inputs``. Dense operates on the
    last axis, so the softmax normalises across the feature axis at each time step.
    When the module-level flag SINGLE_ATTENTION_VECTOR is True the weights are first
    averaged over the time axis and the resulting vector is shared by all time steps.

    The layer classes are taken from the imported ``layers`` namespace because the
    bare names (Dense, Lambda, ...) are not imported anywhere in this notebook.

    :param inputs: tensor of shape (batch_size, time_steps, input_dim).
    :return: tensor of the same shape as ``inputs``: inputs * attention weights.
    """
    input_dim = int(inputs.shape[2])

    a = layers.Dense(input_dim, activation="softmax")(inputs)

    if SINGLE_ATTENTION_VECTOR:
        # Averaging over axis 1 leaves (batch_size, input_dim); the vector must be
        # repeated once per time step to get back to the shape of `inputs`.
        # (Repeating it input_dim times only works when time_steps == input_dim.)
        time_steps = int(inputs.shape[1])
        a = layers.Lambda(lambda x: K.mean(x, axis=1), name="dim_reduction")(a)
        a = layers.RepeatVector(time_steps)(a)

    # Identity permutation: only gives the attention weights a stable layer name so
    # they can be looked up by name (e.g. with get_activations).
    a_probs = layers.Permute((1, 2), name="attention_vec")(a)

    output_attention_mul = layers.Multiply()([inputs, a_probs])
    return output_attention_mul


# Alternative implementation of the attention mechanism; use it if the version above raises errors. Source: https://blog.csdn.net/uhauha2929/article/details/80733255


def attention_3d_block2(inputs, single_attention_vector=False):
    """
    Re-weight a 3-D sequence tensor with softmax attention over the time axis.

    The tensor is transposed to (batch_size, input_dim, time_steps) so that the
    softmax-activated Dense layer normalises across time steps; the weights are
    transposed back and multiplied element-wise with ``inputs``. The weighted
    sequence is returned without summing over time, so for a classification head it
    can be flattened afterwards.

    If the preceding layer is an LSTM it must use return_sequences=True so that the
    input is 3-D.

    The layer classes are taken from the imported ``layers`` namespace because the
    bare names (Dense, Lambda, ...) are not imported anywhere in this notebook.

    :param inputs: tensor of shape (batch_size, time_steps, input_dim).
    :param single_attention_vector: when True the weights are averaged over the
        feature axis and one shared weight vector is used for every feature.
    :return: tensor of the same shape as ``inputs``: inputs * attention weights.
    """
    time_steps = K.int_shape(inputs)[1]
    input_dim = K.int_shape(inputs)[2]

    # (batch, time_steps, input_dim) -> (batch, input_dim, time_steps)
    a = layers.Permute((2, 1))(inputs)
    a = layers.Dense(time_steps, activation="softmax")(a)

    if single_attention_vector:
        # Mean over the feature axis -> (batch, time_steps), then repeat per feature.
        a = layers.Lambda(lambda x: K.mean(x, axis=1))(a)
        a = layers.RepeatVector(input_dim)(a)

    # Back to (batch, time_steps, input_dim) so the weights line up with `inputs`.
    a_probs = layers.Permute((2, 1))(a)

    output_attention_mul = layers.Multiply()([inputs, a_probs])
    return output_attention_mul


def create_dataset(dataset, look_back):
    """
    Slice a 2-D array into sliding input windows and their next-step targets.

    Window ``i`` holds rows ``i .. i + look_back - 1`` of every column except
    column 0; its target is the complete row ``i + look_back``.

    NOTE(review): because of the extra ``- 1`` in the window count, the last row of
    ``dataset`` is never used as a target - confirm this is intended.

    :param dataset: 2-D array (rows are time steps, columns are variables).
    :param look_back: number of consecutive rows in each input window.
    :return: (windows, targets) as numpy arrays, one entry per window.
    """
    window_count = len(dataset) - look_back - 1
    starts = range(window_count)

    windows = [dataset[start:start + look_back, 1:] for start in starts]
    targets = [dataset[start + look_back, :] for start in starts]

    return np.array(windows), np.array(targets)


# Multidimensional normalization returns data and maximum and minimum values


def NormalizeMult(data):
    """
    Min-max scale every column of a 2-D data set to the range [0, 1].

    The input is copied into a float64 array, so the caller's object is not modified
    and integer input is not truncated when the scaled values are stored. Columns
    whose minimum equals their maximum are left unchanged.

    :param data: 2-D array-like (e.g. ndarray or DataFrame) of numeric values.
    :return: (scaled, normalize) where ``scaled`` is the float64 scaled copy and
        ``normalize`` has shape (n_columns, 2) holding each column's [min, max],
        as needed by FNormalizeMult to undo the scaling.
    """
    data = np.array(data, dtype="float64")

    lows = data.min(axis=0)
    highs = data.max(axis=0)
    normalize = np.column_stack((lows, highs))
    print(normalize.shape)

    span = highs - lows
    # Constant columns (span == 0) are skipped to avoid dividing by zero.
    scalable = span != 0
    data[:, scalable] = (data[:, scalable] - lows[scalable]) / span[scalable]

    # np.save("./normalize.npy",normalize)
    return data, normalize


# Multidimensional denormalization


def FNormalizeMult(data, normalize):
    """
    Undo the min-max scaling of NormalizeMult, column by column.

    The input is copied into a float64 array, so the caller's object is not modified
    and integer input is not truncated. Column ``i`` is mapped back with row ``i`` of
    ``normalize``; rows beyond the number of data columns are ignored. Columns whose
    stored minimum equals the stored maximum are left unchanged.

    :param data: 2-D array-like of scaled values.
    :param normalize: array of shape (>= n_columns, 2) holding [min, max] per column.
    :return: float64 array with the values mapped back to the original range.
    """
    data = np.array(data, dtype="float64")

    bounds = np.asarray(normalize, dtype="float64")[: data.shape[1]]
    lows = bounds[:, 0]
    span = bounds[:, 1] - lows

    # Constant columns were not scaled by NormalizeMult, so they are not unscaled here.
    scalable = span != 0
    data[:, scalable] = data[:, scalable] * span[scalable] + lows[scalable]

    return data


def attention_model():
    """
    Build the Conv1D -> Bidirectional LSTM -> attention -> Dense(1) regression model.

    Reads the module-level settings TIME_STEPS and INPUT_DIMS (input shape),
    ``lstm_units`` (LSTM width) and ``drop`` (rate of both Dropout layers); they must
    be defined before this function is called.

    The layer and model classes are taken from the imported ``layers`` / ``models``
    namespaces because the bare names (Input, Conv1D, Model, ...) are not imported
    anywhere in this notebook.

    :return: the uncompiled model mapping (TIME_STEPS, INPUT_DIMS) inputs to one
        linear output.
    """
    inputs = layers.Input(shape=(TIME_STEPS, INPUT_DIMS))

    # kernel_size=1: the convolution mixes features within each time step only.
    x = layers.Conv1D(filters=64, kernel_size=1, activation="relu")(inputs)
    x = layers.Dropout(drop)(x)

    # return_sequences=True keeps the time axis, which attention_3d_block requires.
    lstm_out = layers.Bidirectional(layers.LSTM(lstm_units, return_sequences=True))(x)
    lstm_out = layers.Dropout(drop)(lstm_out)

    attention_mul = attention_3d_block(lstm_out)
    attention_mul = layers.Flatten()(attention_mul)

    # Linear activation: the model is a regressor.
    output = layers.Dense(1, activation="linear")(attention_mul)

    model = models.Model(inputs=[inputs], outputs=output)
    return model


ModuleNotFoundError: No module named 'keras.layers.merging'

In [None]:
# Load the raw data set from the CSV file in the working directory and display it.
data = pd.read_csv("P_2S_700K_1000K.csv")
data

In [None]:
# Plot every column of the raw data in its own subplot.
data.plot(legend=True, subplots=True, figsize=(12,8), xlabel="STEP")
plt.show()

In [None]:
# Remove the 'time' column, then report the remaining columns and the shape.
data = data.drop(columns=['time'])
print(data.columns)
print(data.shape)

### 改各自层数！

In [None]:
TIME_STEPS = 50
lstm_units = 64
drop = 0

# Normalized (kept under a new name so that `data` stays the DataFrame used by the
# cells above and this cell can be re-run).
data_norm, normalize = NormalizeMult(data)

# create_dataset leaves column 0 out of the input windows, so the model sees one
# feature fewer than the data has columns. Deriving the value keeps the model input
# shape in sync with train_X instead of relying on a hand-edited constant.
INPUT_DIMS = data_norm.shape[1] - 1

# Column 0 is the prediction target.
pollution_data = data_norm[:, 0].reshape(len(data_norm), 1)

train_X, _ = create_dataset(data_norm, TIME_STEPS)
_, train_Y = create_dataset(pollution_data, TIME_STEPS)

print(train_X.shape, train_Y.shape)

m = attention_model()
m.summary()
m.compile(optimizer='adam', loss='mse')
#m.fit([train_X], train_Y, epochs=10, batch_size=64, validation_split=0.1)
#m.save("./model.h5")
#np.save("normalize.npy",normalize)

In [None]:
from sklearn.model_selection import train_test_split
# Reload the data without the 'time' column.
df = pd.read_csv("P_2S_700K_1000K.csv").drop(columns=['time'])

# Data split: the first 90% of the rows (in file order) train, the remainder tests.
train_size = int(len(df) * 0.9)
train, test = df.iloc[:train_size], df.iloc[train_size:]
#train, test = train_test_split(df, test_size=0.1)
print("len(train):", len(train))
print("len(test):", len(test))
     

In [None]:
import keras.backend as K
def r2_keras(y_true, y_pred):
    """
    Coefficient of determination (R^2) written with Keras backend operations.

    Computes 1 - SS_res / (SS_tot + epsilon); the backend epsilon keeps the division
    defined when every target equals the mean.

    :param y_true: tensor of target values.
    :param y_pred: tensor of predicted values.
    :return: scalar tensor holding R^2.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    centered = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(centered))
    ratio = residual_ss / (total_ss + K.epsilon())
    return 1 - ratio
     

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint
model_path = './model.h5'
# TRAIN
TIME_STEPS = 50
lstm_units = 64

# Normalized. The result goes into a new name so that `train` remains the raw split:
# re-running this cell then re-derives the same `normalize` bounds instead of
# replacing them with the 0/1 bounds of already-normalized data.
train_norm, normalize = NormalizeMult(train)

# create_dataset leaves column 0 out of the input windows, so the model sees one
# feature fewer than the data has columns; derive it rather than hard-coding it.
INPUT_DIMS = train_norm.shape[1] - 1

# Column 0 is the prediction target.
pollution_data = train_norm[:, 0].reshape(len(train_norm), 1)

train_X, _ = create_dataset(train_norm, TIME_STEPS)
_, train_Y = create_dataset(pollution_data, TIME_STEPS)
print(train_X.shape, train_Y.shape)

m = attention_model()
m.summary()
#m.compile(optimizer='adam', loss='mse')
m.compile(loss='mae', optimizer='adam', metrics=['mae', r2_keras])

# fit the network
history = m.fit([train_X], train_Y, epochs=30, batch_size=30, validation_split=0.1)
#history = m.fit([train_X], train_Y, epochs=10, batch_size=64, validation_split=0.05, verbose=2,
#          callbacks = [EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min'),
#                       ModelCheckpoint(model_path,monitor='val_loss', save_best_only=True, mode='min', verbose=0)]
#          )
     

In [None]:
# Save the trained model to ./C_L_A.h5.
m.save("./C_L_A.h5")

In [None]:
import matplotlib.pyplot as plt

# One figure per tracked quantity, each showing the training and validation curves.
for history_key, figure_title, y_label in (
    ('r2_keras', 'model r^2', 'R^2'),  # summarize history for R^2
    ('loss', 'model loss', 'loss'),    # summarize history for Loss
):
    plt.plot(history.history[history_key])
    plt.plot(history.history['val_' + history_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
# TEST
# Build the evaluation windows from the held-out `test` split (the training rows
# must not be reused here, otherwise the "test" scores are training scores).
# The split is scaled with the per-column [min, max] stored in `normalize` by the
# training cell, so test values live on the same scale the model was fitted on.
test_norm = np.array(test, dtype="float64")
col_low = normalize[:, 0]
col_span = normalize[:, 1] - col_low
scaled_cols = col_span != 0  # constant columns are left unscaled, as in NormalizeMult
test_norm[:, scaled_cols] = (
    test_norm[:, scaled_cols] - col_low[scaled_cols]
) / col_span[scaled_cols]

# Column 0 is the prediction target.
pollution_test = test_norm[:, 0].reshape(len(test_norm), 1)

test_X, _ = create_dataset(test_norm, TIME_STEPS)
_, test_Y = create_dataset(pollution_test, TIME_STEPS)
print(test_X.shape, test_Y.shape)

In [None]:
# Evaluate the compiled loss and metrics of the model on the test windows.
scores_test = m.evaluate([test_X], test_Y, verbose=2)

In [None]:
# Model predictions for every test window.
results = m.predict([test_X])

In [None]:
# Predicted vs. real target values, zoomed to samples 19000-23000.
fig_acc, ax = plt.subplots(figsize=(9, 9))
ax.plot(results[0:27000])
ax.plot(test_Y[0:27000])
ax.set_title('real vs pred')
ax.set_ylabel('value')
# The x axis is the position of the sample in the sequence, not a training epoch.
ax.set_xlabel('sample index')
ax.set_xlim((19000, 23000))
ax.legend(['pred', 'real'], loc='upper left')
plt.show()

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import explained_variance_score
from sklearn.metrics import r2_score

# Report all metrics on the same samples (the complete evaluation set), so that the
# three numbers are comparable with each other.
print('以下是CNN_biLSTM_Attention误差')
y_true = np.asarray(test_Y).ravel()
y_pred = np.asarray(results).ravel()
print('R^2决定系数：', r2_score(y_true, y_pred))
print('RMSE为：', np.sqrt(mean_squared_error(y_true, y_pred)))
# MAPE is undefined where the true value is 0, so those samples are excluded.
nonzero_target = y_true != 0
print('MAPE为：', np.abs((y_pred[nonzero_target] - y_true[nonzero_target]) / y_true[nonzero_target]).mean())

In [None]:
# Keep only the samples whose true value is non-zero, so that the percentage error
# computed from them never divides by zero.
kept_positions = [k for k in range(len(results)) if test_Y[k] != 0]
test_new = [test_Y[k] for k in kept_positions]
predict_new = [results[k] for k in kept_positions]

def MAPE(y_true, y_pred):
    """
    Mean absolute percentage error, returned as a fraction (not multiplied by 100).

    :param y_true: array-like of true values; must not contain zeros.
    :param y_pred: array-like of predicted values, same shape as ``y_true``.
    :return: mean of |(y_true - y_pred) / y_true| over all elements.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.fabs(relative_error))
# MAPE over the non-zero targets, formatted to four decimals (a string), then displayed.
mape = format(MAPE(test_new, predict_new), '.4f') 
mape