In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
import pandas as pd
# Quick look at the CSV exactly as stored, before any index or date parsing is applied.
pd.read_csv('../input/flower-app/App.csv')

In [5]:
import numpy as np
# Load the CSV again, this time parsing the 'Date' column as datetimes and
# using it as the index so rows can be selected by date later on.
df_app = pd.read_csv(
    '../input/flower-app/App.csv',
    parse_dates=['Date'],
    index_col='Date',
)
df_app

In [6]:
# Data visualisation: plot the 'Activation' column over its date index.
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
# `legend` is an on/off switch in pandas plotting, so pass True explicitly
# instead of relying on an arbitrary truthy integer.
df_app['Activation'].plot(figsize=(12, 4), legend=True)
plt.title('APP Activation Count')
plt.show()

In [7]:
# Data cleaning: count the missing values in each column.
df_app.isnull().sum()

In [8]:
# True if any 'Activation' value is negative.
df_app['Activation'].lt(0).to_numpy().any()

In [9]:
# Split into training and test sets by date: everything up to and including
# 2020-09-30 is training data, everything from 2020-10-01 onwards is test data.
# `.loc` makes the label-based row selection explicit, and the boundary dates use
# the same zero-padded format as the plotting cell further down.
# `.iloc[:, 0:1]` keeps only the first column while preserving a 2-D shape.
Train = df_app.loc[:'2020-09-30'].iloc[:, 0:1].values
Test = df_app.loc['2020-10-01':].iloc[:, 0:1].values

In [12]:
# Inspect the training array.
Train

In [13]:
# Report the shape of the training split, then of the test split.
for caption, split in (('训练集的形状是：', Train), ('测试集的形状是：', Test)):
    print(caption, split.shape)

In [14]:
# Draw the training period and the test period as two separate lines.
# `legend` is an on/off switch in pandas plotting, so pass True explicitly
# instead of relying on an arbitrary truthy integer.
df_app['Activation'][:'2020-09-30'].plot(figsize=(12, 4), legend=True)
df_app['Activation']['2020-10-01':].plot(figsize=(12, 4), legend=True)
# Replace the default legend entries with descriptive labels, in plotting order.
plt.legend(['Training set(Before October 2020)', 'Test set(October 2020 and beyond)'])
plt.title('APP Activation count')
plt.show()

In [16]:
# Feature engineering: rescale the training values into the range [0, 1].
from sklearn.preprocessing import MinMaxScaler
Scaler = MinMaxScaler(feature_range=(0,1))
# The scaler is fitted on the training data only and reused later for the test inputs.
Scaler.fit(Train)
# NOTE: Train is overwritten with its scaled version, so re-running this cell on
# its own refits the scaler on already-scaled values; re-run the split cell first.
Train = Scaler.transform(Train)

In [17]:
# Build the feature set and the label set for training.
# Each sample is a window of 60 consecutive scaled values; its label is the
# value that immediately follows the window.
train_window_ends = range(60, Train.size)
X_train = np.array([Train[end - 60:end, 0] for end in train_window_ends])  # features
y_train = np.array([Train[end, 0] for end in train_window_ends])  # labels
# Add a trailing axis of length 1 so the features are 3-D: (samples, time steps, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

In [18]:
# Show the shape of the windowed training features.
X_train.shape

In [19]:
TrainTest = df_app['Activation'][:]
# Take the test period plus the 60 values just before it, so that the first
# test sample already has a full 60-step window of history.
first_input_row = len(TrainTest) - len(Test) - 60
inputs = TrainTest[first_input_row:].values.reshape(-1, 1)
# Reuse the scaler fitted on the training data; it is not refitted here.
inputs = Scaler.transform(inputs)

# Same windowing as for the training set: 60 values in, the next value as label.
test_window_ends = range(60, inputs.size)
X_test = np.array([inputs[end - 60:end, 0] for end in test_window_ends])  # features
y_test = np.array([inputs[end, 0] for end in test_window_ends])  # labels (still scaled)
# Add a trailing axis of length 1 so the features are 3-D: (samples, time steps, 1).
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)


In [21]:
# Show the shape of the windowed test features.
X_test.shape

In [26]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, LSTM
# LSTM network architecture: four stacked LSTM layers of 50 units each, followed
# by a single-unit Dense output layer. The first three LSTM layers return their
# full output sequence so the next LSTM layer receives one vector per time step.
RNN_LSTM = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    LSTM(units=50, return_sequences=True),
    LSTM(units=50, return_sequences=True),
    LSTM(units=50),
    Dense(units=1),
])

# Train against mean squared error and additionally report mean absolute error.
RNN_LSTM.compile(
    optimizer='rmsprop',
    loss='mean_squared_error',
    metrics=['mae'],
)
RNN_LSTM.summary()

In [33]:
# Train the model and keep the returned history object for plotting.
history = RNN_LSTM.fit(
    x=X_train,             # training features
    y=y_train,             # training labels
    validation_split=0.2,  # carve the validation set directly out of the training data
    batch_size=64,         # samples per batch
    epochs=30,             # number of training epochs
)

In [34]:
def show_history(history):
    """Plot the per-epoch training loss and, when present, the validation loss.

    Parameters
    ----------
    history : object
        Any object with a ``history`` mapping that holds a ``'loss'`` sequence
        and optionally a ``'val_loss'`` sequence, one value per epoch (for
        example the value returned by ``RNN_LSTM.fit`` in this notebook).
    """
    loss = history.history['loss']
    # 'val_loss' may be absent (e.g. a fit without validation data), so look it
    # up leniently instead of failing with a KeyError.
    val_loss = history.history.get('val_loss')
    epochs = range(1, len(loss) + 1)
    # Only one chart is drawn, so use the whole figure rather than the left
    # half of a two-column subplot grid.
    plt.figure(figsize=(12, 4))
    plt.plot(epochs, loss, 'bo', label='Training loss')
    if val_loss is not None:
        plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

In [35]:
show_history(history)

In [36]:
def plot_prediction(test,predicted):
    """Plot the real values and the predicted values on the same axes.

    Parameters
    ----------
    test : array-like
        The real values, drawn in red.
    predicted : array-like
        The predicted values, drawn in blue.
    """
    plt.plot(test, color='red', label='Real Count')
    plt.plot(predicted, color='blue', label='Predicted Count')
    plt.title('Flower App Activation Prediction')
    plt.xlabel('Time')
    plt.ylabel('Flower App Activation Count')
    plt.legend()
    # Call show(); a bare `plt.show` only references the function and does nothing.
    plt.show()

In [37]:
# Predict on the test windows, then undo the min-max scaling so the predictions
# are back on the original scale before plotting them against the real test values.
Pred = Scaler.inverse_transform(RNN_LSTM.predict(X_test))
plot_prediction(Test, Pred)