In [1]:
import pandas as  pd
import numpy as np
import warnings 
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import datetime 
from scipy import stats
import pandas_profiling
import warnings
warnings.filterwarnings('ignore')

In [2]:
data_prefix_path = '/Users/elvis/ITProjects/GitHub/PythonTask/Competitions/datasets/ThePurchaseAndRedemption/'

In [3]:
data_balance = pd.read_csv(data_prefix_path+'user_balance_table.csv')
data_balance['date'] = pd.to_datetime(data_balance['report_date'],format='%Y%m%d')
data_balance.head()

Unnamed: 0,user_id,report_date,tBalance,yBalance,total_purchase_amt,direct_purchase_amt,purchase_bal_amt,purchase_bank_amt,total_redeem_amt,consume_amt,transfer_amt,tftobal_amt,tftocard_amt,share_amt,category1,category2,category3,category4,date
0,1,20140805,20385,20383,2,0,0,0,0,0,0,0,0,2,,,,,2014-08-05
1,1,20140808,20391,20389,2,0,0,0,0,0,0,0,0,2,,,,,2014-08-08
2,1,20140811,20397,20395,2,0,0,0,0,0,0,0,0,2,,,,,2014-08-11
3,1,20140814,20403,20401,2,0,0,0,0,0,0,0,0,2,,,,,2014-08-14
4,1,20140817,20409,20407,2,0,0,0,0,0,0,0,0,2,,,,,2014-08-17


In [4]:
data = data_balance.groupby('report_date')['total_purchase_amt','total_redeem_amt'].sum().reset_index()
data

Unnamed: 0,report_date,total_purchase_amt,total_redeem_amt
0,20130701,32488348,5525022
1,20130702,29037390,2554548
2,20130703,27270770,5953867
3,20130704,18321185,6410729
4,20130705,11648749,2763587
...,...,...,...
422,20140827,302194801,468164147
423,20140828,245082751,297893861
424,20140829,267554713,273756380
425,20140830,199708772,196374134


In [5]:
data.to_csv('date_label.csv')

In [6]:
"""
reference:https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
Note:
1.LSTMs are sensitive to the scale of the input data, specifically when the sigmoid (default) or tanh activation functions are used. It can be a good practice to rescale the data to the range of 0-to-1, also called normalizing.
2.The LSTM network expects the input data (X) to be provided with a specific array structure in the form of: [samples, time steps, features].
"""

import math
import numpy
import pandas
from keras.layers import LSTM, RNN, GRU, SimpleRNN
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
import os

numpy.random.seed(2019)


class RNNModel(object):
    def __init__(self, look_back=1, epochs_purchase=20, epochs_redeem=40, batch_size=1, verbose=2, patience=10, store_result=False):
        self.look_back = look_back
        self.epochs_purchase = epochs_purchase
        self.epochs_redeem = epochs_redeem
        self.batch_size = batch_size
        self.verbose = verbose
        self.store_result = store_result
        self.patience = patience
        self.purchase = pandas.read_csv('date_label.csv', usecols=[2], engine='python') 
        self.redeem = pandas.read_csv('date_label.csv', usecols=[3], engine='python')
        
    def access_data(self, data_frame):
        # load the data set
        data_set = data_frame.values
        data_set = data_set.astype('float32')

        # LSTMs are sensitive to the scale of the input data, specifically when the sigmoid (default) or tanh activation functions are used. It can be a good practice to rescale the data to the range of 0-to-1, also called normalizing.
        scaler = MinMaxScaler(feature_range=(0, 1))
        data_set = scaler.fit_transform(data_set)

        # reshape into X=t and Y=t+1
        train_x, train_y, test = self.create_data_set(data_set)

        # reshape input to be [samples, time steps, features]
        train_x = numpy.reshape(train_x, (train_x.shape[0], 1, train_x.shape[1]))
        return train_x, train_y, test, scaler

    # convert an array of values into a data set matrix
    def create_data_set(self, data_set):
        data_x, data_y = [], []
        for i in range(len(data_set)-self.look_back - 30):
            a = data_set[i:(i + self.look_back), 0]
            data_x.append(a)
            data_y.append(list(data_set[i + self.look_back: i + self.look_back + 30, 0]))
#         print(numpy.array(data_y).shape)
        return numpy.array(data_x), numpy.array(data_y), data_set[-self.look_back:, 0].reshape(1, 1, self.look_back)

    def rnn_model(self, train_x, train_y, epochs):
        model = Sequential()
        model.add(LSTM(64, input_shape=(1, self.look_back), return_sequences=True))
        model.add(LSTM(32, return_sequences=False))
        model.add(Dense(32))
        model.add(Dense(30))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.summary()
        early_stopping = EarlyStopping('loss', patience=self.patience)
        history = model.fit(train_x, train_y, epochs=epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[early_stopping])
        return model

    def predict(self, model, data):
        prediction = model.predict(data)
        return prediction

    def plot_show(self, predict):
        predict = predict[['purchase', 'redeem']]
        predict.plot()
        plt.show()

    def run(self):
        purchase_train_x, purchase_train_y, purchase_test, purchase_scaler = self.access_data(self.purchase)
        redeem_train_x, redeem_train_y, redeem_test, redeem_scaler = self.access_data(self.redeem)
        print(redeem_train_x)
        print('purchase_train_x:',np.shape(purchase_train_x),'purchase_train_y:',np.shape(purchase_train_y),
             'redeem_train_x:',np.shape(redeem_train_x),'redeem_train_y:',np.shape(redeem_train_y))
        purchase_model = self.rnn_model(purchase_train_x, purchase_train_y, self.epochs_purchase)
        redeem_model = self.rnn_model(redeem_train_x, redeem_train_y, self.epochs_redeem)

        purchase_predict = self.predict(purchase_model, purchase_test)
        redeem_predict = self.predict(redeem_model, redeem_test)

        test_user = pandas.DataFrame({'report_date': [20140900 + i for i in range(1, 31)]})

        purchase = purchase_scaler.inverse_transform(purchase_predict).reshape(30, 1)
        redeem = redeem_scaler.inverse_transform(redeem_predict).reshape(30, 1)

        test_user['purchase'] = purchase
        test_user['redeem'] = redeem
        print(test_user)

        """Store submit file"""
        if self.store_result is True:
            test_user.to_csv('submit_lstm1.csv', encoding='utf-8', index=None, header=None)
            
        """plot result picture"""
        self.plot_show(test_user)
        
if __name__ == '__main__':
    initiation = RNNModel(look_back=40, epochs_purchase=150, epochs_redeem=230, batch_size=16, verbose=2, patience=50, store_result=True)
    initiation.run()

Using TensorFlow backend.


[[[0.00716242 0.0017188  0.00794832 ... 0.05148092 0.05119317 0.05225483]]

 [[0.0017188  0.00794832 0.00878555 ... 0.05119317 0.05225483 0.04581037]]

 [[0.00794832 0.00878555 0.00210188 ... 0.05225483 0.04581037 0.02520903]]

 ...

 [[0.5222233  0.42036986 0.65912163 ... 0.30236444 0.63108546 0.55318844]]

 [[0.42036986 0.65912163 0.6820178  ... 0.63108546 0.55318844 0.45539764]]

 [[0.65912163 0.6820178  0.782245   ... 0.55318844 0.45539764 0.50501776]]]
purchase_train_x: (357, 1, 40) purchase_train_y: (357, 30) redeem_train_x: (357, 1, 40) redeem_train_y: (357, 30)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 1, 64)             26880     
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dense_

KeyboardInterrupt: 