In [1]:
import warnings 
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
from datetime import datetime, date
from scipy import stats
import matplotlib.pyplot as plt
import pickle

from keras.layers import Dense, Input, Dropout, LSTM, Conv1D, Conv2D, Flatten, MaxPool1D, MaxPool2D, AvgPool1D, AvgPool2D, Masking, RepeatVector, Add, Maximum, Average, BatchNormalization
from keras.layers.advanced_activations import LeakyReLU, PReLU, ELU
from keras.models import Model, Sequential
from livelossplot import PlotLossesKeras

Using TensorFlow backend.


In [36]:
df = pd.read_csv("PopcornSmallProphetInputDF.csv")
changepoints = pickle.load(open("changepoints.pkl",'rb'), encoding='latin').index
changepoints = changepoints.insert(0,0)
changepoints = sorted(changepoints.insert(-1,df.shape[0]))

In [37]:
col_idx = 1
m = -1

for i in range(len(changepoints)-1):
    if((changepoints[i+1]-changepoints[i]) > m): m = (changepoints[i+1]-changepoints[i])

d = pd.DataFrame()
for i in range(len(changepoints)-1):
    seq = df.values[changepoints[i]:changepoints[i+1],col_idx]
    lk = np.array([seq.shape[0]])
    sk,_,_,_,_ = stats.linregress(x=list(range(lk[0])), y=seq.astype('float'))
    tempZeros = np.zeros((m+2-tempTuple.shape[0],))
    zpTuple = np.hstack((tempZeros,seq))
    tempTuple = (np.hstack((np.hstack((lk,np.array(sk))),zpTuple)))
    tempTuple = tempTuple.reshape(1,-1)
    d = pd.concat([d,pd.DataFrame(tempTuple)], axis=0)

## Data Preprocess

In [49]:
def convert_3d_data(data, timesteps, n_features):
    for i in range(timesteps):
        data = pd.concat([data,data.iloc[:,-n_features:].shift(-1)], axis=1)
    x=data.iloc[:-timesteps,:-n_features]
    y=data.iloc[:-timesteps,-n_features+2:]
    return x,y

def data_creation(df,l_col_idx, timesteps, n_features):
    data = df[df.columns[l_col_idx]]
    x,y = convert_3d_data(data, timesteps, n_features)
    print(x.shape, y.shape)
    
    x_cnn = pd.DataFrame()
    x_lstm = pd.DataFrame()
    for i in range(timesteps):
        x_cnn = pd.concat([x_cnn,pd.DataFrame(x.values[:,i*(n_features)+2:i*(n_features)+n_features])], axis=1)
        x_lstm = pd.concat([x_lstm,pd.DataFrame(x.values[:,i*(n_features):i*(n_features)+2])], axis=1)
    print(x_cnn.shape, x_lstm.shape)
    train_split = int(0.7*x_cnn.shape[0])
    x_cnn_train = x_cnn[:train_split].values
    x_cnn_test = x_cnn[train_split:].values
    x_lstm_train = x_lstm[:train_split].values
    x_lstm_test = x_lstm[train_split:].values
    y_train = y[:train_split].values
    y_test = y[train_split:].values
        
    return x_cnn_train, x_lstm_train, y_train, x_cnn_test, x_lstm_test, y_test

In [50]:
l = d.columns
x_cnn_train, x_lstm_train, y_train, x_cnn_test, x_lstm_test, y_test = data_creation(d,l,2,m+2)

(24, 379) (24, 188)
(24, 375) (24, 4)
0       0
1       0
2       0
3       0
4       0
5       0
6       0
7       0
8       0
9       0
10      0
11      0
12      0
13      0
14      0
15      0
16      0
17      0
18      0
19      0
20      0
21      0
22      0
23      0
24      0
25      0
26      0
27      0
28      0
29      0
       ..
157    24
158    24
159    24
160    24
161    24
162    24
163    24
164    24
165    24
166    24
167    24
168    24
169    24
170    24
171    24
172    24
173    24
174    24
175    24
176    24
177    24
178    24
179    24
180    24
181    24
182    24
183    24
184    24
185    24
186    24
Length: 375, dtype: int64


In [121]:
def model_run(time_steps, cnn_shape, x_cnn_train, x_lstm_train, y_train, x_cnn_test, x_lstm_test, y_test):
    
    LSTM_inp = Input(shape=(time_steps,2))    
    x1 = LSTM(100)(LSTM_inp)
    x1 = LeakyReLU(0.3)(x1)
    x1 = Dropout(0.15)(x1)
    x1 = Dense(300)(x1)
    x1 = LeakyReLU(0.3)(x1)
    out_lstm = Dropout(0.2)(x1)
    
    CNN_inp = Input(shape=cnn_shape)
    x = Conv1D(filters=32,kernel_size=4)(CNN_inp)
    x = BatchNormalization()(x)
    x = Conv1D(filters=32,kernel_size=4)(x)
    x = BatchNormalization()(x)
    x = MaxPool1D(pool_size=2)(x)
    x = Dropout(0.2)(x)
    x = Flatten()(x)
    x = Dense(300)(x)
    out_cnn = LeakyReLU(0.3)(x)
    
    feat_fus = Add()([out_lstm,out_cnn])
    feat_fus = LeakyReLU(0.25)(feat_fus)
    feat_fus = Dense(cnn_shape[0])(feat_fus)
    feat_fus = LeakyReLU(0.3)(feat_fus)
    out_model = Dropout(0.12)(feat_fus)
    
    model = Model(inputs=[LSTM_inp,CNN_inp], outputs=[out_model], name='TreRNN')
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae','acc'])
    model.summary()
    model.fit([x_lstm,x_cnn], y_train, callbacks=[PlotLossesKeras()], verbose=0, batch_size=50, epochs=100, validation_data=([x_lstm_test,x_cnn_test],y_test))
    return model

In [14]:
d.shape

(26, 190)

In [None]:
model = model_run(10,(1,1), )