In [7]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import csv
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix,classification_report
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers import TimeDistributed,BatchNormalization
from keras import optimizers
import pickle
np.random.seed(1234)
np.set_printoptions(suppress=True)


## Combine multivariate
https://stackoverflow.com/questions/36992855/keras-how-should-i-prepare-input-data-for-rnn

# Data preparation

## Convert data to sequences

In [2]:
COLUMNS = [u'Open', u'High', u'Low', u'Close', u'Volume ', u'KC_M_1', u'KC_U_1',
       u'KC_D_1', u'KC_M_2', u'KC_U_2', u'KC_D_2', u'KC_M_3', u'KC_U_3',
       u'KC_D_3', u'Donchian_High_3', u'Donchian_Low_3', u'ATR', u'EMA12',
       u'EMA30', u'cs_size', u'cs_body_size', u'cs_body_pos', u'cs_body_ratio']

COLUMNS_BOOLEAN =  ['DOWN_next_Low_under_Close',
 'DOWN_next_High_over_Close',
 'UP_next_High_over_Close',
 'UP_next_Low_under_Close',
 'DOWN_next_Low_and_High',
 'UP_next_Low_and_High']


In [3]:
def convert_to_sequences(df,predictors_name_list, target_name):
    
    df = df.drop('date_time',axis=1)
    
    # drop the first 30 lines, because EMA30 is empty
    df = df[30:-1]
    df = df[-3000:]
    print df.tail(1)
    print df.head(1)
    
    COL_DAY_BEFORE = [col+"_day_before" for col in COLUMNS_BOOLEAN]
    df[COL_DAY_BEFORE] = df[COLUMNS_BOOLEAN].shift(1)
    day_before = df[COL_DAY_BEFORE].values
    day_before = np.nan_to_num(day_before)
    print day_before    
    
    #df = df[:1000]
    
    # add an index column to check the correct alignment between predictor & target
    df['index']=df.index
    index = df.index.values
    index = index.reshape(index.size,1)

    predictors = df[predictors_name_list].values
   
    target = df[['index']+ target_name].values
    print "target :"
    print target
        
    # regularize
    minmax = preprocessing.StandardScaler()
    print predictors[0]
    print predictors.shape
    
    predictors_scaled = minmax.fit_transform(predictors)
    predictors_scaled = predictors
    print predictors[:,0][0]
    print predictors_scaled[0]
    
    
    print "shapes"
    print day_before.shape
    print index.shape
    print predictors_scaled.shape
    
    print "end shape"
    #predictors_scaled_with_index = np.concatenate((index,predictors_scaled,day_before),axis=1)
    predictors_scaled_with_index = np.concatenate((index,predictors_scaled),axis=1)

    #predictors_scaled_with_index = np.concatenate((index,day_before),axis=1)
  
    print predictors_scaled_with_index[:5]

    return predictors_scaled_with_index,target

In [4]:
def create_sequences(predictors,target, drop_index=False):
    drop_ix = drop_index*1

    flag = 0
    time_steps = 50

    for sample in range(time_steps,predictors.shape[0]):
        
        tmp_predictors = np.array([predictors[sample-time_steps:sample][:,range(drop_ix,predictors.shape[1])]])
        
        if flag==0:
            new_input_predictor = tmp_predictors
            flag = 1

        else:
            new_input_predictor = np.concatenate((new_input_predictor,tmp_predictors))
            
    new_input_target = target[time_steps-1:-1][:,range(drop_ix,target.shape[1])]
    
            
    return new_input_predictor,new_input_target

In [5]:
df = pd.read_csv("tradeModel_data.csv")

df[COLUMNS_BOOLEAN].describe().loc['mean']

DOWN_next_Low_under_Close    0.070066
DOWN_next_High_over_Close    0.000392
UP_next_High_over_Close      0.069574
UP_next_Low_under_Close      0.088129
DOWN_next_Low_and_High       0.000392
UP_next_Low_and_High         0.003382
Name: mean, dtype: float64

In [6]:
df['next_day_High_UP']=((df.High<df.next_High))*1
print df.next_day_High_UP.head()
print df.next_day_High_UP.mean()

0    1
1    0
2    0
3    1
4    0
Name: next_day_High_UP, dtype: int64
0.460745120435


In [7]:
predictors,target = convert_to_sequences(df,[ u'Open', u'High', u'Low', u'Close',u'Volume ',u'Donchian_High_3', u'Donchian_Low_3', u'ATR', u'EMA12',
       u'EMA30', u'cs_size', u'cs_body_ratio'],['next_day_High_UP'])

          Open     High      Low    Close  Volume     KC_M_1    KC_U_1  \
89299  1.06085  1.06158  1.06072  1.06145   1287.7  1.061029  1.061609   

         KC_D_1    KC_M_2    KC_U_2        ...         \
89299  1.060449  1.061029  1.061609        ...          

       DOWN_next_High_over_Close  UP_next_High_over_Close  \
89299                          0                        0   

       UP_next_Low_under_Close  DOWN_next_Low_and_High  UP_next_Low_and_High  \
89299                        0                       0                     0   

       hour  day  month  year  next_day_High_UP  
89299    21    4      2  2017                 0  

[1 rows x 37 columns]
          Open     High      Low    Close  Volume     KC_M_1    KC_U_1  \
86300  1.06072  1.06161  1.06056  1.06123  4036.48  1.061009  1.062266   

         KC_D_1    KC_M_2    KC_U_2        ...         \
86300  1.059752  1.061009  1.062266        ...          

       DOWN_next_High_over_Close  UP_next_High_over_Close  \
8630

In [8]:

print predictors[:10]
print "\n ++++++++++++++++++ \n"
print target[:10]

[[ 86300.              1.06072         1.06161         1.06056         1.06123
    4036.48            1.06204         1.06004         0.00112665
       1.06207514      1.06235171      0.00105         0.00011   ]
 [ 86301.              1.0612          1.06154         1.06043         1.06143
    4131.87            1.06161         1.06004         0.00112249
       1.06197589      1.06229225      0.00111        -0.00033   ]
 [ 86302.              1.06144         1.06311         1.06107         1.06274
    7921.07            1.06311         1.06043         0.00135187
       1.06209344      1.06232113      0.00204         0.        ]
 [ 86303.              1.06273         1.06365         1.062           1.06273
    6532.06            1.06365         1.06043         0.0014264
       1.06219137      1.06234751      0.00165         0.000095  ]
 [ 86304.              1.06273         1.06361         1.06215         1.06224
    5177.35            1.06365         1.06107         0.0014348
       1.

In [9]:
print predictors.shape,target.shape

(3000, 13) (3000, 2)


In [10]:
predictors_sequenced,target_sequenced = create_sequences(predictors,target,drop_index=True)
print predictors_sequenced.shape
print target_sequenced.shape

(2950, 50, 12)
(2950, 1)


In [11]:
predictors_sequenced[0][:5]

array([[    1.06072   ,     1.06161   ,     1.06056   ,     1.06123   ,
         4036.48      ,     1.06204   ,     1.06004   ,     0.00112665,
            1.06207514,     1.06235171,     0.00105   ,     0.00011   ],
       [    1.0612    ,     1.06154   ,     1.06043   ,     1.06143   ,
         4131.87      ,     1.06161   ,     1.06004   ,     0.00112249,
            1.06197589,     1.06229225,     0.00111   ,    -0.00033   ],
       [    1.06144   ,     1.06311   ,     1.06107   ,     1.06274   ,
         7921.07      ,     1.06311   ,     1.06043   ,     0.00135187,
            1.06209344,     1.06232113,     0.00204   ,     0.        ],
       [    1.06273   ,     1.06365   ,     1.062     ,     1.06273   ,
         6532.06      ,     1.06365   ,     1.06043   ,     0.0014264 ,
            1.06219137,     1.06234751,     0.00165   ,     0.000095  ],
       [    1.06273   ,     1.06361   ,     1.06215   ,     1.06224   ,
         5177.35      ,     1.06365   ,     1.06107   ,     

In [12]:
target_sequenced[0]

array([1])

In [13]:
predictors_sequenced.shape

(2950, 50, 12)

In [14]:
new_input_predictor = predictors_sequenced
new_input_target = target_sequenced

In [15]:
new_input_predictor[0][:5]

array([[    1.06072   ,     1.06161   ,     1.06056   ,     1.06123   ,
         4036.48      ,     1.06204   ,     1.06004   ,     0.00112665,
            1.06207514,     1.06235171,     0.00105   ,     0.00011   ],
       [    1.0612    ,     1.06154   ,     1.06043   ,     1.06143   ,
         4131.87      ,     1.06161   ,     1.06004   ,     0.00112249,
            1.06197589,     1.06229225,     0.00111   ,    -0.00033   ],
       [    1.06144   ,     1.06311   ,     1.06107   ,     1.06274   ,
         7921.07      ,     1.06311   ,     1.06043   ,     0.00135187,
            1.06209344,     1.06232113,     0.00204   ,     0.        ],
       [    1.06273   ,     1.06365   ,     1.062     ,     1.06273   ,
         6532.06      ,     1.06365   ,     1.06043   ,     0.0014264 ,
            1.06219137,     1.06234751,     0.00165   ,     0.000095  ],
       [    1.06273   ,     1.06361   ,     1.06215   ,     1.06224   ,
         5177.35      ,     1.06365   ,     1.06107   ,     

In [16]:
new_input_target

array([[1],
       [1],
       [1],
       ..., 
       [0],
       [1],
       [1]])

In [17]:
row = int(round(0.9 * new_input_predictor.shape[0]))

X_train = new_input_predictor[:row, :]
X_test = new_input_predictor[row:,:]

y_train = new_input_target[:row,]
y_test = new_input_target[row:,]

data = [X_train, y_train, X_test, y_test]

In [18]:

pickle.dump(data, open("data_trade.p", "wb"))

In [8]:
data = pickle.load(open("data_trade.p", "rb"))

In [9]:
data[0].shape

(2655, 50, 12)

In [10]:
def build_model():

    model = Sequential()
    layers = [12, 128, 512, 1]
    
    model.add(LSTM(units=128, return_sequences=True, input_shape=(None, layers[0])))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    
    model.add(LSTM(
            layers[2],
            return_sequences=False))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    
    model.add(Dense(units=1))
 
    model.add(Activation("sigmoid"))   
    #model.add(Activation("linear"))   
    
    start = time.time()
    model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['binary_accuracy'])
    
    #model.compile(loss="mse", optimizer="rmsprop")
    
    print "Compilation Time : ", time.time() - start
    return model

In [11]:
def run_network(model=None, data=None):
    epochs = 10
    ratio = 0.5
    
    X_train, y_train, X_test, y_test = data

    
    print '\nData Loaded. Compiling...\n'
    
    if model is None:
        model = build_model()
        
    try:
        model.fit(
            X_train, y_train,
            batch_size=512, epochs=epochs, validation_split=0.05)
        
        print "finished fit"
        predicted = model.predict(X_test)
        print "finished predicting"
        
        predicted = np.reshape(predicted, (predicted.size,))
        print predicted
    except KeyboardInterrupt:
        print 'Training duration (s) : ', time.time() - global_start_time
    return model, y_test, 0


In [12]:
model,y_test,_ = run_network(data=data)


Data Loaded. Compiling...

Compilation Time :  0.0303571224213
Train on 2522 samples, validate on 133 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10

NameError: global name 'global_start_time' is not defined

In [None]:
prediction= model.predict(X_test)

In [None]:
prediction.shape

In [None]:
prediction.reshape(prediction.size,)

In [None]:
df_prediction = pd.DataFrame({'observed': y_test.reshape(y_test.size,),'predicted':prediction.reshape(prediction.size,)})

In [None]:
df_prediction.describe()

In [None]:
df_prediction['prediction_decision']=(df_prediction.predicted>0.5)*1

In [None]:
df_prediction.observed.value_counts()

In [None]:
confusion_matrix(df_prediction.observed,df_prediction.prediction_decision)

In [None]:
print classification_report(df_prediction.observed,df_prediction.prediction_decision)