# Modeling 
Here, I have tried to use LSTM to predict Low and high values for a day. Features are one minute stock values from 9:15 to 11:00. 
Right now, we will make a model for only one stock.

In [1]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential

import pandas as pd
import numpy as np
import math

In [15]:
stock_file_path = r"U:\Study\Durham_AI_Course\Projects\AI Algorithms\dataset\FullDataCsv\TATASTEEL__EQ__NSE__NSE__MINUTE.csv"

stock_df = pd.read_csv(stock_file_path)
stock_df.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2017-01-02 09:15:00+05:30,392.05,392.15,390.9,390.9,39465.0
1,2017-01-02 09:16:00+05:30,391.0,391.35,390.5,390.9,19199.0
2,2017-01-02 09:17:00+05:30,390.65,390.9,390.1,390.1,19773.0
3,2017-01-02 09:18:00+05:30,390.0,390.0,389.3,389.75,17499.0
4,2017-01-02 09:19:00+05:30,389.75,390.0,389.55,389.8,13701.0


#### Let's get one minute data day wise

In [16]:
def get_one_minute_data(file_path, train_steps_size=105, test_steps_size=355, last_days=0):
    stock_df = pd.read_csv(file_path)

    stock_df['timestamp'] = pd.to_datetime(stock_df['timestamp'])
    stock_df_groups = stock_df.groupby(pd.DatetimeIndex(stock_df['timestamp']).normalize())

    one_minute_data = []
    low_high_next_numbers = []

    count = 0

    previous_data = []
    
    for i, item in enumerate(stock_df_groups):
        name, group = item
        
        if int(group.isnull().sum().sum()) > 0:
            count += 1
            continue

        if len(group['close']) < test_steps_size:
            continue
            
        numbers = list(group['close'].iloc[:train_steps_size].values)
        
        if i < last_days:
            previous_data = list(group['close'].values)
            continue
        
        if last_days > 0:
            if len(previous_data) != last_days * 375:
                continue
        
        new_numbers = previous_data + numbers
        
        if last_days > 0:
            previous_data = list(group['close'].values)
        
        scaler = MinMaxScaler()
        new_data = scaler.fit_transform(np.reshape(np.asarray(new_numbers),(-1,1)).astype(np.float32))
        
        one_minute_data.append(new_data)

        next_numbers = group['close'].iloc[train_steps_size:test_steps_size].values.astype(np.float32)

        high = max(next_numbers)
        low = min(next_numbers)
        
        new_low, new_high = scaler.transform([[low,high]]).astype(np.float32)[0]

        low_high_next_numbers.append((new_low, new_high, scaler.transform([next_numbers])[0]))

    one_minute_data = np.asarray(one_minute_data).astype(np.float32)
    low_high_next_numbers = np.asarray(low_high_next_numbers)
    return one_minute_data, low_high_next_numbers

# Train an LSTM Model

### Split Data

In [17]:
train_steps_size = 105
features = 1
batch_size = 100

In [18]:
one_minute_data, low_high_next_numbers = get_one_minute_data(stock_file_path, train_steps_size=train_steps_size, last_days=0) 

In [19]:
train_x, test_x, train_y, test_y = train_test_split(one_minute_data, low_high_next_numbers, random_state=1)

train_low_values = train_y[:,0].astype(np.float32)
train_high_values = train_y[:,1].astype(np.float32)

test_low_values = test_y[:,0].astype(np.float32)
test_high_values = test_y[:,1].astype(np.float32)

In [20]:
train_x.shape

(662, 105, 1)

So, above shape is perfect for the LSTM. It needs to be a 3d array (samples, timesteps, features). We have only one feature, closing price for each minute. Time steps are 105 for 9:15 AM to 11:00 AM.

In [21]:
test_x.shape

(221, 105, 1)

In [22]:
train_low_values.shape

(662,)

In [23]:
train_high_values.shape

(662,)

Input Data Seem Proper. So, now let's feed it to LSTM.

In [24]:
low_model = Sequential()

# A bit complex model.
# low_model.add(LSTM(200, input_shape=(train_steps_size, features), return_sequences=True))
# low_model.add(Dropout(0.2))
# low_model.add(LSTM(200))
# low_model.add(Dropout(0.2))
# low_model.add(Dense(16))
# low_model.add(Dense(1))

# Simple Model 1.
low_model.add(LSTM(200, input_shape=(train_steps_size, features)))
low_model.add(Dense(16))
low_model.add(Dense(1))

# Simple Model 2.
# low_model.add(LSTM(200, input_shape=(train_steps_size, features)))
# low_model.add(Dense(32))
# low_model.add(Dense(16))
# low_model.add(Dense(1))

low_model.compile(loss='mean_squared_error', optimizer='adam')

low_model.fit(train_x, train_low_values, epochs=10, batch_size=batch_size, verbose=2)

pred_low = low_model.predict(test_x)
test_score = math.sqrt(mean_squared_error(test_low_values, pred_low[:,0]))
print("Test Score: %.2f RMSE" % test_score)

Epoch 1/10
7/7 - 1s - loss: 0.5763
Epoch 2/10
7/7 - 1s - loss: 0.5068
Epoch 3/10
7/7 - 1s - loss: 0.4724
Epoch 4/10
7/7 - 1s - loss: 0.4597
Epoch 5/10
7/7 - 1s - loss: 0.4526
Epoch 6/10
7/7 - 1s - loss: 0.4541
Epoch 7/10
7/7 - 1s - loss: 0.4494
Epoch 8/10
7/7 - 1s - loss: 0.4499
Epoch 9/10
7/7 - 1s - loss: 0.4469
Epoch 10/10
7/7 - 1s - loss: 0.4493
Test Score: 0.62 RMSE


In [25]:
stock_df.iloc[train_steps_size:].head()

Unnamed: 0,timestamp,open,high,low,close,volume
105,2017-01-02 11:00:00+05:30,394.6,394.7,394.2,394.25,3607.0
106,2017-01-02 11:01:00+05:30,394.3,394.3,394.1,394.3,2263.0
107,2017-01-02 11:02:00+05:30,394.3,394.45,394.0,394.25,7831.0
108,2017-01-02 11:03:00+05:30,394.25,394.45,394.1,394.4,1869.0
109,2017-01-02 11:04:00+05:30,394.4,394.4,393.9,394.0,4646.0


In [26]:
high_model = Sequential()

high_model.add(LSTM(200, input_shape=(train_steps_size, features)))
high_model.add(Dense(16))
high_model.add(Dense(1))

high_model.compile(loss='mean_squared_error', optimizer='adam')

high_model.fit(train_x, train_high_values, epochs=10, batch_size=batch_size, verbose=2)

pred_high = high_model.predict(test_x)
test_score = math.sqrt(mean_squared_error(test_high_values, pred_high[:,0]))
print("Test Score: %.2f RMSE" % test_score)

Epoch 1/10
7/7 - 1s - loss: 1.0679
Epoch 2/10
7/7 - 1s - loss: 0.6221
Epoch 3/10
7/7 - 1s - loss: 0.5829
Epoch 4/10
7/7 - 1s - loss: 0.5508
Epoch 5/10
7/7 - 1s - loss: 0.5353
Epoch 6/10
7/7 - 1s - loss: 0.5309
Epoch 7/10
7/7 - 1s - loss: 0.5282
Epoch 8/10
7/7 - 1s - loss: 0.5286
Epoch 9/10
7/7 - 1s - loss: 0.5274
Epoch 10/10
7/7 - 1s - loss: 0.5234
Test Score: 0.60 RMSE


# Save Model

In [27]:
low_model.save("low_model")
high_model.save("high_model")

INFO:tensorflow:Assets written to: low_model\assets
INFO:tensorflow:Assets written to: high_model\assets


# Evaluate Model by Trading Strategy 

Our strategy is quite simple. Target is gettin 0.5% profit and maximum loss we can bear is also 0.5%, So risk:reward is 1:1.

In [28]:
profit=0.5
stoploss=-0.5
test_next_numbers = low_high_next_numbers[:, 2]

test_samples_size = len(test_x)

pred_y_low = low_model.predict(test_x)
pred_y_high = high_model.predict(test_x)

count = 0
success = 0
fail = 0

for i in range(test_samples_size):
    true_low = test_low_values[i]
    true_high = test_high_values[i]

    predicted_low = pred_y_low[i]
    predicted_high = pred_y_low[i]

    buy_value = predicted_low

    if true_low <= buy_value <= true_high:
        count += 1

        sell_value = buy_value + (buy_value * profit) / 100
        stop_loss_value = buy_value + (buy_value * stoploss) / 100

        bought = False

        for value in test_next_numbers[i]:
            if bought:
                if value <= stop_loss_value:
                    fail += 1
                    break
                elif value >= sell_value:
                    success += 1
                    break
            else:
                if value <= buy_value:
                    bought = True

print(test_samples_size)
print("Possible Trades:", count)
print("Successful Trades:", success)
print("Fail Trades:", fail)

win_ratio = round(success / (success + fail), 4)

print("Win Ratio:", win_ratio)

221
Possible Trades: 97
Successful Trades: 15
Fail Trades: 38
Win Ratio: 0.283
