In [29]:
import os
import pandas as pd
from os.path import expanduser
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [2]:
base_path = expanduser("~") + "/data/NSE-2020-09-22/"
files = os.listdir(base_path)
files

['britannia.csv', 'tcs.csv', 'hindustan_lvr.csv', 'reliance.csv', 'infy.csv']

In [3]:
file_name = files[0]
df = pd.read_csv(base_path + "/" + file_name)
df.index = pd.to_datetime(df.Date)
stock_name = file_name[:3]
df = df[["No. of Trades", "Close Price"]]
df.columns = [stock_name + "_trades", stock_name + "_price"]
df = df.loc[df.index.drop_duplicates(keep = "first")]
df.head(20)


Unnamed: 0_level_0,bri_trades,bri_price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-10-11,25631,5570.2
2018-10-12,24263,5757.5
2018-10-15,22204,5671.6
2018-10-16,13405,5700.5
2018-10-17,22435,5541.05
2018-10-19,30085,5563.5
2018-10-22,14898,5580.15
2018-10-23,22854,5484.55
2018-10-24,35057,5360.15
2018-10-25,49203,5309.3


In [4]:
def load_data(file_name):
    df = pd.read_csv(base_path + "/" + file_name)
    df.index = pd.to_datetime(df.Date)
    stock_name = file_name[:3]
    df = df[["No. of Trades", "Close Price"]]
    df.columns = [stock_name + "_trades", stock_name + "_price"]
    df = df.loc[df.index.drop_duplicates(keep = "first")]
    return df

dfs = [load_data(file) for file in files]

In [16]:
target = "tcs"
df_joined = dfs[0]
for df in dfs[1:]:
    df_joined = df_joined.merge(df, left_index = True, right_index = True)
df_joined = df_joined.loc[df_joined.index.drop_duplicates(keep = "first")]
df_joined["day"] = df_joined.index.weekday
#df_joined["month"] = df_joined.index.month
df_joined["lead"] = df_joined[target + "_price"].shift(-1)
df_joined.dropna(inplace = True)
df_joined["buy"] = np.where(df_joined["lead"] > df_joined[target + "_price"], 1, 0)
df_joined.drop(columns = "lead", inplace = True)
df_joined.head(20)

Unnamed: 0_level_0,bri_trades,bri_price,tcs_trades,tcs_price,hin_trades,hin_price,rel_trades,rel_price,inf_trades,inf_price,day,buy
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-10-11,25631,5570.2,217064,1979.95,108518,1526.1,278072,1087.8,285534,673.35,3,0
2018-10-12,24263,5757.5,337356,1918.3,78027,1569.6,192795,1126.55,214848,678.8,4,1
2018-10-15,22204,5671.6,102337,1949.5,119926,1526.3,200369,1139.75,121763,698.8,0,1
2018-10-16,13405,5700.5,116402,1962.3,53801,1546.2,153398,1163.8,266266,695.25,1,0
2018-10-17,22435,5541.05,149723,1929.4,49892,1562.0,203752,1151.3,280701,705.35,2,0
2018-10-19,30085,5563.5,152721,1913.2,60944,1578.4,526528,1101.3,245258,683.55,4,0
2018-10-22,14898,5580.15,104583,1903.0,47091,1584.9,324137,1062.65,118989,679.95,0,0
2018-10-23,22854,5484.55,150962,1843.65,37654,1552.05,232603,1054.7,206699,657.6,1,1
2018-10-24,35057,5360.15,143163,1848.5,42625,1585.95,218013,1045.75,151398,649.8,2,1
2018-10-25,49203,5309.3,160372,1853.0,71872,1571.55,233601,1030.8,196622,648.75,3,0


In [17]:
X = df_joined.iloc[:,:-1].values
y = df_joined.iloc[:, -1].values

print("X: ",X.shape, "y: ", y.shape)
trainig_size = int(len(X) * 0.7)
trainig_size

X:  (765, 11) y:  (765,)


535

In [18]:
X_std = keras.utils.normalize(X)
X_std.shape

(765, 11)

In [19]:
pd.Series(y).value_counts()

0    509
1    256
dtype: int64

In [20]:
pd.DataFrame(X_std).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
count,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0,765.0
mean,0.074137,0.007154,0.378461,0.006365,0.260485,0.005915,0.697819,0.004412,0.449347,0.002284,7e-06
std,0.078509,0.009571,0.138089,0.004996,0.115209,0.004246,0.165332,0.003356,0.16893,0.001794,1e-05
min,3.9e-05,1.8e-05,3e-06,0.001211,3e-06,0.001256,4e-06,0.000835,4e-06,0.00038,0.0
25%,0.00046,0.000105,0.290779,0.004249,0.188573,0.004094,0.601152,0.003062,0.338257,0.00154,2e-06
50%,0.070576,0.007214,0.370305,0.006044,0.248606,0.005658,0.712368,0.004224,0.442928,0.002197,5e-06
75%,0.109225,0.01081,0.463717,0.007689,0.306226,0.006988,0.815695,0.005359,0.545919,0.002814,9e-06
max,0.96844,0.177426,0.993775,0.115382,0.970182,0.096629,0.996766,0.078758,0.995665,0.042312,0.000229


In [21]:
from tensorflow import keras
from tensorflow.keras import preprocessing

In [35]:

tf.random.set_seed(1)

features = X_std.shape[-1]
seq_length = 10
seq_gen = preprocessing.timeseries_dataset_from_array(
    X_std,
    y,
    sequence_length = seq_length,
    sequence_stride=1,
    sampling_rate=1,
    batch_size=128,
    shuffle=False,
    seed=1,
    start_index=None,
    end_index=None,
)

model = keras.Sequential([
    keras.layers.Input(shape = (seq_length, features)),
    keras.layers.LSTM(128, return_sequences = True, activation="tanh"),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(rate = 0.1),
    
    keras.layers.LSTM(128, return_sequences = True, activation="tanh"),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(rate = 0.1),
    
    keras.layers.LSTM(128, return_sequences = False, activation="tanh"),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(rate = 0.1),
    keras.layers.Dense(10, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid")
])

model.summary()

op = keras.optimizers.Adam(lr = 0.0001, decay = 1e-6)
model.compile(loss = keras.losses.categorical_crossentropy,
             optimizer = op, 
             metrics = ["accuracy"])
model.fit(seq_gen, epochs = 10)

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_42 (LSTM)               (None, 10, 128)           71680     
_________________________________________________________________
batch_normalization_42 (Batc (None, 10, 128)           512       
_________________________________________________________________
dropout_42 (Dropout)         (None, 10, 128)           0         
_________________________________________________________________
lstm_43 (LSTM)               (None, 10, 128)           131584    
_________________________________________________________________
batch_normalization_43 (Batc (None, 10, 128)           512       
_________________________________________________________________
dropout_43 (Dropout)         (None, 10, 128)           0         
_________________________________________________________________
lstm_44 (LSTM)               (None, 128)             

<tensorflow.python.keras.callbacks.History at 0x7f95577fba50>

In [31]:
a = np.random.random((10, 3))
b = np.random.randint(0, 2, (10))
a, b

(array([[0.35459672, 0.57901786, 0.97588582],
        [0.47207995, 0.20871322, 0.63273235],
        [0.08385671, 0.1668671 , 0.06375077],
        [0.41721806, 0.4332064 , 0.95896679],
        [0.64755989, 0.94232556, 0.38348314],
        [0.64519347, 0.9552901 , 0.95475908],
        [0.43374228, 0.84890182, 0.18150128],
        [0.10769463, 0.29212694, 0.80064019],
        [0.13972653, 0.20444009, 0.91904515],
        [0.38356391, 0.21725434, 0.52960282]]),
 array([0, 0, 1, 1, 0, 0, 1, 1, 1, 1]))

In [13]:
seq_length = 3
seq_gen = preprocessing.timeseries_dataset_from_array(
    a,
    b,
    sequence_length = seq_length,
    sequence_stride=1,
    sampling_rate=1,
    batch_size=128,
    shuffle=False,
    seed=1,
    start_index=None,
    end_index=None,
)
list(seq_gen)

[(<tf.Tensor: shape=(8, 3, 3), dtype=float64, numpy=
  array([[[0.08875194, 0.30353943, 0.99602572],
          [0.95511166, 0.81881605, 0.79247305],
          [0.31982508, 0.18598466, 0.50949379]],
  
         [[0.95511166, 0.81881605, 0.79247305],
          [0.31982508, 0.18598466, 0.50949379],
          [0.67150406, 0.37743908, 0.38580816]],
  
         [[0.31982508, 0.18598466, 0.50949379],
          [0.67150406, 0.37743908, 0.38580816],
          [0.55989825, 0.29849779, 0.74924467]],
  
         [[0.67150406, 0.37743908, 0.38580816],
          [0.55989825, 0.29849779, 0.74924467],
          [0.96512904, 0.0134961 , 0.41142135]],
  
         [[0.55989825, 0.29849779, 0.74924467],
          [0.96512904, 0.0134961 , 0.41142135],
          [0.53419779, 0.46922097, 0.3780943 ]],
  
         [[0.96512904, 0.0134961 , 0.41142135],
          [0.53419779, 0.46922097, 0.3780943 ],
          [0.16119755, 0.32565942, 0.52046512]],
  
         [[0.53419779, 0.46922097, 0.3780943 ],
          [