## Imports

In [27]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import cufflinks

cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

## Read Data

In [123]:
sanimads = pd.read_csv("SANIMA_2000-01-01_2021-12-31.csv", engine='python')
sanimads.head()

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
0,1,2021-12-29,169,40088.0,14203816.4,356.0,350.0,354.0
1,2,2021-12-28,225,36901.0,12875470.3,355.0,338.2,352.0
2,3,2021-12-27,311,63171.0,22589549.7,363.0,355.0,356.0
3,4,2021-12-26,312,53556.0,19125145.5,362.0,348.5,358.0
4,5,2021-12-23,267,37310.0,13066461.5,353.0,344.1,350.0


## Sort Data in Ascending order

In [125]:
sanimads = sanimads.sort_values(by="Date")
sanimads

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
2256,2257,2010-04-19,8,1025.0,405750.0,400.0,390.0,390.0
2255,2256,2010-04-20,12,1150.0,439525.0,385.0,379.0,384.0
2254,2255,2010-04-21,3,150.0,55850.0,377.0,370.0,370.0
2253,2254,2010-04-22,11,593.0,208378.0,365.0,342.0,359.0
2252,2253,2010-04-25,18,2825.0,969420.0,366.0,338.0,338.0
...,...,...,...,...,...,...,...,...
4,5,2021-12-23,267,37310.0,13066461.5,353.0,344.1,350.0
3,4,2021-12-26,312,53556.0,19125145.5,362.0,348.5,358.0
2,3,2021-12-27,311,63171.0,22589549.7,363.0,355.0,356.0
1,2,2021-12-28,225,36901.0,12875470.3,355.0,338.2,352.0


## Handle Missing NAs

In [126]:
sanimads= sanimads.dropna()
sanimads.tail()

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
4,5,2021-12-23,267,37310.0,13066461.5,353.0,344.1,350.0
3,4,2021-12-26,312,53556.0,19125145.5,362.0,348.5,358.0
2,3,2021-12-27,311,63171.0,22589549.7,363.0,355.0,356.0
1,2,2021-12-28,225,36901.0,12875470.3,355.0,338.2,352.0
0,1,2021-12-29,169,40088.0,14203816.4,356.0,350.0,354.0


## Plot Trend

In [127]:
sanimads.iplot(kind="line",x="Date",y="Close Price",xTitle="Date", yTitle="Close Price", title="SANIMA Trend")

## Next Price

In [128]:
sanimads['Next Price'] = sanimads.shift(-1)['Close Price']

## Rename Columns

In [129]:
sanimads.rename(columns = {'S.N.':'SN','Total Transactions':'TTrans', 'Total Traded Shares':'TTS',
                              'Total Traded Amount':'TTA','Close Price':'ClosePrice','Next Price':'NextPrice',
                     'Max. Price':'MaxPrice','Min. Price':'MinPrice'}, inplace = True)

## Prepare Features

In [130]:
features = ['ClosePrice',"Date"]
outFeature = ['NextPrice',"Date"]

In [131]:
# take close price as feature and next price as out feature
X, Y = sanimads[features], sanimads[outFeature] 
X.set_index("Date", inplace=True)
Y.set_index("Date", inplace=True)

## Normalize Data

In [132]:
#Normalize data using standard scalar.
ss = StandardScaler()
# print(type(X))
# print(X)
X["ClosePrice"] = ss.fit_transform(X)
Y["NextPrice"] = ss.fit_transform(Y)
# print(type(Xs))
# print(Xs)
# print(Ys)

## Train Test Split

Using train_test_split of sklearn will split data randomly so the sequence is lost.

In [133]:
def train_test_split(x,y,train_size):
    n = int(len(x)*train_size)
    trainx = x[:n]
    testx = x[n:]
    trainy=y[:n]
    testy=y[n:]
    return trainx,testx,trainy,testy

In [134]:
#Split train, validation, test set from X,Y to 80%, 10%, 10%

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9)

# X_test, X_valid, Y_test, Y_valid = train_test_split(X_test, Y_test, train_size=0.5)

# print(X_train.shape)
# print(X_valid.shape)
# print(X_test.shape)

# print(Y_train.shape)
# print(Y_valid.shape)
# print(Y_test.shape)

In [135]:
X_train.shape,X_test.shape,X.shape

((2031, 1), (226, 1), (2257, 1))

## Convert to Matrix
https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html

In [136]:
# convert into dataset matrix
def convertToMatrix(data, step):
    data = data.to_numpy()
    X, Y =[], []
    for i in range(len(data)-step):
        d=i+step  
        X.append(data[i:d,])
        Y.append(data[d,])
    return np.array(X), np.array(Y)

step=5

trainX,trainY =convertToMatrix(X_train,step)
testX,testY =convertToMatrix(X_test,step)
# validX,validY = convertToMatrix(X_valid,step)

trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
# validX = np.reshape(validX, (validX.shape[0], 1, validX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

trainX.shape,testX.shape #,validX.shape

((2026, 1, 5), (221, 1, 5))

## Modeling

In [156]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN


# SimpleRNN model
model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(1,step), activation="relu"))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='rmsprop')
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_12 (SimpleRNN)   (None, 32)                1216      
                                                                 
 dense_24 (Dense)            (None, 8)                 264       
                                                                 
 dense_25 (Dense)            (None, 1)                 9         
                                                                 
Total params: 1,489
Trainable params: 1,489
Non-trainable params: 0
_________________________________________________________________


In [168]:
# we have 90% of data as train and if we take 11.11% from train, that will be equal to 10% of overall data as validation
model.fit(trainX,trainY, validation_split=0.111,#validation_data=(validX,validY),
          epochs=100, batch_size=16, verbose=2)
trainPredict = model.predict(trainX)
testPredict= model.predict(testX)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

Epoch 1/100
113/113 - 0s - loss: 0.0159 - val_loss: 0.0021 - 201ms/epoch - 2ms/step
Epoch 2/100
113/113 - 0s - loss: 0.0158 - val_loss: 0.0025 - 174ms/epoch - 2ms/step
Epoch 3/100
113/113 - 0s - loss: 0.0158 - val_loss: 0.0023 - 171ms/epoch - 2ms/step
Epoch 4/100
113/113 - 0s - loss: 0.0157 - val_loss: 0.0022 - 174ms/epoch - 2ms/step
Epoch 5/100
113/113 - 0s - loss: 0.0159 - val_loss: 0.0022 - 176ms/epoch - 2ms/step
Epoch 6/100
113/113 - 0s - loss: 0.0159 - val_loss: 0.0034 - 215ms/epoch - 2ms/step
Epoch 7/100
113/113 - 0s - loss: 0.0158 - val_loss: 0.0021 - 175ms/epoch - 2ms/step
Epoch 8/100
113/113 - 0s - loss: 0.0158 - val_loss: 0.0022 - 192ms/epoch - 2ms/step
Epoch 9/100
113/113 - 0s - loss: 0.0158 - val_loss: 0.0024 - 170ms/epoch - 2ms/step
Epoch 10/100
113/113 - 0s - loss: 0.0158 - val_loss: 0.0029 - 174ms/epoch - 2ms/step
Epoch 11/100
113/113 - 0s - loss: 0.0159 - val_loss: 0.0022 - 177ms/epoch - 2ms/step
Epoch 12/100
113/113 - 0s - loss: 0.0158 - val_loss: 0.0031 - 187ms/epoch 

Epoch 98/100
113/113 - 0s - loss: 0.0154 - val_loss: 0.0025 - 167ms/epoch - 1ms/step
Epoch 99/100
113/113 - 0s - loss: 0.0156 - val_loss: 0.0025 - 167ms/epoch - 1ms/step
Epoch 100/100
113/113 - 0s - loss: 0.0157 - val_loss: 0.0032 - 169ms/epoch - 1ms/step


## Evaluate

Smaller the better.

In [169]:
test_score = model.evaluate(testX, testPredict, verbose=0)
print(test_score)

1.031100829989185e-15


In [170]:
# testPredict

## RMSE of Test

In [171]:
from sklearn.metrics import mean_squared_error

mean_squared_error(ss.inverse_transform(testX[:,:,0].flatten()), 
                   ss.inverse_transform(testPredict.flatten()), squared=False)

20.77213452547952

## Predict Trend

In [166]:
# testPredict

In [147]:
# testX[:,:,0]
# plt.plot(X_test.index[5:],testX[:,:,0])
# plt.plot(X_test.index[5:],testPredict.flatten())
# plt.legend(["True Data", "Predicted"])
# plt.show()

In [154]:
testX.shape

(221, 1, 5)

In [172]:
test=pd.DataFrame()
test["pred"]=ss.inverse_transform(testPredict.flatten())
test["real"]=ss.inverse_transform(testX[:,:,0])
test["date"]=X_test.index[:-5]
test.iplot(kind="line", x="date")

## Train Trend

In [173]:
train=pd.DataFrame()
train["pred"]=ss.inverse_transform(trainPredict.flatten())
train["real"]=ss.inverse_transform(trainX[:,:,0])
train["date"]=X_train.index[:-5]
train.iplot(kind="line", x="date")