# Import

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM
import math
from sklearn.preprocessing import MinMaxScaler,StandardScaler 
from sklearn.model_selection import train_test_split
import cufflinks
cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

# Read Data

In [3]:
#data = "ALICL_2000-01-01_2021-12-31.csv"
insurance = pd.read_csv("ALICL_2000-01-01_2021-12-31.csv", engine='python')
insurance.head()


Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
0,1,2021-12-29,80,7301.0,8256887.7,1145.0,1117.2,1123.0
1,2,2021-12-28,182,18239.0,20247233.0,1150.0,1088.0,1125.0
2,3,2021-12-27,389,33991.0,39652953.1,1212.0,1124.0,1130.0
3,4,2021-12-26,395,43361.0,50376673.9,1188.0,1079.0,1188.0
4,5,2021-12-23,156,11832.0,12704046.9,1095.0,1040.0,1080.0


 # Sort Data in Ascending order

In [4]:
insurance = insurance.sort_values(by="Date")
insurance.head()

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
2545,2546,2010-05-12,6,60.0,21010.0,364.0,337.0,337.0
2544,2545,2010-05-20,4,100.0,32020.0,331.0,313.0,313.0
2543,2544,2010-05-23,5,120.0,35210.0,307.0,285.0,285.0
2542,2543,2010-05-24,6,140.0,37150.0,280.0,257.0,257.0
2541,2542,2010-05-25,60,1840.0,427440.0,252.0,232.0,232.0


# Handle Missing NAs

In [5]:
#Handle missing data by dropinig NAs.
insurance = insurance.dropna()
insurance.tail()

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
4,5,2021-12-23,156,11832.0,12704046.9,1095.0,1040.0,1080.0
3,4,2021-12-26,395,43361.0,50376673.9,1188.0,1079.0,1188.0
2,3,2021-12-27,389,33991.0,39652953.1,1212.0,1124.0,1130.0
1,2,2021-12-28,182,18239.0,20247233.0,1150.0,1088.0,1125.0
0,1,2021-12-29,80,7301.0,8256887.7,1145.0,1117.2,1123.0


# Plot Trend

In [7]:
insurance.iplot(kind="line",x="Date",y="Close Price",xTitle="Date", yTitle="Close Price", title="Insurance Data Trend")

# Next Price

In [9]:
insurance['Next Price'] = insurance.shift(-1)['Close Price']

In [10]:
insurance= insurance.dropna()


# Rename Columns

In [11]:
insurance.rename(columns = {'S.N.':'SN','Total Transactions':'TTrans', 'Total Traded Shares':'TTS',
                              'Total Traded Amount':'TTA','Close Price':'ClosePrice','Next Price':'NextPrice',
                     'Max. Price':'MaxPrice','Min. Price':'MinPrice'}, inplace = True)

# Prepare Feature

In [12]:
features = ['ClosePrice',"Date"]
outFeature = ['NextPrice',"Date"]

In [13]:
# take close price as feature and next price as out feature
X, Y = insurance[features], insurance[outFeature] 
X.set_index("Date", inplace=True)
Y.set_index("Date", inplace=True)

# Normalize Data

In [14]:
#Normalize data using standard scalar.
ss = StandardScaler()

X["ClosePrice"] = ss.fit_transform(X)
Y["NextPrice"] = ss.fit_transform(Y)


## Train Test Split

Using train_test_split of sklearn will split data randomly so the sequence is lost.

In [15]:
def train_test_split(x,y,train_size):
    n = int(len(x)*train_size)
    trainx = x[:n]
    testx = x[n:]
    trainy=y[:n]
    testy=y[n:]
    return trainx,testx,trainy,testy

In [16]:
#Split train, validation, test set from X,Y to 80%, 10%, 10%

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9)

In [17]:
X_train.shape,X_test.shape,X.shape

((2290, 1), (255, 1), (2545, 1))

In [18]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True,input_shape=(5,1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=25))
model.add(Dense(units=1))

# Convert to matrix

In [19]:
# convert into dataset matrix
def convertToMatrix(data, step):
    data = data.to_numpy()
    X, Y =[], []
    for i in range(len(data)-step):
        d=i+step  
        X.append(data[i:d,])
        Y.append(data[d,])
    return np.array(X), np.array(Y)

step=5

trainX,trainY =convertToMatrix(X_train,step)
testX,testY =convertToMatrix(X_test,step)
# validX,validY = convertToMatrix(X_valid,step)

trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1],1))
# validX = np.reshape(validX, (validX.shape[0], 1, validX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], testX.shape[1],1))

trainX.shape,testX.shape #,validX.shape

((2285, 5, 1), (250, 5, 1))

In [24]:
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(trainX, trainY, batch_size=1, epochs=1)



<keras.callbacks.History at 0x12fc27269a0>

In [25]:
predictions = model.predict(testX)
predictions = ss.inverse_transform(predictions)
predictions.shape,testX.shape



((250, 1), (250, 5, 1))

In [26]:
Y_test_ss = ss.inverse_transform(Y_test)[:-5]

rmse=np.sqrt(np.mean(((predictions- Y_test_ss)**2)))
print(rmse)

41.366455506929114


# Test Trand

In [27]:
test=pd.DataFrame()
test["pred"]=ss.inverse_transform(predictions.flatten())
test["real"]=ss.inverse_transform(Y_test_ss)
test["date"]=X_test.index[:-5]
test.iplot(kind="line", x="date")