# Import

In [32]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import cufflinks

cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl')

# Read data

In [36]:
hydro = pd.read_csv("AKJCL_2000-01-01_2021-12-31.csv", engine='python')
hydro.head()

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
0,1,2021-12-29,293,44404.0,12159732.6,280.0,270.0,273.0
1,2,2021-12-28,474,81741.0,22254586.3,280.0,264.0,274.9
2,3,2021-12-27,660,118407.0,33305303.9,295.4,270.0,272.0
3,4,2021-12-26,608,115254.0,33393501.0,297.0,285.8,289.8
4,5,2021-12-23,819,128123.0,37791022.7,307.0,282.0,291.0


# Sort Data in Ascending Order

In [37]:
hydro = hydro.sort_values(by="Date")
hydro.head()

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
670,671,2018-12-20,75,1245.0,125345.0,138.0,86.0,102.0
669,670,2018-12-23,190,2920.0,295210.0,104.0,98.0,104.0
668,669,2018-12-24,142,2320.0,239350.0,106.0,100.0,102.0
667,668,2018-12-25,35,590.0,58880.0,102.0,98.0,101.0
666,667,2018-12-26,26,390.0,39490.0,102.0,100.0,101.0


# Handle Missing Data

In [38]:
hydro = hydro.dropna()
hydro.tail()

Unnamed: 0,S.N.,Date,Total Transactions,Total Traded Shares,Total Traded Amount,Max. Price,Min. Price,Close Price
4,5,2021-12-23,819,128123.0,37791022.7,307.0,282.0,291.0
3,4,2021-12-26,608,115254.0,33393501.0,297.0,285.8,289.8
2,3,2021-12-27,660,118407.0,33305303.9,295.4,270.0,272.0
1,2,2021-12-28,474,81741.0,22254586.3,280.0,264.0,274.9
0,1,2021-12-29,293,44404.0,12159732.6,280.0,270.0,273.0


# Plot Trend 

In [41]:
hydro.iplot(kind="line",x="Date",y="Close Price",xTitle="Date", yTitle="Close Price", title="Hydropower Trend")

# Next Price

In [42]:
hydro['Next Price'] = hydro.shift(-1)['Close Price']

In [43]:
hydro = hydro.dropna()

# Rename Columns

In [44]:
hydro.columns

Index(['S.N.', 'Date', 'Total Transactions', 'Total Traded Shares',
       'Total Traded Amount', 'Max. Price', 'Min. Price', 'Close Price',
       'Next Price'],
      dtype='object')

In [45]:
hydro.rename(columns = {'S.N.':'SN','Total Transactions':'TTrans', 'Total Traded Shares':'TTS',
                              'Total Traded Amount':'TTA','Close Price':'ClosePrice','Next Price':'NextPrice',
                     'Max. Price':'MaxPrice','Min. Price':'MinPrice'}, inplace = True)

# Prepared Features

In [46]:
features = ['ClosePrice',"Date"]
outFeature = ['NextPrice',"Date"]

In [47]:
# take close price as feature and next price as out feature
X, Y = insurance[features], insurance[outFeature] 
X.set_index("Date", inplace=True)
Y.set_index("Date", inplace=True)

# Normalize Data

In [48]:
#Normalize data using standard scalar.
ss = StandardScaler()

X["ClosePrice"] = ss.fit_transform(X)
Y["NextPrice"] = ss.fit_transform(Y)


 # Train Test Split

Using train_test_split of sklearn will split data randomly so the sequence is lost.

In [49]:
def train_test_split(x,y,train_size):
    n = int(len(x)*train_size)
    trainx = x[:n]
    testx = x[n:]
    trainy=y[:n]
    testy=y[n:]
    return trainx,testx,trainy,testy

In [50]:
#Split train, validation, test set from X,Y to 80%, 10%, 10%

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9)

In [51]:
X_train.shape,X_test.shape,X.shape

((603, 1), (67, 1), (670, 1))

# Convert to Matrix

In [52]:
# convert into dataset matrix
def convertToMatrix(data, step):
    data = data.to_numpy()
    X, Y =[], []
    for i in range(len(data)-step):
        d=i+step  
        X.append(data[i:d,])
        Y.append(data[d,])
    return np.array(X), np.array(Y)

step=5

trainX,trainY =convertToMatrix(X_train,step)
testX,testY =convertToMatrix(X_test,step)
# validX,validY = convertToMatrix(X_valid,step)

trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
# validX = np.reshape(validX, (validX.shape[0], 1, validX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

trainX.shape,testX.shape #,validX.shape

((598, 1, 5), (62, 1, 5))

# Modeling

In [53]:
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN


# SimpleRNN model
model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(1,step), activation="relu"))
model.add(Dense(8, activation="relu")) 
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='rmsprop')
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_1 (SimpleRNN)    (None, 32)                1216      
                                                                 
 dense_2 (Dense)             (None, 8)                 264       
                                                                 
 dense_3 (Dense)             (None, 1)                 9         
                                                                 
Total params: 1,489
Trainable params: 1,489
Non-trainable params: 0
_________________________________________________________________


In [54]:
# we have 90% of data as train and if we take 11.11% from train, that will be equal to 10% of overall data as validation
model.fit(trainX,trainY, validation_split=0.111,#validation_data=(validX,validY),
          epochs=100, batch_size=16, verbose=2)
trainPredict = model.predict(trainX)
testPredict= model.predict(testX)
predicted=np.concatenate((trainPredict,testPredict),axis=0)

Epoch 1/100
34/34 - 3s - loss: 0.0637 - val_loss: 2.7792 - 3s/epoch - 75ms/step
Epoch 2/100
34/34 - 0s - loss: 0.0213 - val_loss: 2.1879 - 127ms/epoch - 4ms/step
Epoch 3/100
34/34 - 0s - loss: 0.0152 - val_loss: 1.4656 - 132ms/epoch - 4ms/step
Epoch 4/100
34/34 - 0s - loss: 0.0101 - val_loss: 0.8645 - 130ms/epoch - 4ms/step
Epoch 5/100
34/34 - 0s - loss: 0.0063 - val_loss: 0.4209 - 130ms/epoch - 4ms/step
Epoch 6/100
34/34 - 0s - loss: 0.0039 - val_loss: 0.1595 - 127ms/epoch - 4ms/step
Epoch 7/100
34/34 - 0s - loss: 0.0030 - val_loss: 0.0693 - 126ms/epoch - 4ms/step
Epoch 8/100
34/34 - 0s - loss: 0.0025 - val_loss: 0.0763 - 125ms/epoch - 4ms/step
Epoch 9/100
34/34 - 0s - loss: 0.0025 - val_loss: 0.0458 - 130ms/epoch - 4ms/step
Epoch 10/100
34/34 - 0s - loss: 0.0023 - val_loss: 0.0431 - 133ms/epoch - 4ms/step
Epoch 11/100
34/34 - 0s - loss: 0.0023 - val_loss: 0.0448 - 127ms/epoch - 4ms/step
Epoch 12/100
34/34 - 0s - loss: 0.0023 - val_loss: 0.0422 - 126ms/epoch - 4ms/step
Epoch 13/100
34

Epoch 100/100
34/34 - 0s - loss: 0.0014 - val_loss: 0.0781 - 128ms/epoch - 4ms/step


# Evaluate

Smaller the better.

In [55]:
test_score = model.evaluate(testX, testPredict, verbose=0)
print(test_score)

5.042561200275511e-15


# RMSE of Test

In [56]:
from sklearn.metrics import mean_squared_error

mean_squared_error(ss.inverse_transform(testX[:,:,0].flatten()), 
                   ss.inverse_transform(testPredict.flatten()), squared=False)

32.86703339002852

# Predict Trend

In [58]:
test=pd.DataFrame()
test["pred"]=ss.inverse_transform(testPredict.flatten())
test["real"]=ss.inverse_transform(testX[:,:,0])
test["date"]=X_test.index[:-5]
test.iplot(kind="line", x="date")