In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("NSEI .csv") #this dataset contains entries of the NIFTY 50 index of frequency 1d
df.dropna(subset = ['Close'], inplace = True) #drops entries with empty values (checks column ['Close'])
df = df.reset_index(drop = 'True') #this commands resets the indexes
prices = df['Close']
#print(df.head()) can be used to check first 5 entries
print(prices)

0        4494.649902
1        4546.200195
2        4732.350098
3        4747.549805
4        4837.549805
            ...     
3313    14310.799805
3314    14504.799805
3315    14581.450195
3316    14617.849609
3317    14359.450195
Name: Close, Length: 3318, dtype: float64


In [3]:
def calculate_rsi(prices, Window): #calculates the indicator RSI
    delta = prices.diff(1)
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)
    avg_gain = gain.rolling(window = Window).mean()
    avg_loss = loss.rolling(window = Window).mean()
    rs = avg_gain/avg_loss
    rsi = 100 - 100/(1 + rs)
    return rsi

def calculate_ema(prices, start, Window): #calculates the ema for a price
    ema = prices.iloc[start-Window]
    multiplier = 2/(Window + 1)
    for i in range (start - Window, start) :
        ema = multiplier*prices.iloc[i] + ema*(1 - multiplier)
        #print(f"E={ema[i]} index={i}")
    return ema 

def series_ema(prices, Window): #Applies the ema formula on a series of prices (any Series it inputs)
    ema = pd.Series()
    for i in range(len(df)):
        if i < Window:
            ema[i] = None
        else:
            ema[i] = calculate_ema(prices, i, Window)
    return ema

def calculate_moving_averages(prices, short_window = 12, long_window = 26): #calculates moving average (short and long ema)
    #short_sma = prices.rolling(window = short_window).mean()
    #long_sma = prices.rolling(window = long_window).mean()
    short_ema = series_ema(df['Close'], 12)
    long_ema = series_ema(df['Close'], 26)
    df['short_ema'] = short_ema
    df['long_ema'] = long_ema
    df.dropna(subset = ['long_ema'], inplace = True)
    return short_ema, long_ema

In [4]:
short_ema, long_ema = calculate_moving_averages(prices)
MACD = df['short_ema'] - df['long_ema'] #formula of MACD
df['MACD'] = MACD #inserts a column for MACD
signal_line = series_ema(df['MACD'], 9)
df['SL'] = signal_line #inserts acolumn for signal line
df.dropna(subset = ['SL'], inplace = True) #cleans the data, getting rid of entries with NULL values in signal line column
df = df.reset_index(drop = 'True')
print(df)

            Date          Open          High           Low         Close  \
0     2007-10-24   5477.600098   5577.899902   5419.399902   5496.149902   
1     2007-10-25   5499.049805   5605.950195   5469.299805   5568.950195   
2     2007-10-26   5564.250000   5716.899902   5513.350098   5702.299805   
3     2007-10-29   5708.899902   5922.500000   5708.899902   5905.899902   
4     2007-10-30   5917.549805   5976.000000   5833.899902   5868.750000   
...          ...           ...           ...           ...           ...   
3261  2021-03-02  14865.299805  14959.099609  14760.799805  14919.099609   
3262  2021-03-03  15064.400391  15273.150391  14995.799805  15245.599609   
3263  2021-03-04  15026.750000  15202.349609  14980.200195  15080.750000   
3264  2021-03-05  14977.950195  15092.349609  14862.099609  14938.099609   
3265  2021-03-08  15002.450195  15111.150391  14919.900391  14956.200195   

         Adj Close    Volume     short_ema      long_ema        MACD  \
0      5496.149

In [5]:
def calculate_pc(prices): #calculates percentage chane from previous day's closing to current day's closing
    cpc = pd.Series()
    for i in range(len(df)):
        if i == 0:
            cpc[i] = None
        else:
            cpc[i] = ((prices.iloc[i] - prices.iloc[i-1])/prices.iloc[i-1])*100
    return cpc

def calculate_obv(): #calculates the "on-balance volume indicator"
    obv = pd.Series()
    for i in range(len(df)):
        if i == 0:
            obv[i] = df.loc[i, 'Volume']
        else:
            if(df.loc[i, 'Close'] > df.loc[i-1, 'Close']):
                obv[i] = obv.iloc[i-1] + df.loc[i, 'Volume']
            elif(df.loc[i, 'Close'] < df.loc[i-1, 'Close']):
                obv[i] = obv.iloc[i-1] - df.loc[i, 'Volume']
            else:
                obv[i] = obv.iloc[i-1]
    return obv

def calculate_target(): #target value(y) contains binary values for either buying or selling()
    target = pd.Series()
    for i in range(len(df) - 1):
            if(df.loc[i+1, 'Close'] > df.loc[i, 'Close']):
                target[i] = 1
            else:
                target[i] = 0
    return target

def williamsR(period): #calculates the indicator williamsR
    k = pd.Series()
    for i in range(len(df)):
        if i < 26:
            k[i] = 0
        else:
            current = df.loc[i,'Close']
            high_period = df.loc[i-period:i, 'High'].max()
            low_period = df.loc[i-period:i, 'Low'].min()
            k[i] = (current - high_period)/(high_period - low_period)
    return k

In [6]:
RSI = calculate_rsi(df['Close'], 14)
df['RSI'] = RSI #inserts a column for MACD
EMA50 = series_ema(df['Close'], 50)
df['ema50'] = EMA50 #inserts a column for EMA50w
CO = df['Close'] - df['Open']
df['close-open'] = CO #inserts a column for close-open difference for current day's price
HL = df['High'] - df['Low']
df['High- Low'] = HL #inserts a column for High-Low difference in current day's price
k = williamsR(14)
df['WR'] = k #inserts a column for WilliamsR

In [7]:
print(df)

            Date          Open          High           Low         Close  \
0     2007-10-24   5477.600098   5577.899902   5419.399902   5496.149902   
1     2007-10-25   5499.049805   5605.950195   5469.299805   5568.950195   
2     2007-10-26   5564.250000   5716.899902   5513.350098   5702.299805   
3     2007-10-29   5708.899902   5922.500000   5708.899902   5905.899902   
4     2007-10-30   5917.549805   5976.000000   5833.899902   5868.750000   
...          ...           ...           ...           ...           ...   
3261  2021-03-02  14865.299805  14959.099609  14760.799805  14919.099609   
3262  2021-03-03  15064.400391  15273.150391  14995.799805  15245.599609   
3263  2021-03-04  15026.750000  15202.349609  14980.200195  15080.750000   
3264  2021-03-05  14977.950195  15092.349609  14862.099609  14938.099609   
3265  2021-03-08  15002.450195  15111.150391  14919.900391  14956.200195   

         Adj Close    Volume     short_ema      long_ema        MACD  \
0      5496.149

In [8]:
CPC = calculate_pc(df['Close'])
OBV = calculate_obv()
y = calculate_target()
df['cpc'] = CPC #inserts a column for percentage change
df['obv'] = OBV #inserts a column for obv
df['Y'] = y #inserts a column for target value(Y)

In [9]:
#normalisation of features
df['normMACD'] = (df['MACD'] - df['MACD'].mean())/df['MACD'].std()
df['normSL'] = (df['SL'] - df['SL'].mean())/df['SL'].std()
df['RSI'] = df['RSI']/10
df['close-open'] = (df['close-open'] - df['close-open'].min())/(df['close-open'].max() - df['close-open'].min())
df['High- Low'] = (df['High- Low'] - df['High- Low'].min())/(df['High- Low'].max() - df['High- Low'].min())
df['obv'] = (df['obv'] - df['obv'].min())/(df['obv'].max() - df['obv'].min())
df['ema50'] = (df['ema50'] - df['ema50'].mean())/(df['ema50'].std())
df['WR'] = (df['WR'] - df['WR'].mean())/(df['WR'].std())

In [10]:
df.dropna(subset = ['ema50'], inplace = True) #drops all the entries with NULL values in the column 'ema50'
df = df.reset_index(drop = True)
print(df)

            Date          Open          High           Low         Close  \
0     2008-01-07   6271.000000   6289.799805   6193.350098   6279.100098   
1     2008-01-08   6282.450195   6357.100098   6221.600098   6287.850098   
2     2008-01-09   6287.549805   6338.299805   6231.250000   6272.000000   
3     2008-01-10   6278.100098   6347.000000   6142.899902   6156.950195   
4     2008-01-11   6166.649902   6224.200195   6112.549805   6200.100098   
...          ...           ...           ...           ...           ...   
3211  2021-03-02  14865.299805  14959.099609  14760.799805  14919.099609   
3212  2021-03-03  15064.400391  15273.150391  14995.799805  15245.599609   
3213  2021-03-04  15026.750000  15202.349609  14980.200195  15080.750000   
3214  2021-03-05  14977.950195  15092.349609  14862.099609  14938.099609   
3215  2021-03-08  15002.450195  15111.150391  14919.900391  14956.200195   

         Adj Close    Volume     short_ema      long_ema        MACD  ...  \
0      627

In [11]:
features = ['RSI', 'ema50', 'close-open', 'High- Low', 'WR', 'cpc', 'normMACD', 'normSL']
X = df[features]
Y = df['Y']

In [12]:
#splitting features on a 80-20 basis
X_train = X.loc[:2572]
Y_train = Y.loc[:2572]
X_test = X.loc[2573:]
Y_test = Y.loc[2573:]

In [13]:
#creating the neural network using tensorflow(keras)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
tf.random.set_seed(1234)
model = Sequential(
[
    tf.keras.Input(shape = (8, )),
    Dense(units = 120, activation = 'linear', name = 'Layer1'),
    Dense(units = 40, activation = 'linear', name = 'Layer2'),
    Dense(units = 4, activation = 'linear', name = 'Layer3'),
    Dense(units = 1, activation = 'sigmoid', name = 'Output')
])
model.compile(
    loss = tf.keras.losses.MeanSquaredError(),
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.003),
    metrics = [tf.keras.metrics.Accuracy()]
)

model.fit(
    X_train, Y_train,            
    epochs = 150,
)

Epoch 1/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2869
Epoch 2/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2520
Epoch 3/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2482
Epoch 4/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2462
Epoch 5/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2442
Epoch 6/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2434
Epoch 7/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2430
Epoch 8/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2427
Epoch 9/150
[1m

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2416
Epoch 67/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2416
Epoch 68/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2416
Epoch 69/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2416
Epoch 70/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2416
Epoch 71/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2417
Epoch 72/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2416
Epoch 73/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 969us/step - accuracy: 0.0000e+00 - loss: 0.2416
Epoch 74/150
[1m81

[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2417
Epoch 132/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2417
Epoch 133/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2417
Epoch 134/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2417
Epoch 135/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2417
Epoch 136/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2415
Epoch 137/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 0.2415
Epoch 138/150
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 955us/step - accuracy: 0.0000e+00 - loss: 0.2415
Epoch 139/15

<keras.src.callbacks.history.History at 0x1c936518ed0>

In [14]:
predict = model.predict(X_test)
Y_test.reset_index(drop = True)
print(Y_test)
for i in range(len(predict)):
    if(predict[i] > 0.5):
        predict[i] = 1
    else:
        predict[i] = 0
count = 0
for j in range(len(Y_test)):
    if(Y_test[j+2573] == predict[j]):
        count += 1
print(count/len(Y_test))

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
2573    1.0
2574    0.0
2575    1.0
2576    1.0
2577    1.0
       ... 
3211    1.0
3212    0.0
3213    0.0
3214    1.0
3215    NaN
Name: Y, Length: 643, dtype: float64
0.578538102643857
