In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Bidirectional
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Loading Banknifty data

bank_dataset = pd.read_csv('/content/Streak_Logistic.csv')

In [None]:
bank_dataset.head()

Unnamed: 0,Date,Close,Profit,Label
0,01/01/2015,-5500.172801,-5500.172801,0
1,01/02/2015,-11001.17615,-5501.003344,0
2,01/03/2015,8997.729452,19998.9056,1
3,01/04/2015,28998.75265,20001.0232,1
4,01/05/2015,48999.05458,20000.30193,1


In [None]:
def calculate_rsi(close_prices, window=14):
  """
  Calculates the Relative Strength Index (RSI) for a given set of closing prices.

  Args:
    close_prices: A pandas Series or numpy array of closing prices.
    window: The lookback period for calculating RSI (default is 14).

  Returns:
    A pandas Series or numpy array containing the RSI values.
  """

  # Calculate price differences
  price_diffs = close_prices.diff()

  # Separate gains and losses
  gains = price_diffs.where(price_diffs > 0, 0.0)
  losses = -price_diffs.where(price_diffs < 0, 0.0)

  # Calculate average gains and losses
  avg_gains = gains.rolling(window).mean()
  avg_losses = losses.rolling(window).mean()

  # Calculate relative strength (RS)
  rs = avg_gains / avg_losses

  # Calculate RSI
  rsi = 100 - (100 / (1 + rs))

  return rsi

# Assuming 'data_rest' is your DataFrame with a 'Close' column
close_prices = bank_dataset['Close']

# Calculate RSI
rsi = calculate_rsi(close_prices)

# Add RSI values to your DataFrame
bank_dataset['RSI'] = rsi

print(bank_dataset)

           Date         Close        Profit  Label        RSI
0    01/01/2015 -5.500173e+03  -5500.172801      0        NaN
1    01/02/2015 -1.100118e+04  -5501.003344      0        NaN
2    01/03/2015  8.997729e+03  19998.905600      1        NaN
3    01/04/2015  2.899875e+04  20001.023200      1        NaN
4    01/05/2015  4.899905e+04  20000.301930      1        NaN
..          ...           ...           ...    ...        ...
870  04/04/2023  1.648724e+06  -5500.000000      0  66.889632
871  04/05/2023  1.643224e+06  -5500.000000      0  59.259259
872  04/06/2023  1.637724e+06  -5500.000000      0  59.259259
873  04/07/2023  1.632224e+06  -5500.000000      0  59.259259
874  04/08/2023  1.652224e+06  20000.000000      1  59.259259

[875 rows x 5 columns]


In [None]:
bank_dataset = bank_dataset.iloc[14:]
print(bank_dataset)

           Date         Close        Profit  Label        RSI
14   01/15/2015  9.442652e+04  42932.466150      1  76.586209
15   01/16/2015  8.892705e+04  -5499.469899      0  76.586834
16   01/17/2015  8.342582e+04  -5501.228914      0  71.457631
17   01/18/2015  7.792578e+04  -5500.039342      0  65.392695
18   01/19/2015  7.242576e+04  -5500.022283      0  58.110105
..          ...           ...           ...    ...        ...
870  04/04/2023  1.648724e+06  -5500.000000      0  66.889632
871  04/05/2023  1.643224e+06  -5500.000000      0  59.259259
872  04/06/2023  1.637724e+06  -5500.000000      0  59.259259
873  04/07/2023  1.632224e+06  -5500.000000      0  59.259259
874  04/08/2023  1.652224e+06  20000.000000      1  59.259259

[861 rows x 5 columns]


In [None]:
#Scaling the close price data
data = bank_dataset.reset_index()['Close']
print(data)

scalar = MinMaxScaler(feature_range=(0,1))
reshaped_data = data.values.reshape(-1, 1)
scaled_data = scalar.fit_transform(reshaped_data)
print(scaled_data)

0      9.442652e+04
1      8.892705e+04
2      8.342582e+04
3      7.792578e+04
4      7.242576e+04
           ...     
856    1.648724e+06
857    1.643224e+06
858    1.637724e+06
859    1.632224e+06
860    1.652224e+06
Name: Close, Length: 861, dtype: float64
[[0.02915059]
 [0.02577522]
 [0.02239877]
 [0.01902304]
 [0.01564733]
 [0.02792216]
 [0.02454693]
 [0.02117178]
 [0.03344711]
 [0.03007183]
 [0.02669586]
 [0.02332099]
 [0.0199446 ]
 [0.01656876]
 [0.02884485]
 [0.02546941]
 [0.02209371]
 [0.01871735]
 [0.01534134]
 [0.01196586]
 [0.02424051]
 [0.03651655]
 [0.04879139]
 [0.04541645]
 [0.04204039]
 [0.03866484]
 [0.0352893 ]
 [0.03074169]
 [0.04301742]
 [0.03964137]
 [0.05191654]
 [0.04610122]
 [0.04272479]
 [0.05500022]
 [0.05162436]
 [0.0482493 ]
 [0.04487289]
 [0.04149679]
 [0.05377269]
 [0.05039624]
 [0.0470203 ]
 [0.04364495]
 [0.04026934]
 [0.03689438]
 [0.04916911]
 [0.04579341]
 [0.04106787]
 [0.037693  ]
 [0.04996809]
 [0.0465922 ]
 [0.04298196]
 [0.03960671]
 [0.0563026

In [None]:
scaled_data_df = pd.DataFrame(scaled_data, columns=['Close']) #Create a DataFrame from scaled_data
rsi_output = bank_dataset['RSI'].reset_index(drop=True) # Reset index of 'RSI' to align with scaled_data_df
scaled_data = pd.concat([scaled_data_df, rsi_output], axis=1) # Concatenate DataFrames
print(scaled_data)

        Close        RSI
0    0.029151  76.586209
1    0.025775  76.586834
2    0.022399  71.457631
3    0.019023  65.392695
4    0.015647  58.110105
..        ...        ...
856  0.983122  66.889632
857  0.979746  59.259259
858  0.976370  59.259259
859  0.972994  59.259259
860  0.985270  59.259259

[861 rows x 2 columns]


In [None]:
# Split into two
data_4y = scaled_data[:-200]  # Assuming approximately 100 trading days in a year
data_rest = scaled_data[-210:]

print("Data for the past 4 years:")
print(len(data_4y))
print(data_4y)

print("\nData for the past 1 year plus 50 days:")
print(len(data_rest))
print(data_rest)

Data for the past 4 years:
661
        Close        RSI
0    0.029151  76.586209
1    0.025775  76.586834
2    0.022399  71.457631
3    0.019023  65.392695
4    0.015647  58.110105
..        ...        ...
656  0.709114  57.440159
657  0.705738  57.440135
658  0.702362  48.086431
659  0.698987  48.086449
660  0.695611  49.792363

[661 rows x 2 columns]

Data for the past 1 year plus 50 days:
210
        Close        RSI
651  0.694691  46.630337
652  0.691315  46.630591
653  0.687940  48.086390
654  0.700215  48.086580
655  0.712490  57.440368
..        ...        ...
856  0.983122  66.889632
857  0.979746  59.259259
858  0.976370  59.259259
859  0.972994  59.259259
860  0.985270  59.259259

[210 rows x 2 columns]


In [None]:
# separating labels

label = bank_dataset['Label']

label_4y = label[:-100]
label_rest = label[-103:]

print("Labels for the past 4 years:")
print(len(label_4y))
print(label_4y)

print("\nLabels for the past 1 year plus 50 days:")
print(len(label_rest))
print(label_rest)

Labels for the past 4 years:
761
14     1
15     0
16     0
17     0
18     0
      ..
770    0
771    0
772    1
773    0
774    1
Name: Label, Length: 761, dtype: int64

Labels for the past 1 year plus 50 days:
103
772    1
773    0
774    1
775    0
776    1
      ..
870    0
871    0
872    0
873    0
874    1
Name: Label, Length: 103, dtype: int64


In [None]:
# Independednt variable
prediction_days = 10

X_train = []

for x in range(prediction_days, len(data_4y)):
  X_train.append(data_4y[['Close', 'RSI']][x-prediction_days:x].values)




In [None]:
X_train = np.array(X_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)) # Add a new dimension while preserving the existing ones
print(X_train)
len(X_train)

[[[[2.91505921e-02]
   [7.65862090e+01]]

  [[2.57752187e-02]
   [7.65868340e+01]]

  [[2.23987656e-02]
   [7.14576312e+01]]

  ...

  [[2.11717762e-02]
   [6.53930212e+01]]

  [[3.34471111e-02]
   [6.53928951e+01]]

  [[3.00718291e-02]
   [6.53933646e+01]]]


 [[[2.57752187e-02]
   [7.65868340e+01]]

  [[2.23987656e-02]
   [7.14576312e+01]]

  [[1.90230427e-02]
   [6.53926955e+01]]

  ...

  [[3.34471111e-02]
   [6.53928951e+01]]

  [[3.00718291e-02]
   [6.53933646e+01]]

  [[2.66958609e-02]
   [6.53932624e+01]]]


 [[[2.23987656e-02]
   [7.14576312e+01]]

  [[1.90230427e-02]
   [6.53926955e+01]]

  [[1.56473302e-02]
   [5.81101052e+01]]

  ...

  [[3.00718291e-02]
   [6.53933646e+01]]

  [[2.66958609e-02]
   [6.53932624e+01]]

  [[2.33209869e-02]
   [5.78206588e+01]]]


 ...


 [[[6.89166525e-01]
   [3.50344423e+01]]

  [[7.01441917e-01]
   [4.66298804e+01]]

  [[6.98066315e-01]
   [4.66300667e+01]]

  ...

  [[7.12489879e-01]
   [5.74403680e+01]]

  [[7.09113802e-01]
   [5.74401589e

651

In [None]:
# Dependent variable

Y_train = label[10:-200]
len(Y_train)


651

In [None]:
# model
model =Sequential()
model.add(LSTM(units=100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2]))) # Removed extra dimension from input shape
model.add(Dropout(0.3))
model.add(LSTM(units=100, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(units=100))
model.add(Dropout(0.3))
model.add(Dense(units=1, activation = 'sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics = ['accuracy'])
model.fit(X_train, Y_train, epochs=35, batch_size=30)

Epoch 1/35


  super().__init__(**kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 38ms/step - accuracy: 0.6894 - loss: 0.6190
Epoch 2/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.7347 - loss: 0.5969
Epoch 3/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.7456 - loss: 0.5774
Epoch 4/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.7198 - loss: 0.5998
Epoch 5/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.7100 - loss: 0.6082
Epoch 6/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.6899 - loss: 0.6206
Epoch 7/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.7138 - loss: 0.6030
Epoch 8/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.7033 - loss: 0.6047
Epoch 9/35
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x7b04004ef550>

In [None]:
# Calculate the predictions
predictions = model.predict(X_train)

# Convert predictions to binary values based on a threshold (e.g., 0.5)
binary_predictions = (predictions > 0.2755).astype(int)

# Calculate accuracy score using binary predictions
score = accuracy_score(Y_train, binary_predictions)
print("Accuracy of Training data:", score)


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Accuracy of Training data: 0.3348694316436252


In [None]:
from sklearn.metrics import confusion_matrix



# Calculate the confusion matrix
cm = confusion_matrix(Y_train, binary_predictions)

# Print the confusion matrix
print(cm)
# Trading accuracy interms of prediction
accuracy = (cm[1,1] ) / (cm[1,1]+cm[0,1])
print("Trading Accuracy:", accuracy)

tn, fp, fn, tp = cm.ravel()
print("True Positives:", tp)
print("True Negatives:", tn)
print("False Positives:", fp)
print("False Negatives:", fn)

[[ 43 421]
 [ 12 175]]
Trading Accuracy: 0.2936241610738255
True Positives: 175
True Negatives: 43
False Positives: 421
False Negatives: 12


In [None]:
# New Test data
X_test = []

for x in range(prediction_days, len(data_rest)):
  X_test.append(data_rest[['Close', 'RSI']][x-prediction_days:x].values)

X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2], 1))





In [None]:
# Calculate the predictions
predictions2 = model.predict(X_test)

# Convert predictions to binary values based on a threshold (e.g., 0.5)
binary_predictions2 = (predictions2 > 0.2755).astype(int)



[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


In [None]:
Y_test = label[-200:]
len(Y_test)
print(Y_test)



675    1
676    0
677    0
678    0
679    1
      ..
870    0
871    0
872    0
873    0
874    1
Name: Label, Length: 200, dtype: int64


In [None]:

# Calculate accuracy score using binary predictions
score = accuracy_score(Y_test, binary_predictions2)
print("Accuracy of Training data:", score)

Accuracy of Training data: 0.315


In [None]:
from sklearn.metrics import confusion_matrix



# Calculate the confusion matrix
cm = confusion_matrix(Y_test, binary_predictions2)

# Print the confusion matrix
print(cm)

tn, fp, fn, tp = cm.ravel()
print("True Positives:", tp)
print("True Negatives:", tn)
print("False Positives:", fp)
print("False Negatives:", fn)
accuracy = (cm[1,1] ) / (cm[1,1]+cm[0,1])
print("Trading Accuracy:", accuracy)

[[ 11 125]
 [ 12  52]]
True Positives: 52
True Negatives: 11
False Positives: 125
False Negatives: 12
Trading Accuracy: 0.2937853107344633


# Logistic regression

In [None]:
# Model building
model = LogisticRegression()
X_train_2d = X_train.reshape(X_train.shape[0], -1)
model.fit(X_train_2d, Y_train)

score = accuracy_score(Y_train, model.predict(X_train_2d))
print("Accuracy of Training data:", score)


Accuracy of Training data: 0.711139896373057


In [None]:
from sklearn.metrics import confusion_matrix

# Calculate the confusion matrix
cm = confusion_matrix(Y_train,model.predict(X_train_2d))

# Print the confusion matrix
print(cm)

[[549   0]
 [223   0]]
