In [87]:
import yfinance as yf
import datetime
import numpy as np
from ta import momentum, trend
import pandas as pd

def get_daily_data(symbol):
    # ------------------------------------ DAY TIME FRAME -----------------------------------------------------------
    ticker = yf.Ticker(symbol)
    
    # Fetch daily data
    day_df = ticker.history(period="80d", interval="1d")
    
    # Reset the index and convert the 'Date' column to date format
    day_df.reset_index(inplace=True)
    day_df['Date'] = pd.to_datetime(day_df['Date']).dt.date
    
    # Calculate technical indicators
    day_df['Day_10EMA'] = trend.EMAIndicator(day_df['Close'], window=10).ema_indicator()
    day_df['Day_RSI'] = momentum.RSIIndicator(day_df['Close'], window=3).rsi()
    day_df['Day_RSI_EMA'] = trend.EMAIndicator(day_df['Day_RSI'], window=6).ema_indicator()

    # -----------ADD EXTRA INDICATORS HERE --------------

    day_df['Day_Trend'] = np.select(
        [day_df['Day_10EMA'] > day_df['Day_10EMA'].shift(1),
        day_df['Day_10EMA'] < day_df['Day_10EMA'].shift(1)],
        [1, -1],
        0
    )
    
    # Rename columns and drop unnecessary columns
    day_df.rename(columns={'Open': 'Day_Open', 'High': 'Day_High', 'Low': 'Day_Low', 'Close': 'Day_Close'}, inplace=True)
    day_df.drop(columns=['Volume', 'Dividends', 'Stock Splits'], inplace=True)
    day_df.dropna(inplace=True)

    # ------------------------------------ 5 MIN TIME FRAME -----------------------------------------------------------
    previous_days = 20
    df = ticker.history(period=f"{previous_days}d", interval="5m")
    
    # Reset the index and convert the 'Datetime' column to datetime format
    df.reset_index(inplace=True)
    df['Datetime'] = pd.to_datetime(df['Datetime'])
    
    # Add 'Date' and 'Time' columns
    df['Date'] = df['Datetime'].dt.date
    df['Time'] = df['Datetime'].dt.time
    
    # Drop unnecessary columns
    df.drop(columns=['Volume', 'Dividends', 'Stock Splits'], inplace=True)
    df.dropna(inplace=True)
    
    # -----------ADD EXTRA INDICATORS HERE --------------
    df['8EMA'] = trend.EMAIndicator(df['Close'], window=5).ema_indicator()
    df['15EMA'] = trend.EMAIndicator(df['Close'], window=15).ema_indicator()

    df['Prev_8EMA'] = df['8EMA'].shift(1)
    df['Prev_15EMA'] = df['15EMA'].shift(1)

    df['10EMA'] = trend.EMAIndicator(df['Close'], window=10).ema_indicator()

    df['RSI'] = momentum.RSIIndicator(df['Close'], window=3).rsi()
    df['RSI_EMA'] = trend.EMAIndicator(df['RSI'], window=6).ema_indicator()
   
    short_ema = trend.EMAIndicator(df["Close"], window=5)
    long_ema = trend.EMAIndicator(df["Close"], window=20)
    
    df["5EMA"] = short_ema.ema_indicator()
    df["20EMA"] = long_ema.ema_indicator()
    
    # Determine the perfect trend based on EMA crossovers
    df["Trend"] = 0  # Default to Sideways
    df.loc[(df["5EMA"] > df["20EMA"]) & (df["5EMA"].shift(1) <= df["20EMA"].shift(1)), "Trend"] = 1
    df.loc[(df["5EMA"] < df["20EMA"]) & (df["5EMA"].shift(1) >= df["20EMA"].shift(1)), "Trend"] = -1

    # ------------------------------------ MERGE DATA FRAME -----------------------------------------------------------
    merged_df = day_df.merge(df, on='Date', how='inner')
    merged_df.dropna(inplace=True)
    merged_df.drop(columns=['8EMA', '15EMA', 'Prev_8EMA', 'Prev_15EMA', '10EMA', '5EMA', '20EMA'], inplace=True)
    
    # Set 'Datetime' as the index
    merged_df.set_index('Datetime', inplace=True)
    
    return merged_df

df = get_daily_data("^NSEI")
df = df.iloc[:-6]
df.tail()
# df.iloc[-6]


Unnamed: 0_level_0,Date,Day_Open,Day_High,Day_Low,Day_Close,Day_10EMA,Day_RSI,Day_RSI_EMA,Day_Trend,Open,High,Low,Close,Time,RSI,RSI_EMA,Trend
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2023-11-15 14:35:00+05:30,2023-11-15,19651.400391,19693.199219,19579.650391,19675.449219,19414.894367,93.29697,77.052746,1,19667.150391,19677.349609,19665.599609,19675.099609,14:35:00,93.135279,65.785781,0
2023-11-15 14:40:00+05:30,2023-11-15,19651.400391,19693.199219,19579.650391,19675.449219,19414.894367,93.29697,77.052746,1,19675.25,19675.449219,19665.199219,19668.900391,14:40:00,58.43921,63.68676,0
2023-11-15 14:45:00+05:30,2023-11-15,19651.400391,19693.199219,19579.650391,19675.449219,19414.894367,93.29697,77.052746,1,19668.400391,19670.599609,19660.0,19661.400391,14:45:00,34.867133,55.452581,0
2023-11-15 14:50:00+05:30,2023-11-15,19651.400391,19693.199219,19579.650391,19675.449219,19414.894367,93.29697,77.052746,1,19661.449219,19662.099609,19656.0,19657.650391,14:50:00,26.768971,47.257264,0
2023-11-15 14:55:00+05:30,2023-11-15,19651.400391,19693.199219,19579.650391,19675.449219,19414.894367,93.29697,77.052746,1,19657.949219,19661.099609,19645.150391,19646.25,14:55:00,13.000126,37.46951,-1


In [88]:
# import numpy as np
# import pandas as pd
# import tensorflow as tf
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.model_selection import train_test_split

# # Feature scaling
# scaler = MinMaxScaler()
# df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

# # Create sequences for input data and labels
# sequence_length = 5  # You can adjust this based on your requirements
# X, y = [], []

# for i in range(len(df_scaled) - sequence_length):
#     X.append(df_scaled.iloc[i:i+sequence_length].values)
#     y.append(df_scaled.iloc[i+sequence_length].values)

# X = np.array(X)
# y = np.array(y)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Build the neural network model
# model = tf.keras.Sequential([
#     tf.keras.layers.LSTM(units=50, activation='relu', input_shape=(sequence_length, X.shape[2])),
#     tf.keras.layers.Dense(units=X.shape[2])
# ])

# # Compile the model
# model.compile(optimizer='adam', loss='mean_squared_error')

# # Train the model
# model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1)

# # Evaluate the model on the test set
# loss = model.evaluate(X_test, y_test)
# print(f'Mean Squared Error on Test Set: {loss}')

# # Predict the next row data
# last_sequence = df_scaled.iloc[-sequence_length:].values.reshape((1, sequence_length, X.shape[2]))
# predicted_data_scaled = model.predict(last_sequence)
# predicted_data = scaler.inverse_transform(predicted_data_scaled)

# # Display the predicted data
# print("Predicted Next Row Data:")
# print(pd.DataFrame(predicted_data, columns=df.columns))


In [89]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Assuming df contains only numeric columns and 'datetime.date' is excluded

# Feature scaling
numeric_columns = df.select_dtypes(include=[np.number]).columns
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df[numeric_columns]), columns=numeric_columns, index=df.index)

# Create sequences for input data and labels
sequence_length = 5
X, y = [], []

for i in range(len(df_scaled) - sequence_length):
    X.append(df_scaled.iloc[i:i+sequence_length].values)
    y.append(df_scaled.iloc[i+sequence_length].values)

X = np.array(X)
y = np.array(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(units=50, activation='relu', input_shape=(sequence_length, X.shape[2])),
    tf.keras.layers.Dense(units=X.shape[2])
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1)

# Evaluate the model on the test set
loss = model.evaluate(X_test, y_test)
print(f'Mean Squared Error on Test Set: {loss}')

# Predict the next row data
last_sequence = df_scaled.iloc[-sequence_length:].values.reshape((1, sequence_length, X.shape[2]))
predicted_data_scaled = model.predict(last_sequence)

# Ensure the shape of predicted_data_scaled matches the expected shape
if predicted_data_scaled.shape[1] != len(numeric_columns):
    raise ValueError("Number of columns in predicted_data_scaled does not match the expected number of columns.")

predicted_data = scaler.inverse_transform(predicted_data_scaled)

# Display the predicted data
print("Predicted Next Row Data:")
print(pd.DataFrame(predicted_data, columns=numeric_columns))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Mean Squared Error on Test Set: 0.004146963357925415
Predicted Next Row Data:
       Day_Open      Day_High       Day_Low     Day_Close     Day_10EMA  \
0  19668.435547  19683.158203  19580.402344  19651.488281  19427.509766   

     Day_RSI  Day_RSI_EMA  Day_Trend          Open          High  \
0  90.391212    76.360466   1.032883  19650.193359  19647.351562   

            Low         Close        RSI   

In [90]:
     Day_Open      Day_High       Day_Low     Day_Close     Day_10EMA  \
0  19647.820312  19701.669922  19603.171875  19694.462891  19426.248047   

     Day_RSI  Day_RSI_EMA  Day_Trend          Open          High  \
0  90.822365    76.395622   0.998464  19694.767578  19690.248047   

            Low       Close        RSI    RSI_EMA     Trend  
0  19672.388672  19700.4375  78.382072  78.439232  0.046084 

SyntaxError: invalid syntax (50878707.py, line 1)