In [1]:
!pip install yfinance



In [3]:
import numpy as np
import pandas as pd
import yfinance as yf

## Download Data

In [117]:
date_1, date_2 = "2024-01-01", "2024-08-31"

# Downlaod Data
ser_1 = yf.download('HAL', start=date_1, end=date_2)[['Adj Close']]
ser_2 = yf.download('RELIANCE.NS', start=date_1, end=date_2)[['Adj Close']]
ser_3 = yf.download('TATAMOTORS.NS', start=date_1, end=date_2)[['Adj Close']]

idx_1 = yf.download('^NSEI', start=date_1, end=date_2)[['Adj Close']]
idx_2 = yf.download('^BSESN', start=date_1, end=date_2)[['Adj Close']]

# Rename Columns
ser_1.columns = ['HAL']
ser_2.columns = ['RELIANCE']
ser_3.columns = ['TATAMOTORS']
idx_1.columns = ['NSEI']
idx_2.columns = ['BSESN']

# Join all data together
df = ser_1.join(ser_2).join(ser_3).join(idx_1).join(idx_2)

df.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,HAL,RELIANCE,TATAMOTORS,NSEI,BSESN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-02,35.561352,2602.865967,781.986877,21665.800781,71892.476562
2024-01-03,36.004395,2574.562012,779.045959,21517.349609,71356.601562
2024-01-04,35.226612,2587.866699,793.301941,21658.599609,71847.570312
2024-01-05,35.413673,2598.879395,788.516724,21710.800781,72026.148438
2024-01-08,34.665428,2578.598389,786.672363,21513.0,71355.21875


## Clean Data

In [118]:
# Checking Nulls

nulls = df[df.isnull().any(axis=1)]
nulls

Unnamed: 0_level_0,HAL,RELIANCE,TATAMOTORS,NSEI,BSESN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-22,33.897491,,,,
2024-01-26,36.949543,,,,
2024-03-08,35.823067,,,,
2024-03-25,38.405178,,,,
2024-04-11,40.502525,,,,
2024-04-17,37.633514,,,,
2024-05-01,35.941788,,,,
2024-05-20,37.554371,,,,
2024-06-17,33.219723,,,,
2024-07-17,36.152924,,,,


In [119]:
# Getting Numerical Index from Dataframe
null_idx = [df.index.get_loc(x) for x in sorted(nulls.index)]

# Filling Nulls With Average of Previous and Next Availabel Value
for idx in null_idx:
    df.iloc[idx, 1:] = (df.iloc[idx-1, 1:] + df.iloc[idx+1, 1:])/2

## Data Preparation

In [120]:
# Normalize Data [Scale Down]
norm_factor = df.iloc[0]
df = df/df.iloc[0]

# Creating Lag for Indices.
df[['5Days-Future-NSEI', '5Days-Future-BSESN']] = df[['NSEI', 'BSESN']].shift(-5)
df.dropna(inplace=True)

df

Unnamed: 0_level_0,HAL,RELIANCE,TATAMOTORS,NSEI,BSESN,5Days-Future-NSEI,5Days-Future-BSESN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-01-02,1.000000,1.000000,1.000000,1.000000,1.000000,0.994417,0.992958
2024-01-03,1.012459,0.989126,0.996239,0.993148,0.992546,0.997826,0.996734
2024-01-04,0.990587,0.994237,1.014470,0.999668,0.999375,0.999141,0.997617
2024-01-05,0.995847,0.998468,1.008350,1.002077,1.001859,1.010558,1.009403
2024-01-08,0.974806,0.990677,1.005992,0.992947,0.992527,1.016916,1.017196
...,...,...,...,...,...,...,...
2024-08-19,0.890534,1.143662,1.390944,1.134168,1.118680,1.154382,1.136393
2024-08-20,0.869005,1.149464,1.389921,1.139992,1.123940,1.154712,1.136583
2024-08-21,0.872640,1.151558,1.387747,1.143286,1.125365,1.156309,1.137609
2024-08-22,0.870962,1.151135,1.366327,1.145192,1.127422,1.160906,1.142465


In [129]:
df.iloc[158:158+5]

Unnamed: 0_level_0,HAL,RELIANCE,TATAMOTORS,NSEI,BSESN,5Days-Future-NSEI,5Days-Future-BSESN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-08-19,0.890534,1.143662,1.390944,1.134168,1.11868,1.154382,1.136393
2024-08-20,0.869005,1.149464,1.389921,1.139992,1.12394,1.154712,1.136583
2024-08-21,0.87264,1.151558,1.387747,1.143286,1.125365,1.156309,1.137609
2024-08-22,0.870962,1.151135,1.366327,1.145192,1.127422,1.160906,1.142465
2024-08-23,0.888018,1.152556,1.387683,1.14573,1.127882,1.16478,1.14568


In [136]:
def get_sample():
    inputs = ['HAL', 'RELIANCE', 'TATAMOTORS']
    outputs = ['5Days-Future-NSEI', '5Days-Future-BSESN']

    seq_len = 5
    input_depth = len(inputs)
    output_depth = len(outputs)
    batch_size = 2

    max_idx = len(df) - 4

    idx = 0
    yield "Starting Data Sample Generator."
    while True:
        # Reset Index
        if idx == max_idx:
            idx = 0
        # Fetch Sample
        X = df[inputs].iloc[idx:idx+seq_len].values.T
        Y = df[outputs].iloc[idx:idx+seq_len].values.T
        # Increase Index
        idx += 1
        # Return Sample
        yield X,Y

In [137]:
data_gen = get_sample()
next(data_gen)

'Starting Data Sample Generator.'

## Modelling