In [60]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [61]:
# Load the dataset
df = pd.read_csv('../../data/raw/NIFTY50_OHLCV/LT.csv')
df.head()

Unnamed: 0,timestamp,open,high,low,close,volume,open_interest
0,2025-06-27 15:29:00+05:30,3677.1,3679.9,3675.7,3678.9,3200,0
1,2025-06-27 15:28:00+05:30,3676.8,3678.0,3675.1,3677.9,13094,0
2,2025-06-27 15:27:00+05:30,3678.0,3679.7,3675.7,3676.0,15559,0
3,2025-06-27 15:26:00+05:30,3676.8,3679.8,3675.7,3678.4,12007,0
4,2025-06-27 15:25:00+05:30,3674.1,3677.6,3673.3,3677.6,21300,0


### Derived Features calculated:
- Garman-Klass Volatility
- Moving Averages
- RSI
- Bollinger Bands
- ATR
- MACD
- INR Volume

In [62]:
df['garman_klass_vol'] = np.sqrt(
    np.log(df['high'] / df['low']) ** 2 - 0.5 * (np.log(df['close'] / df['open']) ** 2)
) * np.sqrt(1 / (np.log(df['close'] / df['open']) ** 2).mean())

In [63]:
df['mov_avg_5'] = df['close'].rolling(window=5).mean()
df['mov_avg_20'] = df['close'].rolling(window=20).mean()
df['mov_avg_60'] = df['close'].rolling(window=60).mean()

In [64]:
df['rsi'] = (df['close'] - df['close'].rolling(window=14).mean()) / df['close'].rolling(window=14).std()
df['bb_lower'] = df['mov_avg_20'] - 2 * df['close'].rolling(window=20).std()
df['bb_upper'] = df['mov_avg_20'] + 2 * df['close'].rolling(window=20).std()
# df['bollinger_middle'] = df['mov_avg_20']

In [65]:
# Calculate the True Range (TR) components
high_low = df['high'] - df['low']
high_close_prev = (df['high'] - df['close'].shift(1)).abs()
low_close_prev = (df['low'] - df['close'].shift(1)).abs()

# Calculate True Range as the maximum of the three components
true_range = pd.concat([high_low, high_close_prev, low_close_prev], axis=1).max(axis=1)

# Calculate Average True Range (ATR) with a 14-period rolling window
df['atr'] = true_range.rolling(window=14, min_periods=1).mean()

In [66]:
# Calculate MACD (Moving Average Convergence Divergence)
# Standard periods: 12-day EMA, 26-day EMA, and 9-day signal line
ema_12 = df['close'].ewm(span=12).mean()
ema_26 = df['close'].ewm(span=26).mean()

# MACD line = 12-day EMA - 26-day EMA
df['macd'] = ema_12 - ema_26

# Signal line = 9-day EMA of MACD line
df['macd_signal'] = df['macd'].ewm(span=9).mean()

# MACD histogram = MACD line - Signal line
df['macd_histogram'] = df['macd'] - df['macd_signal']

In [67]:
df['inr_volume'] = (df['volume'] * df['close'])/1e6

In [68]:
df

Unnamed: 0,timestamp,open,high,low,close,volume,open_interest,garman_klass_vol,mov_avg_5,mov_avg_20,mov_avg_60,rsi,bb_lower,bb_upper,atr,macd,macd_signal,macd_histogram,inr_volume
0,2025-06-27 15:29:00+05:30,3677.1,3679.9,3675.7,3678.9,3200,0,1.832117,,,,,,,4.200000,0.000000,0.000000,0.000000,11.772480
1,2025-06-27 15:28:00+05:30,3676.8,3678.0,3675.1,3677.9,13094,0,1.279266,,,,,,,4.000000,-0.022436,-0.012464,-0.009972,48.158423
2,2025-06-27 15:27:00+05:30,3678.0,3679.7,3675.7,3676.0,15559,0,1.712709,,,,,,,4.000000,-0.087507,-0.043219,-0.044287,57.194884
3,2025-06-27 15:26:00+05:30,3676.8,3679.8,3675.7,3678.4,12007,0,1.803888,,,,,,,4.025000,-0.028306,-0.038168,0.009861,44.166549
4,2025-06-27 15:25:00+05:30,3674.1,3677.6,3673.3,3677.6,21300,0,1.610711,3677.76,,,,,,4.240000,-0.027376,-0.034957,0.007581,78.332880
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8620,2025-05-28 09:19:00+05:30,3633.8,3634.7,3625.2,3627.0,9651,0,3.800004,3630.78,3628.350,3629.953333,-0.668249,3620.876679,3635.823321,7.185714,0.364401,0.128838,0.235563,35.004177
8621,2025-05-28 09:18:00+05:30,3638.0,3638.0,3633.0,3633.8,5920,0,1.862768,3631.14,3628.980,3629.950000,1.111754,3621.932313,3636.027687,7.592857,0.657154,0.234501,0.422653,21.512096
8622,2025-05-28 09:17:00+05:30,3631.0,3643.7,3630.4,3639.0,179233,0,5.570882,3632.88,3629.785,3630.036667,2.109871,3622.019957,3637.550043,8.257143,1.293845,0.446370,0.847475,652.228887
8623,2025-05-28 09:16:00+05:30,3643.4,3643.8,3630.1,3631.8,12006,0,5.079787,3632.48,3629.845,3630.066667,0.088801,3622.035027,3637.654973,8.500000,1.203573,0.597810,0.605763,43.603391
