“Spot, futures, and options datasets were inner-joined on their shared `datetime` timestamps. Core quantitative features — EMA indicators, futures basis, spot and futures returns, implied-volatility metrics, and put-call ratios — were engineered for downstream regime detection and strategy modeling.”

In [1]:
import pandas as pd
import numpy as np

# Notebook display settings: never truncate the column list and allow
# output up to 120 characters wide.
for option_name, option_value in {
    'display.max_columns': None,
    'display.width': 120,
}.items():
    pd.set_option(option_name, option_value)


In [2]:
# Read the three input CSV files (paths are relative to the notebook).
spot = pd.read_csv("../data/nifty_spot_5min.csv")
futures = pd.read_csv("../data/nifty_futures_5min.csv")
options = pd.read_csv("../data/nifty_options_5min.csv")

# Parse the `datetime` column of each frame into pandas datetimes so the
# frames can be joined on it.
for frame in (spot, futures, options):
    frame['datetime'] = pd.to_datetime(frame['datetime'])

print("All datasets loaded successfully ✅")


All datasets loaded successfully ✅


In [3]:
# Inner-join `spot` and `futures` on `datetime`: only timestamps present in
# both frames are kept.
# validate='one_to_one' makes pandas raise MergeError if either frame contains
# duplicate `datetime` values, instead of silently multiplying rows.
merged = pd.merge(
    spot,
    futures,
    on='datetime',
    how='inner',
    validate='one_to_one',
)

print("After Spot + Futures merge:", merged.shape)
merged.head()


After Spot + Futures merge: (18504, 12)


Unnamed: 0,datetime,open,high,low,close,volume,futures_open,futures_high,futures_low,futures_close,futures_volume,open_interest
0,2024-01-01 09:15:00,21727.75,21737.35,21693.55,21693.75,0,,21727.351567,21683.940275,21705.645921,2821,520754
1,2024-01-01 09:20:00,21693.9,21701.05,21684.75,21696.55,0,21705.645921,21727.351567,21679.857477,21701.559036,5889,624779
2,2024-01-01 09:25:00,21697.3,21704.1,21693.2,21700.8,0,21701.559036,21736.052258,21679.857477,21714.33792,2585,602047
3,2024-01-01 09:30:00,21700.15,21705.8,21684.05,21703.05,0,21714.33792,21747.8142,21692.623582,21726.088112,5659,898063
4,2024-01-01 09:35:00,21703.05,21711.9,21689.7,21689.7,0,21726.088112,21747.8142,21671.973884,21693.667552,3280,581472


In [4]:
# Inner-join the `options` frame onto the existing `merged` frame on
# `datetime`.
# validate='one_to_one' makes pandas raise MergeError if either side contains
# duplicate `datetime` values, instead of silently multiplying rows.
# NOTE: `merged` is reassigned from itself, so re-running only this cell would
# join `options` a second time; re-run from the cell that first builds
# `merged` instead.
merged = pd.merge(
    merged,
    options,
    on='datetime',
    how='inner',
    validate='one_to_one',
)

print("After adding Options:", merged.shape)
merged.head()


After adding Options: (18504, 19)


Unnamed: 0,datetime,open,high,low,close,volume,futures_open,futures_high,futures_low,futures_close,futures_volume,open_interest,atm_strike,call_iv,put_iv,call_oi,put_oi,call_volume,put_volume
0,2024-01-01 09:15:00,21727.75,21737.35,21693.55,21693.75,0,,21727.351567,21683.940275,21705.645921,2821,520754,21700.0,0.161702,0.170703,227864,193801,2205,1573
1,2024-01-01 09:20:00,21693.9,21701.05,21684.75,21696.55,0,21705.645921,21727.351567,21679.857477,21701.559036,5889,624779,21700.0,0.192032,0.195383,113462,385714,1412,3992
2,2024-01-01 09:25:00,21697.3,21704.1,21693.2,21700.8,0,21701.559036,21736.052258,21679.857477,21714.33792,2585,602047,21700.0,0.120011,0.137796,444289,438465,2941,2152
3,2024-01-01 09:30:00,21700.15,21705.8,21684.05,21703.05,0,21714.33792,21747.8142,21692.623582,21726.088112,5659,898063,21700.0,0.150233,0.163577,385292,244903,991,3465
4,2024-01-01 09:35:00,21703.05,21711.9,21689.7,21689.7,0,21726.088112,21747.8142,21671.973884,21693.667552,3280,581472,21700.0,0.134676,0.144133,127806,206026,4101,547


In [5]:
# Futures basis as a fraction of the `close` column:
# (futures_close - close) / close.
merged['futures_basis'] = (
    merged['futures_close']
    .sub(merged['close'])
    .div(merged['close'])
)


In [6]:
# Row-over-row percentage change of `close` and `futures_close`.
# The first row has no predecessor, so both returns are NaN there.
merged = merged.assign(
    spot_return=merged['close'].pct_change(),
    futures_return=merged['futures_close'].pct_change(),
)


In [7]:
# Implied-volatility features: the mean of call and put IV, and the
# call-minus-put IV spread.
merged['avg_iv'] = merged['call_iv'].add(merged['put_iv']).div(2)
merged['iv_spread'] = merged['call_iv'].sub(merged['put_iv'])


In [8]:
# Put-call ratios (put / call) for open interest and for volume.
# A zero denominator would otherwise produce +/-inf (or NaN for 0/0), and
# pandas' dropna() does not treat inf as missing by default, so inf values
# would survive into any saved output. Masking zero denominators turns those
# ratios into NaN instead.
merged['pcr_oi'] = (
    merged['put_oi'] / merged['call_oi'].mask(merged['call_oi'] == 0)
)
merged['pcr_volume'] = (
    merged['put_volume'] / merged['call_volume'].mask(merged['call_volume'] == 0)
)


In [9]:
# Exponential moving averages of `close` with spans 5 and 15.
# adjust=False selects pandas' recursive EMA form.
# NOTE(review): the EMA runs over the whole column without any grouping, so
# it carries across any gaps between rows — confirm that is intended.
for ema_column, ema_span in {'ema_5': 5, 'ema_15': 15}.items():
    merged[ema_column] = merged['close'].ewm(span=ema_span, adjust=False).mean()


In [10]:
# Drop every row that has a missing value in any column, then renumber the
# index from 0 so it is contiguous again.
merged = merged.dropna(axis=0, how='any')
merged = merged.reset_index(drop=True)

print("Final merged dataset shape:", merged.shape)


Final merged dataset shape: (18503, 28)


In [11]:
# Display the column index of `merged` as a quick check of which raw and
# engineered columns are present.
merged.columns


Index(['datetime', 'open', 'high', 'low', 'close', 'volume', 'futures_open', 'futures_high', 'futures_low',
       'futures_close', 'futures_volume', 'open_interest', 'atm_strike', 'call_iv', 'put_iv', 'call_oi', 'put_oi',
       'call_volume', 'put_volume', 'futures_basis', 'spot_return', 'futures_return', 'avg_iv', 'iv_spread', 'pcr_oi',
       'pcr_volume', 'ema_5', 'ema_15'],
      dtype='object')

In [12]:
# Write the merged feature table to CSV; index=False leaves the row index
# out of the file.
output_file = "../data/nifty_merged_features_5min.csv"
merged.to_csv(path_or_buf=output_file, index=False)

print("Merged + feature dataset saved at:", output_file)


Merged + feature dataset saved at: ../data/nifty_merged_features_5min.csv
