In [1]:
import pandas as pd
import numpy as np
import os

# Load the merged 5-minute dataset, parse timestamps (unparseable values become
# NaT and are dropped), then order the rows chronologically with a fresh index.
merged = (
    pd.read_csv("data_clean/nifty_merged_5min.csv")
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"], errors="coerce"))
    .dropna(subset=["timestamp"])
    .sort_values("timestamp")
    .reset_index(drop=True)
)

print("Merged shape:", merged.shape)
merged.head()

Merged shape: (4125, 18)


Unnamed: 0,timestamp,open,high,low,close,volume,fut_open,fut_high,fut_low,fut_close,fut_volume,fut_oi,ce_close,ce_oi,ce_vol,pe_close,pe_oi,pe_vol
0,2025-10-29 09:15:00+05:30,25982.0,26022.85,25966.0,25979.75,0,26400.0,26497.2,26350.0,26457.4,2795,1495,1235.05,787350.0,3225.0,547.866667,1465750,9600
1,2025-10-29 09:20:00+05:30,25980.55,25992.5,25968.4,25988.55,0,26457.4,26474.1,26457.4,26458.5,2600,2730,1584.2375,984425.0,2925.0,552.333333,1468600,11100
2,2025-10-29 09:25:00+05:30,25990.3,26016.85,25988.15,26010.0,0,26458.5,26499.0,26458.5,26494.0,1950,4940,1610.125,983730.0,3805.0,501.0875,1469115,11605
3,2025-10-29 09:30:00+05:30,26010.75,26025.05,25996.3,25996.35,0,26494.0,26500.0,26492.0,26492.0,1755,7345,1607.55,986345.0,8520.0,500.275,1468315,13365
4,2025-10-29 09:35:00+05:30,25996.9,25996.9,25963.2,25965.2,0,26492.0,26492.0,26460.0,26464.5,1625,8060,1596.8375,986540.0,505.0,512.725,1470915,7795


In [2]:
# Exponential moving averages of the spot close (recursive form, adjust=False)
# for a short (5-bar) and a longer (15-bar) span.
spot_close = merged["close"]
for ema_span in (5, 15):
    merged[f"ema_{ema_span}"] = spot_close.ewm(span=ema_span, adjust=False).mean()

merged[["timestamp", "close", "ema_5", "ema_15"]].tail()

Unnamed: 0,timestamp,close,ema_5,ema_15
4120,2026-01-16 15:05:00+05:30,25694.2,25698.595495,25700.95014
4121,2026-01-16 15:10:00+05:30,25682.45,25693.213663,25698.637622
4122,2026-01-16 15:15:00+05:30,25694.05,25693.492442,25698.064169
4123,2026-01-16 15:20:00+05:30,25700.0,25695.661628,25698.306148
4124,2026-01-16 15:25:00+05:30,25701.9,25697.741085,25698.75538


In [3]:
# Bar-over-bar simple returns for the spot and futures closes.
# The first row of each return column is NaN because it has no previous bar.
for ret_col, price_col in {"spot_ret": "close", "fut_ret": "fut_close"}.items():
    merged[ret_col] = merged[price_col].pct_change()

merged[["timestamp", "spot_ret", "fut_ret"]].tail()

Unnamed: 0,timestamp,spot_ret,fut_ret
4120,2026-01-16 15:05:00+05:30,-0.000121,-5.4e-05
4121,2026-01-16 15:10:00+05:30,-0.000457,-0.000388
4122,2026-01-16 15:15:00+05:30,0.000452,0.000587
4123,2026-01-16 15:20:00+05:30,0.000232,0.000326
4124,2026-01-16 15:25:00+05:30,7.4e-05,-0.000508


In [4]:
# Futures basis: the futures close minus the spot close, as a fraction of spot.
spot_close = merged["close"]
merged["futures_basis"] = merged["fut_close"].sub(spot_close).div(spot_close)

merged[["timestamp", "futures_basis"]].tail()

Unnamed: 0,timestamp,futures_basis
4120,2026-01-16 15:05:00+05:30,0.002133
4121,2026-01-16 15:10:00+05:30,0.002202
4122,2026-01-16 15:15:00+05:30,0.002337
4123,2026-01-16 15:20:00+05:30,0.002432
4124,2026-01-16 15:25:00+05:30,0.001848


In [5]:
# Put/call ratios: put-side value divided by call-side value, for open interest
# and for volume.
for ratio_col, put_col, call_col in (
    ("pcr_oi", "pe_oi", "ce_oi"),
    ("pcr_vol", "pe_vol", "ce_vol"),
):
    merged[ratio_col] = merged[put_col].div(merged[call_col])

# A zero denominator produces +/-inf; convert those to NaN across the whole frame.
merged.replace([np.inf, -np.inf], np.nan, inplace=True)

merged[["timestamp", "pcr_oi", "pcr_vol"]].tail()

Unnamed: 0,timestamp,pcr_oi,pcr_vol
4120,2026-01-16 15:05:00+05:30,0.961875,1.516099
4121,2026-01-16 15:10:00+05:30,0.955375,1.459188
4122,2026-01-16 15:15:00+05:30,0.952799,1.173775
4123,2026-01-16 15:20:00+05:30,0.985175,1.023644
4124,2026-01-16 15:25:00+05:30,0.986628,1.052594


In [6]:
!pip install py_vollib



In [7]:
from py_vollib.black_scholes.implied_volatility import implied_volatility
from py_vollib.black_scholes.greeks.analytical import delta, gamma, theta, vega, rho
from math import log

RISK_FREE = 0.065  # 6.5%; used as the rate `r` for the IV and greeks calculations (presumably an annualised risk-free rate — confirm)

def safe_iv(price, S, K, t, r, flag):
    """Return the implied volatility for one option quote, or NaN on failure.

    Parameters
    ----------
    price : option price to invert.
    S : underlying price.
    K : strike price.
    t : time to expiry, forwarded unchanged to ``implied_volatility``.
    r : interest rate, forwarded unchanged to ``implied_volatility``.
    flag : option-type flag forwarded to ``implied_volatility``
        (the notebook passes "c" or "p").

    Returns
    -------
    The value returned by ``implied_volatility``, or ``np.nan`` when any of
    ``price``, ``S``, ``K``, ``t`` is missing/non-positive or the solver raises.
    """
    try:
        # `not (x > 0)` is true for NaN as well as for zero and negatives, so
        # missing quotes are rejected here instead of reaching the solver.
        if not (price > 0 and S > 0 and K > 0 and t > 0):
            return np.nan
        return implied_volatility(price, S, K, t, r, flag)
    except Exception:
        # Best-effort: any solver or type error maps to NaN. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate so a
        # long row-by-row loop can still be interrupted.
        return np.nan

def safe_greeks(flag, S, K, t, r, sigma):
    """Return ``(delta, gamma, theta, vega, rho)`` for one option, or five NaNs.

    Parameters
    ----------
    flag : option-type flag forwarded to the greek functions
        (the notebook passes "c" or "p").
    S : underlying price.
    K : strike price.
    t : time to expiry, forwarded unchanged.
    r : interest rate, forwarded unchanged.
    sigma : volatility; the notebook passes the output of ``safe_iv``.

    Returns
    -------
    A 5-tuple in the order delta, gamma, theta, vega, rho. Every element is
    ``np.nan`` when ``sigma`` is missing/non-positive or any greek call raises.
    """
    nan_greeks = (np.nan,) * 5
    try:
        # `not (sigma > 0)` covers NaN (a failed IV solve) as well as <= 0.
        if not (sigma > 0):
            return nan_greeks
        return (
            delta(flag, S, K, t, r, sigma),
            gamma(flag, S, K, t, r, sigma),
            theta(flag, S, K, t, r, sigma),
            vega(flag, S, K, t, r, sigma),
            rho(flag, S, K, t, r, sigma),
        )
    except Exception:
        # Best-effort: a failing greek yields an all-NaN row. Exception (not a
        # bare except) keeps KeyboardInterrupt/SystemExit propagating.
        return nan_greeks

In [8]:
# Snap the spot close to the nearest multiple of `step` to get the strike used
# as "at the money" (presumably 50 is the strike spacing — confirm).
step = 50
merged["atm_strike"] = merged["close"].div(step).round().mul(step)

In [9]:
# Time to expiry in years, set to the same constant (7/365) on every row.
# NOTE(review): this value does not depend on the bar's timestamp, so the IV and
# greeks computed from it assume a fixed one-week horizon for all bars — confirm
# whether the actual days-to-expiry should be used instead.
merged["t_years"] = 7 / 365  # approx weekly expiry

In [10]:
# Row-by-row implied volatility and greeks for the call and put legs.
# Each bar uses its spot close as the underlying, the rounded ATM strike, the
# per-row time to expiry, and the module-level RISK_FREE rate.
ivs_call, ivs_put = [], []
greeks_call, greeks_put = [], []

for bar in merged.itertuples():
    spot, strike, tte = bar.close, bar.atm_strike, bar.t_years

    call_iv = safe_iv(bar.ce_close, spot, strike, tte, RISK_FREE, "c")
    put_iv = safe_iv(bar.pe_close, spot, strike, tte, RISK_FREE, "p")
    ivs_call.append(call_iv)
    ivs_put.append(put_iv)

    # Each leg's greeks are evaluated at that leg's own implied volatility.
    greeks_call.append(safe_greeks("c", spot, strike, tte, RISK_FREE, call_iv))
    greeks_put.append(safe_greeks("p", spot, strike, tte, RISK_FREE, put_iv))

merged["iv_call"] = ivs_call
merged["iv_put"] = ivs_put

# Expand the 5-tuples into one column per greek, call leg first, then put leg.
greek_names = ("delta", "gamma", "theta", "vega", "rho")
for side, greek_rows in (("call", greeks_call), ("put", greeks_put)):
    merged[[f"{greek}_{side}" for greek in greek_names]] = pd.DataFrame(
        greek_rows, index=merged.index
    )

merged[["timestamp", "iv_call", "iv_put", "delta_call", "delta_put"]].tail()

Unnamed: 0,timestamp,iv_call,iv_put,delta_call,delta_put
4120,2026-01-16 15:05:00+05:30,0.25364,0.170986,0.518594,-0.478088
4121,2026-01-16 15:10:00+05:30,0.25578,0.168123,0.513409,-0.485704
4122,2026-01-16 15:15:00+05:30,0.255071,0.16901,0.518503,-0.478041
4123,2026-01-16 15:20:00+05:30,0.254164,0.169895,0.52114,-0.474188
4124,2026-01-16 15:25:00+05:30,0.251184,0.170705,0.522072,-0.473021


In [11]:
# Combine the two legs' implied vols: their mean and their difference.
call_iv, put_iv = merged["iv_call"], merged["iv_put"]
merged["avg_iv"] = (call_iv + put_iv) / 2
merged["iv_spread"] = call_iv - put_iv

# Ratio of absolute call delta to absolute put delta; a zero put delta gives
# +/-inf, which is converted to NaN across the whole frame.
merged["delta_neutral_ratio"] = merged["delta_call"].abs() / merged["delta_put"].abs()
merged.replace([np.inf, -np.inf], np.nan, inplace=True)

# Gamma Exposure = spot close × gamma × open interest
for side, oi_col in (("call", "ce_oi"), ("put", "pe_oi")):
    merged[f"gamma_exposure_{side}"] = merged["close"] * merged[f"gamma_{side}"] * merged[oi_col]

In [12]:
# Write the feature frame to CSV (without the index), creating the output
# directory if needed, then report what was written.
save_path = "data_features/nifty_features_5min.csv"
os.makedirs(os.path.dirname(save_path), exist_ok=True)
merged.to_csv(save_path, index=False)

n_rows, n_cols = merged.shape
print("Saved:", save_path)
print("Rows:", n_rows)
print("Columns:", n_cols)

Saved: data_features/nifty_features_5min.csv
Rows: 4125
Columns: 44


In [13]:
# Read the saved feature file back and preview the derived IV / gamma columns.
df = pd.read_csv("data_features/nifty_features_5min.csv")
print(df.shape)

preview_cols = ["avg_iv", "iv_spread", "gamma_exposure_call", "gamma_exposure_put"]
df.loc[:, preview_cols].head()

(4125, 44)


Unnamed: 0,avg_iv,iv_spread,gamma_exposure_call,gamma_exposure_put
0,0.621497,0.470884,2641532.0,10930620.0
1,0.744856,0.705429,2575267.0,10779170.0
2,0.735631,0.74406,2548710.0,11620330.0
3,0.734716,0.75248,2548608.0,11786500.0
4,0.736356,0.724437,2576955.0,11304420.0


In [1]:
import pandas as pd

# Reload the feature file and verify that every IV / greek column is present.
df = pd.read_csv("data_features/nifty_features_5min.csv")
cols = df.columns.tolist()

# Required columns: each metric for the call leg, then the put leg.
need = [
    f"{metric}_{side}"
    for metric in ("iv", "delta", "gamma", "theta", "vega", "rho")
    for side in ("call", "put")
]

missing = [name for name in need if name not in cols]
print("Missing columns:", missing)

Missing columns: []


In [2]:
# For each required column that exists, print its null count and value range.
for name in (c for c in need if c in df.columns):
    values = df[name]
    print(name, "null:", values.isna().sum(), "| min:", values.min(), "| max:", values.max())

iv_call null: 12 | min: 0.2303214902774957 | max: 1.1441861533143367
iv_put null: 0 | min: 0.1532758298828472 | max: 0.4760045317500655
delta_call null: 12 | min: 0.5101208151802851 | max: 0.5366732058344778
delta_put null: 0 | min: -0.4907600927053845 | max: -0.4571368226894011
gamma_call null: 12 | min: 9.636239434542236e-05 | max: 0.0004858631993727
gamma_put null: 0 | min: 0.0002322576763597 | max: 0.0007145522368491
theta_call null: 12 | min: -119.32719502980352 | max: -25.66388656515924
theta_put null: 0 | min: -46.45201268637286 | max: -13.569896390838805
vega_call null: 12 | min: 13.958684809020456 | max: 14.539580181231033
vega_put null: 0 | min: 13.949330485432018 | max: 14.539568370715022
rho_call null: 12 | min: 2.340182740730943 | max: 2.582888472221283
rho_put null: 0 | min: -2.542289410103806 | max: -2.302830385813108


In [3]:
# Drop every row where any call-leg IV/greek value is missing.
call_metric_cols = [
    f"{metric}_call" for metric in ("iv", "delta", "gamma", "theta", "vega", "rho")
]
df = df.dropna(subset=call_metric_cols)

In [4]:
# Recount nulls in the call-leg IV/greek columns.
call_metric_cols = [
    f"{metric}_call" for metric in ("iv", "delta", "gamma", "theta", "vega", "rho")
]
df[call_metric_cols].isna().sum()

iv_call       0
delta_call    0
gamma_call    0
theta_call    0
vega_call     0
rho_call      0
dtype: int64

In [5]:
# Renumber the rows 0..n-1, discarding the old index (which has gaps after
# rows were dropped).
df = df.reset_index(drop=True)

In [6]:
# Write the cleaned frame back to the same path it was read from, overwriting
# the earlier file.
final_path = "data_features/nifty_features_5min.csv"
df.to_csv(final_path, index=False)
print("Saved final:", df.shape)

Saved final: (4113, 44)


In [7]:
# Preview the timestamp alongside whichever required IV/greek columns exist.
present_cols = [name for name in need if name in df.columns]
df[["timestamp", *present_cols]].head()

Unnamed: 0,timestamp,iv_call,iv_put,delta_call,delta_put,gamma_call,gamma_put,theta_call,theta_put,vega_call,vega_put,rho_call,rho_put
0,2025-10-29 09:15:00+05:30,0.856939,0.386055,0.525226,-0.485851,0.000129,0.000287,-89.889956,-37.209116,14.324459,14.344135,2.380038,-2.525782
1,2025-10-29 09:20:00+05:30,1.097571,0.392141,0.532399,-0.483251,0.000101,0.000282,-114.37439,-37.846638,14.31065,14.345369,2.349707,-2.5145
2,2025-10-29 09:25:00+05:30,1.107661,0.363601,0.534796,-0.477046,0.0001,0.000304,-115.450167,-34.960049,14.315183,14.346082,2.358887,-2.475707
3,2025-10-29 09:30:00+05:30,1.110956,0.358476,0.533518,-0.481215,9.9e-05,0.000309,-115.752007,-34.417721,14.311614,14.346409,2.351609,-2.49509
4,2025-10-29 09:35:00+05:30,1.098575,0.374138,0.535106,-0.475573,0.000101,0.000296,-114.319455,-35.973883,14.289548,14.318226,2.358384,-2.466508
