In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime
import os

# --------------------------------------------------
# Ensure directories exist
# --------------------------------------------------
# The parent of the current working directory is treated as the project root;
# raw data files live under <root>/DATA/raw.
BASE_DIR = os.path.split(os.getcwd())[0]
DATA_DIR = os.path.join(BASE_DIR, *("DATA", "raw"))

# Create the output folder (and any missing parents) up front; an already
# existing folder is not an error.
os.makedirs(name=DATA_DIR, exist_ok=True)

# --------------------------------------------------
# 1. Fetch NIFTY Spot Data
# --------------------------------------------------
def fetch_nifty_spot():
    """Download recent 5-minute NIFTY 50 spot bars and normalise the frame.

    Requests ``^NSEI`` from ``yf.download`` with ``period="60d"`` and
    ``interval="5m"``, flattens multi-level column headers, turns the index
    into a ``timestamp`` column and lower-cases the OHLCV column names.

    Returns:
        pandas.DataFrame: one row per bar. The first column is ``timestamp``
        (timezone-naive); ``Open``/``High``/``Low``/``Close``/``Volume`` are
        renamed to ``open``/``high``/``low``/``close``/``volume``. Any other
        columns returned by the download are left untouched.

    Raises:
        RuntimeError: if the download produced no rows. Without this check
            the failure would only surface later as an unrelated ``KeyError``
            or as empty CSV files.
    """
    print("Fetching NIFTY 50 Spot data...")

    nifty = yf.download(
        tickers="^NSEI",
        period="60d",
        interval="5m",
        progress=False,
    )

    if nifty is None or nifty.empty:
        raise RuntimeError(
            "No NIFTY 50 spot data returned for ^NSEI (period=60d, interval=5m)"
        )

    # Some downloads come back with (field, ticker) column pairs; keep only
    # the field level so columns are plain "Open", "High", ...
    if isinstance(nifty.columns, pd.MultiIndex):
        nifty.columns = nifty.columns.get_level_values(0)

    # Name the index explicitly before resetting it, so the resulting column
    # is "timestamp" regardless of what the downloaded index was called.
    nifty = nifty.rename_axis("timestamp").reset_index()

    nifty = nifty.rename(columns={
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Volume": "volume",
    })

    # Drop any timezone so downstream CSVs hold naive wall-clock timestamps.
    nifty["timestamp"] = pd.to_datetime(nifty["timestamp"]).dt.tz_localize(None)

    print(f"Fetched {len(nifty)} rows of spot data")
    return nifty

# --------------------------------------------------
# 2. Generate Synthetic Options Data (Mock)
# --------------------------------------------------
def generate_synthetic_options(spot_df, *, seed=None):
    """Build a mock option chain (CE and PE) for every spot bar.

    For each row of ``spot_df`` the at-the-money strike is the ``close``
    rounded to the nearest multiple of 50, and quotes are generated for the
    strikes ATM, ATM + 50 and ATM + 100. Each strike gets one random time
    value, implied volatility and open interest, shared by its call and put;
    the price is intrinsic value plus that time value.

    Args:
        spot_df: DataFrame with at least ``timestamp`` and ``close`` columns.
        seed: Optional seed for reproducible output. When ``None`` (default)
            the global ``np.random`` state is used, exactly as before.

    Returns:
        pandas.DataFrame: columns ``timestamp``, ``strike``, ``type``
        (``"CE"``/``"PE"``), ``ltp``, ``iv``, ``oi``, ``volume``; six rows per
        usable spot bar. The columns are present even when no rows are
        generated, so the CSV schema stays stable.

    Raises:
        KeyError: if ``spot_df`` lacks a ``timestamp`` or ``close`` column.
    """
    print("Generating synthetic options data...")

    columns = ["timestamp", "strike", "type", "ltp", "iv", "oi", "volume"]

    # The np.random module and a RandomState instance expose the same
    # uniform/randint API, so unseeded calls keep using the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    options = []
    skipped = 0

    # Iterating the two needed columns avoids building a Series per row.
    for timestamp, spot_price in zip(spot_df["timestamp"], spot_df["close"]):
        # round() cannot convert NaN to an integer strike; skip such bars
        # instead of aborting the whole run.
        if pd.isna(spot_price):
            skipped += 1
            continue

        atm = round(spot_price / 50) * 50

        for strike in (atm, atm + 50, atm + 100):
            # Draw order (time value, iv, oi) is kept so that seeding the
            # global generator yields the same stream as before.
            time_value = rng.uniform(50, 150)
            iv = rng.uniform(10, 25)
            oi = rng.randint(1000, 50000)

            quotes = (
                ("CE", max(spot_price - strike, 0) + time_value),
                ("PE", max(strike - spot_price, 0) + time_value),
            )
            for option_type, ltp in quotes:
                options.append({
                    "timestamp": timestamp,
                    "strike": strike,
                    "type": option_type,
                    "ltp": ltp,
                    "iv": iv,
                    "oi": oi,
                    "volume": oi // 10,
                })

    if skipped:
        print(f"Skipped {skipped} rows with missing close price")

    return pd.DataFrame(options, columns=columns)

# --------------------------------------------------
# Main Execution
# --------------------------------------------------
if __name__ == "__main__":
    # Spot bars are fetched once and reused as the base for the synthetic
    # futures and options series written below.
    spot_data = fetch_nifty_spot()
    spot_csv = os.path.join(DATA_DIR, "nifty_spot_5min.csv")
    spot_data.to_csv(spot_csv, index=False)

    # Futures (synthetic): spot OHLC scaled by 1.005 plus a random open
    # interest value per bar.
    price_columns = ["open", "high", "low", "close"]
    futures_data = spot_data.copy()
    futures_data[price_columns] = futures_data[price_columns] * 1.005
    futures_data["oi"] = np.random.randint(
        50_000, 200_000, size=len(futures_data)
    )
    futures_csv = os.path.join(DATA_DIR, "nifty_futures_5min.csv")
    futures_data.to_csv(futures_csv, index=False)

    # Options (synthetic): mock chain derived from the spot close.
    options_data = generate_synthetic_options(spot_data)
    options_csv = os.path.join(DATA_DIR, "nifty_options_5min.csv")
    options_data.to_csv(options_csv, index=False)

    print("✅ All data saved in DATA/raw/")


Fetching NIFTY 50 Spot data...
Fetched 4252 rows of spot data
Generating synthetic options data...
✅ All data saved in DATA/raw/
