In [1]:
# Data Preparation Notebook
# Create and clean dummy intraday market data

import pandas as pd
import numpy as np


In [2]:
# Create 5-minute timestamps
dates = pd.date_range(
    start="2024-01-01",
    periods=1000,
    freq="5min"
)

print("Total candles:", len(dates))


Total candles: 1000


In [3]:
np.random.seed(42)

spot = pd.DataFrame({
    "datetime": dates,
    "open": np.cumsum(np.random.randn(len(dates))) + 18000,
})

spot["high"] = spot["open"] + np.random.rand(len(dates)) * 20
spot["low"] = spot["open"] - np.random.rand(len(dates)) * 20
spot["close"] = spot["open"] + np.random.randn(len(dates)) * 5
spot["volume"] = np.random.randint(100000, 300000, len(dates))

spot.head()


Unnamed: 0,datetime,open,high,low,close,volume
0,2024-01-01 00:00:00,18000.496714,18003.846366,17996.115338,17997.463213,266412
1,2024-01-01 00:05:00,18000.35845,18002.449807,17999.624023,18001.414868,205219
2,2024-01-01 00:10:00,18001.006138,18013.734743,17998.845623,18007.006533,105033
3,2024-01-01 00:15:00,18002.529168,18016.658683,17995.751955,18000.069656,173324
4,2024-01-01 00:20:00,18002.295015,18002.926738,17986.243301,17992.912251,115689


In [4]:
futures = spot.copy()

futures["close"] = futures["close"] + np.random.randn(len(dates)) * 10
futures["open_interest"] = np.random.randint(50000, 150000, len(dates))

futures.head()


Unnamed: 0,datetime,open,high,low,close,volume,open_interest
0,2024-01-01 00:00:00,18000.496714,18003.846366,17996.115338,18011.933319,266412,136799
1,2024-01-01 00:05:00,18000.35845,18002.449807,17999.624023,18014.698774,205219,98367
2,2024-01-01 00:10:00,18001.006138,18013.734743,17998.845623,18011.563403,105033,63201
3,2024-01-01 00:15:00,18002.529168,18016.658683,17995.751955,18006.700648,173324,52739
4,2024-01-01 00:20:00,18002.295015,18002.926738,17986.243301,18013.520479,115689,63231


In [5]:
options = pd.DataFrame({
    "datetime": dates,
    "call_oi": np.random.randint(20000, 80000, len(dates)),
    "put_oi": np.random.randint(20000, 80000, len(dates)),
    "call_volume": np.random.randint(5000, 20000, len(dates)),
    "put_volume": np.random.randint(5000, 20000, len(dates)),
    "call_iv": np.random.uniform(0.12, 0.25, len(dates)),
    "put_iv": np.random.uniform(0.12, 0.25, len(dates))
})

options.head()


Unnamed: 0,datetime,call_oi,put_oi,call_volume,put_volume,call_iv,put_iv
0,2024-01-01 00:00:00,68084,40622,15638,17467,0.127579,0.23766
1,2024-01-01 00:05:00,20352,75264,10828,11163,0.249795,0.176499
2,2024-01-01 00:10:00,76095,48584,9994,18935,0.217812,0.183452
3,2024-01-01 00:15:00,41586,67850,16556,17919,0.232307,0.240565
4,2024-01-01 00:20:00,25344,24722,11436,12635,0.204656,0.187729


In [6]:
merged = spot.merge(futures, on="datetime", suffixes=("_spot", "_fut"))
merged = merged.merge(options, on="datetime")

merged.head()


Unnamed: 0,datetime,open_spot,high_spot,low_spot,close_spot,volume_spot,open_fut,high_fut,low_fut,close_fut,volume_fut,open_interest,call_oi,put_oi,call_volume,put_volume,call_iv,put_iv
0,2024-01-01 00:00:00,18000.496714,18003.846366,17996.115338,17997.463213,266412,18000.496714,18003.846366,17996.115338,18011.933319,266412,136799,68084,40622,15638,17467,0.127579,0.23766
1,2024-01-01 00:05:00,18000.35845,18002.449807,17999.624023,18001.414868,205219,18000.35845,18002.449807,17999.624023,18014.698774,205219,98367,20352,75264,10828,11163,0.249795,0.176499
2,2024-01-01 00:10:00,18001.006138,18013.734743,17998.845623,18007.006533,105033,18001.006138,18013.734743,17998.845623,18011.563403,105033,63201,76095,48584,9994,18935,0.217812,0.183452
3,2024-01-01 00:15:00,18002.529168,18016.658683,17995.751955,18000.069656,173324,18002.529168,18016.658683,17995.751955,18006.700648,173324,52739,41586,67850,16556,17919,0.232307,0.240565
4,2024-01-01 00:20:00,18002.295015,18002.926738,17986.243301,17992.912251,115689,18002.295015,18002.926738,17986.243301,18013.520479,115689,63231,25344,24722,11436,12635,0.204656,0.187729


In [7]:
spot.to_csv("../data/nifty_spot_5min.csv", index=False)
futures.to_csv("../data/nifty_futures_5min.csv", index=False)
options.to_csv("../data/nifty_options_5min.csv", index=False)
merged.to_csv("../data/nifty_merged_5min.csv", index=False)

print("All data files saved successfully")


All data files saved successfully
