In [None]:
import json
import pandas as pd

# ---- Market session (IST) ----
MARKET_START_HOUR = 9
MARKET_START_MINUTE = 15   # change to 0 if you really want from 9:00
MARKET_END_HOUR = 15
MARKET_END_MINUTE = 30

# ---- Load JSON ----
with open("SBI Bank 1H.json", "r") as f:
    data = json.load(f)

# ---- Build DataFrame from JSON arrays ----
df = pd.DataFrame({
    "timestamp": data["t"],
    "open": data["o"],
    "high": data["h"],
    "low": data["l"],
    "close": data["c"],
    "volume": data["v"],
})

# ---- Convert UNIX seconds → IST datetime ----
# timestamps are in UTC by definition, so:
df["datetime_ist"] = (
    pd.to_datetime(df["timestamp"], unit="s", utc=True)   # make it UTC
      .dt.tz_convert("Asia/Kolkata")                     # convert to IST
)

# ---- Filter only regular market hours (9:15–15:30 IST) ----
time_only = df["datetime_ist"].dt.time

start_time = pd.to_datetime(
    f"{MARKET_START_HOUR}:{MARKET_START_MINUTE}"
).time()
end_time = pd.to_datetime(
    f"{MARKET_END_HOUR}:{MARKET_END_MINUTE}"
).time()

df = df[(time_only >= start_time) & (time_only <= end_time)]

# ---- Arrange and save ----
df = df[["datetime_ist", "timestamp", "open", "high", "low", "close", "volume"]]
df.to_csv("SBI_Bank_1H_IST_market_hours.csv", index=False)

print("Done! Saved: SBI_Bank_1H_IST_market_hours.csv")


Done! Saved: HDFC_Bank_15min_IST_market_hours.csv


In [2]:
import json
import pandas as pd

# ---- Market session (IST) ----
MARKET_START_HOUR = 9
MARKET_START_MINUTE = 15   # change to 0 if you really want from 9:00
MARKET_END_HOUR = 15
MARKET_END_MINUTE = 30

# ---- Load JSON ----
with open("SBI Bank 15 min.json", "r") as f:
    data = json.load(f)

# ---- Build DataFrame from JSON arrays ----
df = pd.DataFrame({
    "timestamp": data["t"],
    "open": data["o"],
    "high": data["h"],
    "low": data["l"],
    "close": data["c"],
    "volume": data["v"],
})

# ---- Convert UNIX seconds → IST datetime ----
# timestamps are in UTC by definition, so:
df["datetime_ist"] = (
    pd.to_datetime(df["timestamp"], unit="s", utc=True)   # make it UTC
      .dt.tz_convert("Asia/Kolkata")                     # convert to IST
)

# ---- Filter only regular market hours (9:15–15:30 IST) ----
time_only = df["datetime_ist"].dt.time

start_time = pd.to_datetime(
    f"{MARKET_START_HOUR}:{MARKET_START_MINUTE}"
).time()
end_time = pd.to_datetime(
    f"{MARKET_END_HOUR}:{MARKET_END_MINUTE}"
).time()

df = df[(time_only >= start_time) & (time_only <= end_time)]

# ---- Arrange and save ----
df = df[["datetime_ist", "timestamp", "open", "high", "low", "close", "volume"]]
df.to_csv("SBI_Bank_15min_IST_market_hours.csv", index=False)

print("Done! Saved: SBI_Bank_15min_IST_market_hours.csv")


Done! Saved: SBI_Bank_15min_IST_market_hours.csv


## Resampling 1H data to 4H

In [3]:
import pandas as pd

# ========= SETTINGS =========
INPUT_FILE = "SBI_Bank_1H_IST_market_hours.csv"
OUTPUT_FILE = "SBI_Bank_4H_IST_market_hours.csv"
# ============================

def resample_1h_to_4h(input_path: str, output_path: str):
    # 1. Load data
    df = pd.read_csv(input_path)

    # 2. Convert datetime column to pandas datetime (keeps +05:30 offset)
    #    Make sure the column name matches your CSV exactly
    df["datetime_ist"] = pd.to_datetime(df["datetime_ist"])

    # 3. Set datetime as index and sort by time (important for resample)
    df = df.set_index("datetime_ist").sort_index()

    # 4. Resample from 1H → 4H
    #    - OHLC from price columns
    #    - Sum volume
    #    - Keep first timestamp (Unix) in each 4H block
    df_4h = df.resample("4H").agg({
        "timestamp": "first",   # unix timestamp of the first 1H bar in the block
        "open": "first",
        "high": "max",
        "low": "min",
        "close": "last",
        "volume": "sum"
    })

    # 5. Drop any empty 4H candles (if any)
    df_4h = df_4h.dropna(subset=["open", "high", "low", "close"])

    # 6. Reset index so datetime_ist becomes a column again
    df_4h = df_4h.reset_index()

    # 7. Save to CSV
    df_4h.to_csv(output_path, index=False)
    print(f"4H data saved to: {output_path}")
    print(df_4h.head())

if __name__ == "__main__":
    resample_1h_to_4h(INPUT_FILE, OUTPUT_FILE)


4H data saved to: SBI_Bank_4H_IST_market_hours.csv
               datetime_ist     timestamp    open    high     low   close  \
0 2024-12-03 08:00:00+05:30  1.733198e+09  846.65  856.55  845.70  853.20   
1 2024-12-03 12:00:00+05:30  1.733209e+09  853.35  854.90  852.40  853.95   
2 2024-12-04 08:00:00+05:30  1.733285e+09  854.00  859.00  850.30  854.05   
3 2024-12-04 12:00:00+05:30  1.733296e+09  854.30  864.00  852.55  859.70   
4 2024-12-05 08:00:00+05:30  1.733371e+09  859.25  870.00  855.60  867.30   

    volume  
0  6152144  
1  4654563  
2  3547743  
3  6628761  
4  5349354  


  df_4h = df.resample("4H").agg({
