In [None]:
import json
import pandas as pd
from pandas import json_normalize

# Path of the raw snapshot dump that main() passes to load_snapshots().
INPUT_FILE = "14112025_BANK_PNL.txt"     # your pasted file
# Path of the CSV that main() writes the flattened snapshots to.
OUTPUT_CSV = "flattened_snapshots.csv"


def parse_snapshot(obj):
    """
    Flatten one snapshot, expanding the nested payload stored under 'Current'.

    Every top-level field except 'Current' is copied unchanged.  The 'Current'
    field is expected to hold a JSON-encoded object (an already-decoded dict is
    accepted as well) that may contain 'Previous', 'Current' and 'Next'
    sub-objects.  Each key of such a sub-object is emitted as
    '<Section>_<key>'.  A section that is missing, or is not an object, yields
    a single '<Section>' entry set to None.

    A payload that cannot be decoded, or that decodes to something other than
    an object, is treated as empty, so all three sections come out as None.

    Args:
        obj: dict holding one snapshot record.

    Returns:
        dict: the flattened record.
    """
    # 1. Copy all top-level fields except 'Current'
    flat = {k: v for k, v in obj.items() if k != "Current"}

    if "Current" not in obj:
        return flat

    # 2. Decode the nested payload, tolerating bad or already-decoded input
    payload = obj["Current"]
    if isinstance(payload, dict):
        # Already decoded upstream: use it directly instead of letting
        # json.loads() reject it and silently losing the nested data.
        curr = payload
    else:
        try:
            curr = json.loads(payload)
        except (TypeError, ValueError, RecursionError):
            # Best effort: an undecodable payload is treated as empty.
            curr = {}
        if not isinstance(curr, dict):
            # Valid JSON but not an object (number, string, list, null):
            # there are no sections to look up.
            curr = {}

    # 3. Expand the Previous / Current / Next sections
    for section_name in ["Previous", "Current", "Next"]:
        section = curr.get(section_name)
        if isinstance(section, dict):
            for key, val in section.items():
                flat[f"{section_name}_{key}"] = val
        else:
            # Placeholder so the section is still visible in the output
            flat[f"{section_name}"] = None

    return flat


def load_snapshots(path):
    """
    Load snapshot records from a text file.

    Supported layouts:
    - one JSON document for the whole file: an array of records or a single
      record object, possibly pretty-printed across several lines;
    - newline-delimited JSON, one record (or one array of records) per line;
    - several JSON values written back-to-back on the same line, optionally
      separated by whitespace or commas.

    Parsing is best effort: when a line stops being valid JSON, the remainder
    of that line is skipped and loading continues with the next line.

    Args:
        path: path of the file to read.

    Returns:
        list: the decoded records, in file order.

    Raises:
        ValueError: if no record could be decoded from the file.
    """
    with open(path, "r") as f:
        raw = f.read().strip()

    # Try the whole file first so that a JSON array (single-line or
    # pretty-printed) is returned as its records rather than being wrapped in
    # an outer list or parsed line by line with records lost.
    try:
        whole = json.loads(raw)
    except ValueError:
        whole = None
    if isinstance(whole, list):
        return whole
    if isinstance(whole, dict):
        return [whole]

    # Otherwise decode value by value within each line.
    decoder = json.JSONDecoder()
    snapshots = []
    for line in raw.splitlines():
        line = line.strip()
        pos = 0
        while pos < len(line):
            try:
                value, pos = decoder.raw_decode(line, pos)
            except ValueError:
                # Not JSON from here on: skip the rest of this line.
                break
            if isinstance(value, list):
                snapshots.extend(value)
            else:
                snapshots.append(value)
            # raw_decode() does not skip separators before the next value.
            while pos < len(line) and line[pos] in " \t\r,":
                pos += 1

    if not snapshots:
        raise ValueError("Cannot parse file: Format looks invalid")

    return snapshots


def main():
    """
    Run the flattening pipeline end to end.

    Reads the snapshots from INPUT_FILE, flattens each one with
    parse_snapshot(), blanks out cells holding the literal text "[]", converts
    the 'LTT' column to datetimes when that column exists (unparseable values
    are coerced rather than raised), and writes the table to OUTPUT_CSV
    without the index column.
    """
    snapshots = load_snapshots(INPUT_FILE)
    print(f"Loaded snapshots: {len(snapshots)}")

    # One flattened dict per snapshot becomes one row of the table
    table = pd.DataFrame([parse_snapshot(item) for item in snapshots])

    def _blank_empty_array(cell):
        # A cell equal to the text "[]" is stored as a missing value
        if cell == "[]":
            return None
        return cell

    for name in table.columns:
        table[name] = table[name].apply(_blank_empty_array)

    # Timestamp column, when the data has one
    if "LTT" in table:
        table["LTT"] = pd.to_datetime(table["LTT"], errors="coerce")

    table.to_csv(OUTPUT_CSV, index=False)
    print(f"Saved: {OUTPUT_CSV}")


if __name__ == "__main__":
    main()


Loaded snapshots: 22300
Saved: flattened_snapshots.csv


In [None]:
import pandas as pd
import numpy as np
from collections import defaultdict, Counter
from datetime import timedelta

# ============================================================
# LOAD CSV
# ============================================================
# Flattened snapshot table; 'LTT' is parsed as a datetime column on load.
df = pd.read_csv("flattened_snapshots.csv", low_memory=False, parse_dates=['LTT'])

# Detect which premium column exists: use the *_Call_Premium name when the
# table has it, otherwise fall back to the *_Call_ltp name.
prev_p = "Previous_Call_Premium" if "Previous_Call_Premium" in df else "Previous_Call_ltp"
curr_p = "Current_Call_Premium"  if "Current_Call_Premium"  in df else "Current_Call_ltp"
next_p = "Next_Call_Premium"     if "Next_Call_Premium"     in df else "Next_Call_ltp"

prev_str, curr_str, next_str = "Previous_Strikeprice","Current_Strikeprice","Next_Strikeprice"

# ============================================================
# BUILD STRIKE → (timestamp, premium) TIMESERIES
# ============================================================
# Maps an integer strike to the list of (LTT, premium) pairs seen for it,
# appended in row order.
strike_series = defaultdict(list)

# Whether a column exists does not change from row to row, so the usable
# (strike column, premium column) pairs are resolved once, outside the loop.
strike_premium_cols = [
    (sc, pc)
    for sc, pc in [(prev_str, prev_p), (curr_str, curr_p), (next_str, next_p)]
    if sc in df.columns and pc in df.columns
]

for _, row in df.iterrows():
    t = row["LTT"]
    for sc, pc in strike_premium_cols:
        if pd.isna(row[sc]) or pd.isna(row[pc]):
            continue
        try:
            s = int(row[sc])
            p = float(row[pc])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric strike or premium: skip this observation only.
            # (A bare `except` here would also swallow KeyboardInterrupt.)
            continue
        strike_series[s].append((t, p))

# ============================================================
# SUMMARY + FORECAST FUNCTIONS
# ============================================================
def summarize(series):
    """
    Condense a sequence of (timestamp, premium) pairs into summary statistics.

    The pairs are ordered by timestamp before the first and last premiums are
    read.  Returns None when the sequence is empty; otherwise a dict with
    first_premium, last_premium, peak_premium, trough_premium, abs_change
    (last minus first), pct_change (abs_change relative to the first premium,
    in percent; 0 when the first premium is 0) and n_obs.
    """
    ordered = list(series)
    ordered.sort(key=lambda pair: pair[0])
    premiums = [premium for _stamp, premium in ordered]

    # Nothing to summarise
    if len(premiums) == 0:
        return None

    opening = premiums[0]
    closing = premiums[-1]

    if opening != 0:
        pct = (closing - opening) / opening * 100
    else:
        # Avoid dividing by a zero starting premium
        pct = 0

    result = {}
    result["first_premium"] = opening
    result["last_premium"] = closing
    result["peak_premium"] = max(premiums)
    result["trough_premium"] = min(premiums)
    result["abs_change"] = closing - opening
    result["pct_change"] = pct
    result["n_obs"] = len(premiums)
    return result

def forecast(stats):
    """
    Turn a strike's summary stats into two projected premium ranges.

    Reads stats["pct_change"] to pick a tier and offsets
    stats["last_premium"] by that tier's fixed amounts.  Returns a pair
    ((low, high), (low, high)); the calling code labels the first range as
    the 5-minute band and the second as the 10-minute band.

    Tiers: pct_change >= 25, >= 8, > 0, and everything else (which includes
    zero, negative and NaN values).
    """
    change = stats["pct_change"]
    base = stats["last_premium"]

    # Flat, falling or NaN change: the fallback tier
    if not (change > 0):
        return (base - 5, base + 2), (base - 8, base + 5)

    # Small positive change
    if change < 8:
        return (base + 2, base + 8), (base + 5, base + 15)

    # Moderate positive change
    if change < 25:
        return (base + 6, base + 18), (base + 12, base + 30)

    # Strong positive change
    return (base + 15, base + 35), (base + 25, base + 60)

# ============================================================
# BUILD SUMMARY FOR ALL STRIKES
# ============================================================
# Strikes with fewer observations than this are left out of the summary.
MIN_OBSERVATIONS = 3

# Column layout of `summary`, used only to give an empty result its schema.
SUMMARY_COLUMNS = [
    "strike",
    "first_premium", "last_premium", "peak_premium", "trough_premium",
    "abs_change", "pct_change", "n_obs",
    "5min_low", "5min_high", "10min_low", "10min_high",
    "p5_expected_lo", "p5_expected_hi", "p10_expected_lo", "p10_expected_hi",
]

# One record per strike: summarize() stats, the two forecast() ranges, and
# those ranges expressed as offsets from the last premium.
records = []

for s, ts in strike_series.items():
    if len(ts) < MIN_OBSERVATIONS:
        continue

    st = summarize(ts)
    if st is None:
        continue

    f5, f10 = forecast(st)

    rec = {
        "strike": s,
        **st,
        "5min_low":  f5[0],
        "5min_high": f5[1],
        "10min_low": f10[0],
        "10min_high": f10[1],
        "p5_expected_lo": f5[0] - st["last_premium"],
        "p5_expected_hi": f5[1] - st["last_premium"],
        "p10_expected_lo": f10[0] - st["last_premium"],
        "p10_expected_hi": f10[1] - st["last_premium"]
    }

    records.append(rec)

# pd.DataFrame([]) has no columns at all, so a later summary["strike"] lookup
# would raise KeyError when no strike qualifies; keep the schema in that case.
if records:
    summary = pd.DataFrame(records)
else:
    summary = pd.DataFrame(columns=SUMMARY_COLUMNS)

# ============================================================
# STRATEGY TAGS + MONEYFLOW EXTRACTION
# ============================================================
# Keywords looked for (case-insensitively, as substrings) in the strategy-tag
# text.  Because matching is by substring, overlapping keywords all count:
# text containing "RSI_MACD" increments RSI, MACD and RSI_MACD.
tag_keywords = [
    "RSI", "MACD", "VWAP", "RSI_MACD", "VWAP_Divergence", "OI_Support_Call",
    "Put Buying", "Call Writing", "PnL", "Momentum", "Breakout"
]

# Per-strike accumulator, one entry for every strike present in `summary`.
# .get() keeps this working when `summary` is empty and has no 'strike' column.
strike_info = {
    int(s): {
        "tags": Counter(),
        "call_moneyflow": 0.0,
        "put_moneyflow": 0.0
    }
    for s in summary.get("strike", [])
}

# Column existence is the same for every row, so resolve it once up front.
strike_cols = [c for c in [prev_str, curr_str, next_str] if c in df.columns]
tag_cols = [
    c for c in ["Previous_StrategyTag", "Current_StrategyTag", "Next_StrategyTag"]
    if c in df.columns
]
moneyflow_cols = [
    c for c in [
        "Previous_CallMoneyFlow","Current_CallMoneyFlow","Next_CallMoneyFlow",
        "Previous_PutMoneyFlow","Current_PutMoneyFlow","Next_PutMoneyFlow"
    ]
    if c in df.columns
]

# NOTE(review): each strike found in a row receives the tags and moneyflow of
# ALL three sections (Previous/Current/Next) of that row, not only those of
# the section the strike came from.  Behaviour kept as-is; confirm whether
# section-specific attribution was intended.
for _, row in df.iterrows():
    for sc in strike_cols:
        if pd.isna(row[sc]):
            continue
        try:
            s = int(row[sc])
        except (TypeError, ValueError, OverflowError):
            continue

        if s not in strike_info:
            continue
        entry = strike_info[s]

        # Extract tags
        for col in tag_cols:
            text = row[col]
            if isinstance(text, str):
                lowered = text.lower()
                for kw in tag_keywords:
                    if kw.lower() in lowered:
                        entry["tags"][kw] += 1

        # Moneyflow
        for col in moneyflow_cols:
            if pd.isna(row[col]):
                continue
            try:
                val = float(row[col])
            except (TypeError, ValueError):
                # A non-numeric cell is skipped instead of aborting the run.
                continue
            if "Call" in col:
                entry["call_moneyflow"] += val
            else:
                entry["put_moneyflow"] += val

# ============================================================
# HIGH CONVICTION SIGNAL HIT RATE
# ============================================================
# Column holding the high-conviction flag; rows where it equals True count.
highconv_col = "Current_IsHighConvictionSignal"

# A signal is a "success" if the premium exceeds its signal-time value within
# this look-ahead window.
HIGHCONV_WINDOW = timedelta(minutes=3)

# pct_change beyond +/- this value turns HOLD into BUY_CALL / BUY_PUT.
ACTION_THRESHOLD_PCT = 5

# ============================================================
# BUILD FINAL OUTPUT
# ============================================================
# One output row per strike in `summary`: its stats plus accumulated
# moneyflow, tag counts, reasoning text, action and high-conviction hit rate.
final_rows = []

for _, r in summary.iterrows():
    s = int(r["strike"])
    info = strike_info[s]

    # Build reasoning text
    tags = info["tags"]
    reasons = []

    if tags.get("RSI",0) or tags.get("RSI_MACD",0):
        reasons.append("RSI/MACD bullish pattern")
    if tags.get("VWAP_Divergence",0):
        reasons.append("VWAP divergence support")
    if tags.get("OI_Support_Call",0):
        reasons.append("OI call support detected")
    if tags.get("Put Buying",0):
        reasons.append("Put side hedging activity")

    if not reasons:
        reasons.append("No strong signals")

    # Recommended Action
    if r["pct_change"] > ACTION_THRESHOLD_PCT:
        act = "BUY_CALL"
    elif r["pct_change"] < -ACTION_THRESHOLD_PCT:
        act = "BUY_PUT"
    else:
        act = "HOLD"

    # High-conviction stats
    # Rows where this strike is the current strike.  Selected once per strike
    # and reused below instead of re-scanning the whole frame per signal row.
    strike_rows = df[df[curr_str] == s]

    if highconv_col in df.columns:
        # Element-wise comparison on a Series; `== True` is intentional.
        hc_rows = strike_rows[strike_rows[highconv_col] == True]
    else:
        # No flag column in the data: there are no high-conviction rows.
        hc_rows = strike_rows.iloc[0:0]

    hc_total = hc_rows.shape[0]
    hc_success = 0

    for _, row2 in hc_rows.iterrows():
        t0 = row2["LTT"]
        p0 = row2[curr_p]

        # Same-strike rows from the signal time up to the end of the window.
        # The signal row itself is included, but the strict `>` below means
        # only a higher premium on another row counts.
        window = strike_rows[
            (strike_rows["LTT"] >= t0) &
            (strike_rows["LTT"] <= t0 + HIGHCONV_WINDOW)
        ]

        if not window.empty and window[curr_p].max() > p0:
            hc_success += 1

    final_rows.append({
        **r.to_dict(),
        "call_moneyflow": info["call_moneyflow"],
        "put_moneyflow": info["put_moneyflow"],
        "tags": ";".join([f"{k}:{v}" for k,v in tags.items()]),
        "reasons": "; ".join(reasons),
        "recommended_action": act,
        "highconv_total": hc_total,
        "highconv_success": hc_success,
        "highconv_hit_rate": (hc_success / hc_total) if hc_total > 0 else None,
        "Current_Strikeprice": s
    })

final_df = pd.DataFrame(final_rows)

# SAVE OUTPUT
final_df.to_csv("FULL_STRIKE_FORECAST_OUTPUT.csv", index=False)
print("Generated FULL_STRIKE_FORECAST_OUTPUT.csv successfully!")


Generated FULL_STRIKE_FORECAST_OUTPUT.csv successfully!
