# EV vs Actual PnL Gap

Inspect existing run artifacts to understand how EV estimates compare to realized PnL. Use this notebook to explore available metrics and highlight any missing logging.

In [1]:
from pathlib import Path
import json
import pandas as pd
from IPython.display import display

PROJECT_ROOT = Path.cwd().resolve()
search_roots = [PROJECT_ROOT] + list(PROJECT_ROOT.parents)
for root in search_roots:
    if (root / 'runs').exists():
        PROJECT_ROOT = root
        break
else:
    checked = ', '.join(str(p) for p in search_roots)
    raise FileNotFoundError('Could not locate runs directory. Checked: ' + checked)

RUNS_DIR = PROJECT_ROOT / 'runs'
DAILY_PATH = PROJECT_ROOT / 'daily.csv'

if not DAILY_PATH.exists():
    raise FileNotFoundError(f'Expected daily.csv at {DAILY_PATH}; verify data export.')



ImportError: cannot import name 'PathFound' from 'pathlib' (/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/pathlib.py)

In [2]:
def load_records(path: Path) -> pd.DataFrame:
    df = pd.read_csv(path)
    if 'ts' in df.columns:
        df['ts'] = pd.to_datetime(df['ts'], errors='coerce')
    numeric_cols = ['ev_lcb', 'pnl_pips', 'cost_pips', 'slip_est', 'slip_real', 'tp_pips', 'sl_pips', 'or_atr_ratio', 'min_or_atr_ratio']
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    return df

def load_state(path: Path) -> dict:
    with path.open() as fh:
        return json.load(fh)


In [3]:
record_paths = sorted(RUNS_DIR.glob('*/records.csv'))
print(f'Found {len(record_paths)} run(s) with records.csv')
if record_paths:
    display(pd.DataFrame({'run_id': [p.parent.name for p in record_paths], 'path': record_paths}))
else:
    print('No records.csv files detected. Add logging or rerun export before proceeding.')


Found 10 run(s) with records.csv


Unnamed: 0,run_id,path
0,USDJPY_conservative_20250922_143631,/Users/izumimotohayato/development/invest3_orb...
1,USDJPY_conservative_20250922_143714,/Users/izumimotohayato/development/invest3_orb...
2,USDJPY_conservative_20250922_143756,/Users/izumimotohayato/development/invest3_orb...
3,USDJPY_conservative_20250922_150418,/Users/izumimotohayato/development/invest3_orb...
4,USDJPY_conservative_20250922_152942,/Users/izumimotohayato/development/invest3_orb...
5,USDJPY_conservative_20250922_153303,/Users/izumimotohayato/development/invest3_orb...
6,USDJPY_conservative_20250922_170854,/Users/izumimotohayato/development/invest3_orb...
7,USDJPY_conservative_20250922_170914,/Users/izumimotohayato/development/invest3_orb...
8,USDJPY_conservative_20250922_175708,/Users/izumimotohayato/development/invest3_orb...
9,USDJPY_conservative_20250923_202605,/Users/izumimotohayato/development/invest3_orb...


In [4]:
sample_path = record_paths[-1] if record_paths else None
if sample_path is None:
    raise SystemExit('No run artifacts to inspect yet.')
print(f'Sampling run: {sample_path.parent.name}')
records_df = load_records(sample_path)
print(f'Record count: {len(records_df)}')
display(records_df.head())
try:
    display(records_df.describe(include='all', datetime_is_numeric=True))
except TypeError:
    display(records_df.describe(include='all'))



Sampling run: USDJPY_conservative_20250922_143631
Record count: 200


Unnamed: 0,cost_pips,ev_lcb,exit,min_or_atr_ratio,or_atr_ratio,pnl_pips,rv_band,side,sl_pips,slip_est,slip_real,spread_band,stage,tp_pips,ts
0,,,,,,,,,,,,,no_breakout,,2018-01-01 22:00:00
1,,,,,,,,,,,,,no_breakout,,2018-01-01 22:05:00
2,,,,,,,,,,,,,no_breakout,,2018-01-01 22:10:00
3,,,,,,,,,,,,,no_breakout,,2018-01-01 22:15:00
4,,,,,,,,,,,,,no_breakout,,2018-01-01 22:20:00


Unnamed: 0,cost_pips,ev_lcb,exit,min_or_atr_ratio,or_atr_ratio,pnl_pips,rv_band,side,sl_pips,slip_est,slip_real,spread_band,stage,tp_pips,ts
count,53.0,1.0,52,1.0,93.0,52.0,92,146,53.0,52.0,52.0,92,200,53.0,200
unique,,,2,,,,1,2,,,,1,5,,
top,,,tp,,,,high,BUY,,,,narrow,gate_block,,
freq,,,51,,,,92,93,,,,92,92,,
mean,0.5,0.343228,,0.6,5.082413,0.891566,,,2.361509,0.0,1.471154,,,2.951887,2018-01-02 06:18:09
min,0.5,0.343228,,0.6,0.0,-2.917143,,,1.697143,0.0,0.0,,,2.121429,2018-01-01 22:00:00
25%,0.5,0.343228,,0.6,3.62069,0.480357,,,2.0,0.0,1.5,,,2.5,2018-01-02 02:08:45
50%,0.5,0.343228,,0.6,5.034247,1.089286,,,2.462857,0.0,1.5,,,3.078571,2018-01-02 06:17:30
75%,0.5,0.343228,,0.6,5.927419,1.383929,,,2.697143,0.0,1.5,,,3.371429,2018-01-02 10:26:15
max,0.5,0.343228,,0.6,8.647059,1.628571,,,2.902857,0.0,1.5,,,3.628571,2018-01-02 14:45:00


In [5]:
stage_counts = records_df['stage'].value_counts(dropna=False) if 'stage' in records_df else pd.Series(dtype=int)
print('Stage distribution:')
display(stage_counts)

if 'pnl_pips' in records_df:
    trade_mask = records_df['pnl_pips'].notna()
else:
    trade_mask = pd.Series(False, index=records_df.index)
trade_records = records_df.loc[trade_mask].copy()
print(f'Realized trade rows: {len(trade_records)}')
if len(trade_records) == 0:
    print('No realized PnL entries yet — consider enhancing logging if this persists.')
else:
    if 'ev_lcb' in trade_records:
        trade_records['ev_gap'] = trade_records['pnl_pips'] - trade_records['ev_lcb']
    else:
        trade_records['ev_gap'] = pd.NA
    display(trade_records[['ts', 'stage', 'ev_lcb', 'pnl_pips', 'ev_gap']].head())



Stage distribution:


stage
gate_block     92
no_breakout    54
trade          52
or_filter       1
ev_reject       1
Name: count, dtype: int64

Realized trade rows: 52


Unnamed: 0,ts,stage,ev_lcb,pnl_pips,ev_gap
125,2018-01-02 08:25:00,trade,,-2.917143,
126,2018-01-02 08:30:00,trade,,1.457143,
127,2018-01-02 08:35:00,trade,,1.578571,
128,2018-01-02 08:40:00,trade,,1.585714,
129,2018-01-02 08:45:00,trade,,1.6,


In [6]:
state_path = sample_path.with_name('state.json')
if not state_path.exists():
    print('State file missing; EV profile unavailable for this run.')
else:
    state = load_state(state_path)
    ev_global = state.get('ev_global', {})
    ev_buckets = state.get('ev_buckets', {})
    print('Global EV parameters')
    display(pd.Series(ev_global))
    if ev_buckets:
        ev_bucket_df = pd.DataFrame.from_dict(ev_buckets, orient='index')
        display(ev_bucket_df)
    else:
        print('No bucket-level EV parameters recorded')


Global EV parameters


alpha          24.446287
beta           20.137978
prior_alpha     0.000000
prior_beta      0.000000
decay           0.020000
conf            0.950000
dtype: float64

Unnamed: 0,alpha,beta
TOK:narrow:high,1.0,1.0
TOK:narrow:mid,1.0,1.0
LDN:narrow:high,25.050562,18.453786
LDN:narrow:mid,2.844321,3.767481
NY:narrow:high,1.9404,1.9604
NY:narrow:mid,1.9604,1.9404
NY:narrow:low,1.0,1.0
TOK:narrow:low,1.0,1.0
LDN:narrow:low,1.0,1.0


In [7]:
daily_df = pd.read_csv(DAILY_PATH, parse_dates=['date'])
daily_df = daily_df.sort_values('date')
print(f'Daily rows: {len(daily_df)}')
display(daily_df.head())
display(daily_df.tail())


Daily rows: 2190


Unnamed: 0,date,breakouts,gate_pass,gate_block,ev_pass,ev_reject,fills,wins,pnl_pips
0,2018-01-01,9,0,9,0,0,0,0,0.0
1,2018-01-02,253,131,122,0,131,0,0,0.0
2,2018-01-03,172,128,44,0,128,0,0,0.0
3,2018-01-04,215,102,113,0,102,0,0,0.0
4,2018-01-05,245,106,139,0,106,0,0,0.0


Unnamed: 0,date,breakouts,gate_pass,gate_block,ev_pass,ev_reject,fills,wins,pnl_pips
2185,2024-12-26,265,146,119,0,146,0,0,0.0
2186,2024-12-27,204,106,98,0,106,0,0,0.0
2187,2024-12-29,12,0,12,0,0,0,0,0.0
2188,2024-12-30,248,118,130,0,118,0,0,0.0
2189,2024-12-31,242,146,96,0,146,0,0,0.0


In [8]:
if 'trade_records' not in globals():
    print('trade_records is undefined. Run the preparation cells above first.')
elif trade_records.empty:
    print('Daily aggregation skipped because no realized trade records were found.')
else:
    trade_daily = (trade_records
        .dropna(subset=['ts'])
        .assign(date=lambda d: d['ts'].dt.floor('D'))
        .groupby('date')[['pnl_pips', 'ev_lcb']]
        .sum()
        .rename(columns={'pnl_pips': 'realized_pnl_pips', 'ev_lcb': 'ev_lcb_sum'})
        .reset_index())
    merged = daily_df.merge(trade_daily, how='left', on='date')
    display(merged.head())
    print('Columns available for plotting EV vs realized PnL:')
    display(merged.columns)



Unnamed: 0,date,breakouts,gate_pass,gate_block,ev_pass,ev_reject,fills,wins,pnl_pips,realized_pnl_pips,ev_lcb_sum
0,2018-01-01,9,0,9,0,0,0,0,0.0,,
1,2018-01-02,253,131,122,0,131,0,0,0.0,46.361429,0.0
2,2018-01-03,172,128,44,0,128,0,0,0.0,,
3,2018-01-04,215,102,113,0,102,0,0,0.0,,
4,2018-01-05,245,106,139,0,106,0,0,0.0,,


Columns available for plotting EV vs realized PnL:


Index(['date', 'breakouts', 'gate_pass', 'gate_block', 'ev_pass', 'ev_reject',
       'fills', 'wins', 'pnl_pips', 'realized_pnl_pips', 'ev_lcb_sum'],
      dtype='object')

## Next Steps
- Use the aggregated dataframe(s) above to build plots (e.g., EV vs realized PnL).
- If `trade_records` remains empty, investigate strategy logging so that fill/exit events capture realized PnL.
- Extend the notebook with additional joins (e.g., by bucket or RV band) once the required metrics exist.