In [None]:
"""TPAW Planner Export Using Existing Simulator

This notebook runs a single-trial simulation using the existing engine and
post-processes the results into real (inflation-adjusted) post-tax income
streams suitable for TPAW Planner.
"""

import os
import sys
from pathlib import Path

# Resolve the workspace root. When the kernel starts inside the
# "standalone_tools" subdirectory the root is one level up; otherwise the
# current directory is taken to be the root already.
notebook_dir = Path.cwd()
if notebook_dir.name == "standalone_tools":
    workspace_root = notebook_dir.parent
else:
    workspace_root = notebook_dir

# Put the workspace root at the front of sys.path (only once) so that the
# `app` imports further down resolve against it.
if str(workspace_root) not in sys.path:
    sys.path.insert(0, str(workspace_root))

# Also make the workspace root the working directory so relative paths work
# no matter where the notebook was launched from. This has to happen before
# any app module is imported.
os.chdir(workspace_root)

from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd

from app.data.constants import INTERVALS_PER_YEAR
from app.models.simulator import SimulationEngine
from app.util import interval_yield

In [None]:
# Build an engine for exactly one trial; its single result DataFrame is
# extracted further down.
engine = SimulationEngine(trial_qty=1)

# Optional override: when the TPAW planner config carries an annual inflation
# rate, replace the simulated cumulative inflation path with a deterministic
# one derived from that rate, so downstream processing sees a stable real
# income level.
user_config = engine._user_config
inflation_rate = None
if user_config.tpaw_planner:
    # Stays None when the planner section exists but has no rate configured
    inflation_rate = user_config.tpaw_planner.inflation_rate

if inflation_rate is not None:
    interval_inflation_yield = interval_yield(1 + inflation_rate)
    intervals = user_config.intervals_per_trial
    # Cumulative inflation factor per interval, starting at 1.0 (yield**0)
    cumulative = np.asarray(
        [interval_inflation_yield**step for step in range(intervals)],
        dtype=float,
    )
    # Per the original note, `_economic_sim_data.inflation` has shape
    # (trial_qty, intervals_per_trial); the 1-D path is broadcast over rows.
    engine._economic_sim_data.inflation[:, :] = cumulative

engine.gen_all_trials()

results = engine.results
trial_dfs = results.as_dataframes()

# The engine was built with trial_qty=1, so anything else is unexpected
assert len(trial_dfs) == 1, "Expected exactly one trial DataFrame"
df = trial_dfs[0]

# Quick look at what the simulator produced
print("DataFrame columns:")
print(df.columns.tolist())
print(f"Number of intervals: {len(df)}")
print(df.head())

In [None]:
# Convert nominal per-interval flows to real yearly flows, one per stream

# Cumulative inflation factor used to deflate the nominal amounts
infl = df["Inflation"]

# Nominal per-interval income, one Series per stream
job_q, ss_user_q, ss_partner_q, pension_q = (
    df[column] for column in ("Job Income", "SS User", "SS Partner", "Pension")
)

# Deflate: real per-interval = nominal per-interval / inflation
job_real_q, ss_user_real_q, ss_partner_real_q, pension_real_q = (
    nominal / infl for nominal in (job_q, ss_user_q, ss_partner_q, pension_q)
)

# Annualize: real yearly = real per-interval * intervals per year
job_real_y, ss_user_real_y, ss_partner_real_y, pension_real_y = (
    real_q * INTERVALS_PER_YEAR
    for real_q in (job_real_q, ss_user_real_q, ss_partner_real_q, pension_real_q)
)

# Side-by-side view of the real yearly streams, keyed by date
streams_real_y = pd.DataFrame(
    {
        "Date": df["Date"],
        "job_real_y": job_real_y,
        "ss_user_real_y": ss_user_real_y,
        "ss_partner_real_y": ss_partner_real_y,
        "pension_real_y": pension_real_y,
    }
)

streams_real_y

In [None]:
# Compute total real yearly taxes and split them across the income streams
# in proportion to each stream's share of pre-tax income

# Nominal tax components per interval
income_tax_q = df["Income Taxes"]
medicare_tax_q = df["Medicare Taxes"]
ss_tax_q = df["Social Security Taxes"]
portfolio_tax_q = df["Portfolio Taxes"]

# All tax components combined, still nominal and per interval
total_tax_q = income_tax_q + medicare_tax_q + ss_tax_q + portfolio_tax_q

# Deflate, then annualize
total_tax_real_q = total_tax_q / infl
total_tax_real_y = total_tax_real_q * INTERVALS_PER_YEAR

# Pre-tax real yearly income summed over the four streams
total_income_real_y = job_real_y + ss_user_real_y + ss_partner_real_y + pension_real_y

# eps pads the denominator against division by zero; intervals whose total
# income is effectively zero get a share of exactly 0 for every stream.
eps = 1e-9
zero_mask = total_income_real_y.abs() < eps

share_job, share_ss_user, share_ss_partner, share_pension = (
    (stream / (total_income_real_y + eps)).mask(zero_mask, 0.0)
    for stream in (job_real_y, ss_user_real_y, ss_partner_real_y, pension_real_y)
)

# Each stream carries its proportional slice of the total tax
(
    job_tax_real_y,
    ss_user_tax_real_y,
    ss_partner_tax_real_y,
    pension_tax_real_y,
) = (
    share * total_tax_real_y
    for share in (share_job, share_ss_user, share_ss_partner, share_pension)
)

apportioned = pd.DataFrame(
    {
        "Date": df["Date"],
        "total_tax_real_y": total_tax_real_y,
        "job_tax_real_y": job_tax_real_y,
        "ss_user_tax_real_y": ss_user_tax_real_y,
        "ss_partner_tax_real_y": ss_partner_tax_real_y,
        "pension_tax_real_y": pension_tax_real_y,
    }
)

apportioned

In [None]:
# Derive real yearly post-tax streams per source and validate the totals

# NOTE(review): the apportioned tax is ADDED to each stream. That yields
# post-tax income only if the simulator's tax columns hold negative amounts
# -- confirm against the engine's sign convention.
(
    job_post_real_y,
    ss_user_post_real_y,
    ss_partner_post_real_y,
    pension_post_real_y,
) = (
    gross + tax
    for gross, tax in (
        (job_real_y, job_tax_real_y),
        (ss_user_real_y, ss_user_tax_real_y),
        (ss_partner_real_y, ss_partner_tax_real_y),
        (pension_real_y, pension_tax_real_y),
    )
)

# Sum of the per-stream post-tax amounts
total_post_real_y_streams = (
    job_post_real_y + ss_user_post_real_y + ss_partner_post_real_y + pension_post_real_y
)

# Reference total for the consistency check: income plus taxes, computed
# directly from the aggregate series
total_income_plus_tax_real_y = total_income_real_y + total_tax_real_y

check_df = pd.DataFrame(
    {
        "Date": df["Date"],
        "total_income_real_y": total_income_real_y,
        "total_tax_real_y": total_tax_real_y,
        "total_post_real_y_streams": total_post_real_y_streams,
        "total_income_plus_tax_real_y": total_income_plus_tax_real_y,
    }
)

# Largest absolute gap between the two totals over all intervals
diff = (total_post_real_y_streams - total_income_plus_tax_real_y).abs().max()
assert diff < 1.0, f"Max diff is too high: {diff}"
print("Max abs diff between streams-sum and income+tax totals: ", diff)

post_tax_streams = pd.DataFrame(
    {
        "Date": df["Date"],
        "job_post_real_y": job_post_real_y,
        "ss_user_post_real_y": ss_user_post_real_y,
        "ss_partner_post_real_y": ss_partner_post_real_y,
        "pension_post_real_y": pension_post_real_y,
    }
)

post_tax_streams

In [None]:
# Group consecutive intervals into periods per stream


@dataclass
class Period:
    """A run of consecutive intervals over which a stream's amount is treated as constant.

    Dates are decimal years (e.g. 2025.5), taken from the simulation's "Date" column.
    """

    # Decimal date of the first interval in the run
    start_date_dec: float
    # Decimal date of the last interval in the run
    end_date_dec: float
    # Real yearly amount carried for the whole run (the value of its first interval)
    amount_real_y: float


# Grouping tolerance: taken from the TPAW planner config when that section is
# present, otherwise 1.0.
if user_config.tpaw_planner:
    group_tol = user_config.tpaw_planner.group_tol
else:
    group_tol = 1.0
print(f"Group tolerance: {group_tol}")


def group_stream(date_series, value_series):
    """Collapse a per-interval value series into runs of near-constant value.

    A run keeps growing while each new value stays within the module-level
    ``group_tol`` of the run's FIRST value; the first value that differs by
    more than that starts a new run. Each run becomes a ``Period`` carrying
    the dates of its first and last interval and the run's first value.

    Args:
        date_series: Positionally indexable (``.iloc``) dates, aligned with
            ``value_series``.
        value_series: Positionally indexable (``.iloc``) amounts.

    Returns:
        List of ``Period`` objects in chronological order, excluding runs
        whose amount has absolute value <= 1.0. Empty when ``value_series``
        is empty.
    """
    count = len(value_series)
    if count == 0:
        return []

    # First pass: record where each run starts and the value it is anchored to
    run_starts = [0]
    run_values = [value_series.iloc[0]]
    for idx in range(1, count):
        candidate = value_series.iloc[idx]
        if abs(candidate - run_values[-1]) > group_tol:
            run_starts.append(idx)
            run_values.append(candidate)

    # A run ends right before the next one starts; the last run ends at the tail
    run_ends = [next_start - 1 for next_start in run_starts[1:]]
    run_ends.append(count - 1)

    # Second pass: materialize every run as a Period
    all_periods = [
        Period(
            start_date_dec=date_series.iloc[first],
            end_date_dec=date_series.iloc[last],
            amount_real_y=amount,
        )
        for first, last, amount in zip(run_starts, run_ends, run_values)
    ]

    # Runs that are effectively zero are not reported
    return [period for period in all_periods if abs(period.amount_real_y) > 1.0]


# Group each post-tax stream into periods, all against the same date column
job_periods, ss_user_periods, ss_partner_periods, pension_periods = (
    group_stream(df["Date"], stream)
    for stream in (
        job_post_real_y,
        ss_user_post_real_y,
        ss_partner_post_real_y,
        pension_post_real_y,
    )
)

# Number of periods found per stream
tuple(
    map(len, (job_periods, ss_user_periods, ss_partner_periods, pension_periods))
)

In [None]:
# Convert decimal dates to "Month Year" and build TPAW-ready tables

MONTH_NAMES = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
]


def decimal_date_to_month_year(decimal_date, is_end_date=False):
    """Convert a decimal date (e.g., 2025.5) to 'Mon Year' text (e.g., 'Jul 2025').

    The fractional part of the date is snapped to the quarter that contains
    it, and the month is taken from that quarter:
    - Start: Jan/Apr/Jul/Oct (first month of the quarter)
    - End:   Mar/Jun/Sep/Dec (last month of the quarter)

    A small tolerance is applied before truncating so that a value sitting a
    hair below a quarter boundary because of floating-point error (e.g.
    2025.4999999) lands in the intended quarter. A fraction that snaps up to
    a full year rolls over to the first quarter of the following year.

    Args:
        decimal_date: Year plus fraction of the year, e.g. 2025.75.
        is_end_date: When True return the quarter's last month, otherwise
            its first month.

    Returns:
        String of the form "<abbreviated month> <year>".
    """
    # Tolerance in quarter units; absorbs float error without shifting any
    # date that is genuinely inside a quarter.
    snap_tolerance = 1e-6

    year = int(decimal_date)
    quarter = int((decimal_date % 1) * 4 + snap_tolerance)
    if quarter >= 4:
        # Fraction was within tolerance of the next whole year
        year += 1
        quarter = 0

    # Quarter q covers months 3q+1 .. 3q+3, so the result is always in 1..12
    month_num = quarter * 3 + (3 if is_end_date else 1)
    month_name = MONTH_NAMES[month_num - 1]
    return f"{month_name} {year}"


def periods_to_df(periods, label):
    """Build a TPAW-ready table with one row per period.

    Args:
        periods: Iterable of objects exposing ``start_date_dec``,
            ``end_date_dec`` and ``amount_real_y``.
        label: Stream name written to the "Stream" column of every row.

    Returns:
        DataFrame with columns "Start Date", "End Date",
        "Post-Tax Real Income (per month)" and "Stream". The columns are
        present even when ``periods`` is empty, so the result keeps a stable
        schema for printing or concatenation.
    """
    columns = [
        "Start Date",
        "End Date",
        "Post-Tax Real Income (per month)",
        "Stream",
    ]
    rows = [
        {
            "Start Date": decimal_date_to_month_year(
                p.start_date_dec, is_end_date=False
            ),
            "End Date": decimal_date_to_month_year(p.end_date_dec, is_end_date=True),
            # Yearly amount -> whole units per month. The factor of 1000
            # presumably converts from thousands of dollars to dollars --
            # TODO confirm against the simulator's units.
            "Post-Tax Real Income (per month)": int(
                round(p.amount_real_y * 1000 / 12)
            ),
            "Stream": label,
        }
        for p in periods
    ]
    # Passing the columns explicitly keeps the schema when there are no rows
    return pd.DataFrame(rows, columns=columns)


# One table per stream
job_df, ss_user_df, ss_partner_df = (
    periods_to_df(stream_periods, stream_label)
    for stream_periods, stream_label in (
        (job_periods, "Job"),
        (ss_user_periods, "SS User"),
        (ss_partner_periods, "SS Partner"),
    )
)

# The pension table is only built when there are pension periods; otherwise
# an empty placeholder frame is used.
if pension_periods:
    pension_df = periods_to_df(pension_periods, "Pension")
else:
    pension_df = pd.DataFrame()

# Plain-text dump of each table, heading first
print("Job income periods:", job_df.to_string(index=False), sep="\n")

print("\nSS User periods:", ss_user_df.to_string(index=False), sep="\n")

print("\nSS Partner periods:", ss_partner_df.to_string(index=False), sep="\n")

# Pension output is skipped entirely when there is nothing to show
if not pension_df.empty:
    print("\nPension periods:", pension_df.to_string(index=False), sep="\n")