# Tax-Aware Factor Portfolio Builder

_Educational use only; not tax, legal, or investment advice._

## Setup & Configuration

In [None]:
# Notebook-wide configuration read by the data-loading cell below.

# Source of fund return histories; any other value makes the loading cell raise.
DATA_MODE = "yfinance"  # options: "yfinance" or "csv"
# Start date handed to the yfinance loader only (the CSV loader is not given it).
DATA_START_DATE = "2010-01-01"
# Sampling frequency passed to both fund loaders and to the factor loader.
# NOTE(review): "M" is presumably monthly; the Monte Carlo cell hard-codes
# 12 periods per year, so switching to "D" would need that cell updated too.
DATA_FREQUENCY = "M"  # "D" for daily
# Directory given to the CSV loader when DATA_MODE == "csv".
CSV_FUND_DATA_DIR = "../data/funds"   # from the notebooks/ folder
# Factor CSV handed to load_factor_data_default.
# NOTE(review): the " 2" before ".csv" looks like a duplicate-download suffix;
# confirm this is the intended file name.
FACTOR_DATA_PATH = "../data/factors/F-F_Research_Data_5_Factors_2x3 2.csv"

In [None]:
import pathlib
import sys

# Put the parent directory (the project root when run from notebooks/) at the
# front of the import path so that `src.*` imports resolve.
ROOT = pathlib.Path('..').resolve()
_root_entry = str(ROOT)
if not any(entry == _root_entry for entry in sys.path):
    sys.path.insert(0, _root_entry)


In [None]:
# IPython line magics (only valid inside Jupyter/IPython, not plain Python):
# upgrade pip first, then quietly install everything in ../requirements.txt.
%pip install --upgrade -q pip
%pip install -q -r ../requirements.txt

In [None]:
from src.reporting import format_run_summary
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.data_loaders import fetch_all_fund_returns_yf, load_all_fund_returns_csv, load_factor_data_default
from src.factors import apply_factor_estimation_to_all_funds, compute_portfolio_asset_class_breakdown, compute_portfolio_factor_loadings
from src.optimization import solve_pre_tax_optimization
from src.tax import build_tax_drag_table
from src.tax_location import solve_tax_location_problem, build_naive_allocation, summarize_tax_drag
from src.monte_carlo import simulate_factor_premia, simulate_portfolio_paths, simulate_after_tax_wealth
from src.models import TargetPortfolioSpec, default_target_portfolio_spec, InvestorProfile, Account


In [None]:
from src.models import Fund
import logging
logging.basicConfig(level=logging.INFO)

# Candidate funds for the optimiser. Each entry carries descriptive metadata
# (manager, asset class, region, vehicle) plus the per-fund inputs used later for
# tax modelling (dividend yield, qualified-dividend share, cap-gain distributions).
# Only the two bond funds set income_character explicitly.
# NOTE(review): yields and expense ratios look like hand-entered estimates —
# confirm against current fund fact sheets before relying on the output.
# NOTE(review): tickers DXUV / DXIV appear to be Dimensional's "Vector Equity"
# ETFs rather than the "Value" funds named here (those may be DFUV / DFIV);
# verify that ticker and name refer to the same fund.
FUND_UNIVERSE = [
    Fund(ticker="VTI", name="Vanguard Total Stock Market", manager="Vanguard", asset_class="US_Equity", region="US", vehicle_type="ETF", expense_ratio=0.0003, dividend_yield=0.012, qualified_dividend_ratio=0.95, cap_gain_distribution_yield=0.0),
    Fund(ticker="VXUS", name="Vanguard Total International Stock", manager="Vanguard", asset_class="Intl_Equity", region="Global ex-US", vehicle_type="ETF", expense_ratio=0.0007, dividend_yield=0.025, qualified_dividend_ratio=0.75, cap_gain_distribution_yield=0.0),
    Fund(ticker="AVUV", name="Avantis US Small Cap Value", manager="Avantis", asset_class="US_Equity", region="US", vehicle_type="ETF", expense_ratio=0.0025, dividend_yield=0.015, qualified_dividend_ratio=0.8, cap_gain_distribution_yield=0.005),
    Fund(ticker="AVDV", name="Avantis Intl Small Cap Value", manager="Avantis", asset_class="Intl_Equity", region="Intl Developed", vehicle_type="ETF", expense_ratio=0.0036, dividend_yield=0.02, qualified_dividend_ratio=0.75, cap_gain_distribution_yield=0.005),
    Fund(ticker="BND", name="Vanguard Total Bond Market", manager="Vanguard", asset_class="Bond", region="US", vehicle_type="ETF", expense_ratio=0.0005, dividend_yield=0.03, qualified_dividend_ratio=0.0, cap_gain_distribution_yield=0.0, income_character="mostly_interest"),
    Fund(ticker="AVIG", name="Avantis Core Fixed Income", manager="Avantis", asset_class="Bond", region="US", vehicle_type="ETF", expense_ratio=0.0036, dividend_yield=0.035, qualified_dividend_ratio=0.0, cap_gain_distribution_yield=0.0, income_character="mostly_interest"),
    Fund(ticker="AVGV", name="Avantis Global Value", manager="Avantis", asset_class="Global_Equity", region="Global", vehicle_type="ETF", expense_ratio=0.0025, dividend_yield=0.02, qualified_dividend_ratio=0.8, cap_gain_distribution_yield=0.005),
    Fund(ticker="DXUV", name="DFA US Value ETF", manager="DFA", asset_class="US_Equity", region="US", vehicle_type="ETF", expense_ratio=0.0012, dividend_yield=0.018, qualified_dividend_ratio=0.8, cap_gain_distribution_yield=0.002),
    Fund(ticker="DXIV", name="DFA International Value ETF", manager="DFA", asset_class="Intl_Equity", region="Intl Developed", vehicle_type="ETF", expense_ratio=0.0018, dividend_yield=0.022, qualified_dividend_ratio=0.75, cap_gain_distribution_yield=0.003),
    Fund(ticker="DFSV", name="DFA US Small Cap Value ETF", manager="DFA", asset_class="US_Equity", region="US", vehicle_type="ETF", expense_ratio=0.0026, dividend_yield=0.017, qualified_dividend_ratio=0.75, cap_gain_distribution_yield=0.003),
    Fund(ticker="DISV", name="DFA International Small Cap Value ETF", manager="DFA", asset_class="Intl_Equity", region="Intl Developed", vehicle_type="ETF", expense_ratio=0.0035, dividend_yield=0.02, qualified_dividend_ratio=0.7, cap_gain_distribution_yield=0.003),
]


## Data Loading (Funds & Factors)

In [None]:
from src.data_loaders import fetch_all_fund_returns_yf, load_all_fund_returns_csv, load_factor_data_default
# Pull one return series per fund from the configured source. Both loaders get
# the same fund list and sampling frequency; only the yfinance path takes a
# start date and only the CSV path takes a directory.
if DATA_MODE == "yfinance":
    fund_return_series = fetch_all_fund_returns_yf(FUND_UNIVERSE, start=DATA_START_DATE, frequency=DATA_FREQUENCY)
elif DATA_MODE == "csv":
    fund_return_series = load_all_fund_returns_csv(FUND_UNIVERSE, base_dir=CSV_FUND_DATA_DIR, frequency=DATA_FREQUENCY)
else:
    # Report the offending value and the accepted options so a config typo is
    # obvious from the traceback alone.
    raise ValueError(f"Unsupported DATA_MODE {DATA_MODE!r}; expected 'yfinance' or 'csv'")

# Attach each loaded series to its fund object; funds the loader returned
# nothing for are left untouched and only logged.
for fund in FUND_UNIVERSE:
    if fund.ticker in fund_return_series:
        fund.historical_returns = fund_return_series[fund.ticker]
    else:
        logging.warning("No returns found for %s", fund.ticker)

# Factor data is loaded at the same frequency as the fund returns.
factor_df = load_factor_data_default(FACTOR_DATA_PATH, frequency=DATA_FREQUENCY)
display(factor_df.head())

## Factor Estimation & Reporting

In [None]:
from src.factors import apply_factor_estimation_to_all_funds

# Start from the default target and estimate loadings on its factor set.
target_spec = default_target_portfolio_spec()
factor_list = target_spec.factor_list

apply_factor_estimation_to_all_funds(FUND_UNIVERSE, factor_df, factor_list)

# One row of betas per fund; funds without estimates are logged and skipped,
# and a factor missing from a fund's loadings shows up as NaN.
beta_rows = []
for f in FUND_UNIVERSE:
    if not f.factor_loadings:
        logging.warning("Fund %s missing factor estimates", f.ticker)
        continue
    row = {"Ticker": f.ticker}
    for fac in factor_list:
        row[fac] = f.factor_loadings.get(fac, float('nan'))
    beta_rows.append(row)

if beta_rows:
    beta_df = pd.DataFrame(beta_rows).set_index("Ticker")
else:
    beta_df = pd.DataFrame()
display(beta_df)

if not beta_df.empty:
    beta_df.plot.bar(figsize=(8, 4))
    plt.title("Estimated Factor Loadings")
    plt.tight_layout()

# Enforce 1% weight floor to avoid dust weights (applies to the optimisation
# cells that read target_spec afterwards).
target_spec.weight_floor = 0.01


## Pre-Tax Optimization (starter)

In [None]:
# Solve for fund weights against the target spec, then derive the resulting
# portfolio-level factor loadings and asset-class split.
weights = solve_pre_tax_optimization(FUND_UNIVERSE, target_spec)

portfolio_factors = compute_portfolio_factor_loadings(weights, FUND_UNIVERSE, factor_list)
asset_breakdown = compute_portfolio_asset_class_breakdown(weights, FUND_UNIVERSE)

# Table of fund metadata alongside the solved weight (same order as FUND_UNIVERSE).
_fund_columns = {'Ticker': 'ticker', 'Manager': 'manager', 'AssetClass': 'asset_class'}
_weights_table = {
    label: [getattr(fund, attr) for fund in FUND_UNIVERSE]
    for label, attr in _fund_columns.items()
}
_weights_table['Weight'] = weights
weights_df = pd.DataFrame(_weights_table)
display(weights_df)

# Text summary of the run: configuration, target, weights and diagnostics.
_run_config = {
    'data_mode': DATA_MODE,
    'start_date': DATA_START_DATE,
    'frequency': DATA_FREQUENCY,
    'factor_data_path': FACTOR_DATA_PATH,
    'weight_floor': target_spec.weight_floor,
}
summary_text = format_run_summary(
    config=_run_config,
    target_spec=target_spec,
    weights=weights,
    funds=FUND_UNIVERSE,
    portfolio_factors=portfolio_factors,
    asset_breakdown=asset_breakdown,
)
print(summary_text)


In [None]:
# Side-by-side table and bar chart of target vs achieved factor loadings.
# Factors with no explicit target are shown with a target of 0.0.
_target_lookup = target_spec.factor_targets
target_vec = pd.Series(
    {name: _target_lookup.get(name, 0.0) for name in factor_list},
    name='Target',
)
portfolio_vec = pd.Series(portfolio_factors, name='Portfolio')
comparison = pd.concat([target_vec, portfolio_vec], axis='columns')
display(comparison)

comparison.plot.bar(figsize=(8, 4))
plt.title('Factor Target vs Portfolio')
plt.tight_layout()

In [None]:
# Asset class breakdown preview: wrap the breakdown computed by the optimisation
# cell in a DataFrame; as the cell's last expression it is rendered by the notebook.
pd.DataFrame(asset_breakdown)

## Tax Modeling (drag by account)

In [None]:
# Investor tax assumptions used when computing per-fund tax drag.
_investor_settings = {
    "federal_ordinary_rate": 0.32,
    "federal_ltcg_qualdiv_rate": 0.15,
    "state_income_rate": 0.05,
    "uses_municipal_bonds": False,
    "time_horizon_years": 20,
}
investor = InvestorProfile(**_investor_settings)

# (name, account type, current value); every account may hold any fund.
_account_specs = [
    ("Taxable", "Taxable", 200000),
    ("401k", "TaxDeferred", 300000),
    ("Roth", "TaxFree", 100000),
]
accounts = [
    Account(name=acct_name, type=acct_type, current_value=acct_value, available_funds=["ANY"])
    for acct_name, acct_type, acct_value in _account_specs
]

tax_drag_df = build_tax_drag_table(FUND_UNIVERSE, investor, accounts)
display(tax_drag_df.head())

### Tax Drag Comparison: Naive vs Optimized Tax Location

In [None]:
from src.tax_location import solve_tax_location_problem, build_naive_allocation, summarize_tax_drag

def _weighted_tax_drag(summary):
    """Sum each row's tax drag multiplied by its '%Total' column.

    NOTE(review): assumes '%Total' holds fractions of the whole portfolio (not
    0-100 percentages); confirm against summarize_tax_drag.
    """
    return (summary['tax_drag'] * summary['%Total']).sum()


# Recompute weights and the drag table so this cell can run on its own.
weights = solve_pre_tax_optimization(FUND_UNIVERSE, target_spec)

tax_drag_df = build_tax_drag_table(FUND_UNIVERSE, investor, accounts)

# Same fund weights placed two ways: tax-location solver vs naive baseline.
opt_allocation = solve_tax_location_problem(weights, FUND_UNIVERSE, accounts, tax_drag_df)
naive_allocation = build_naive_allocation(weights, accounts)

opt_summary = summarize_tax_drag(opt_allocation, tax_drag_df, FUND_UNIVERSE, accounts)
naive_summary = summarize_tax_drag(naive_allocation, tax_drag_df, FUND_UNIVERSE, accounts)

opt_total_drag = _weighted_tax_drag(opt_summary)
naive_total_drag = _weighted_tax_drag(naive_summary)

# Report in basis points (x 1e4).
print(f"Optimized tax drag (bps): {opt_total_drag*1e4:.2f}")
print(f"Naive tax drag (bps): {naive_total_drag*1e4:.2f}")
print(f"Difference (bps): {(naive_total_drag-opt_total_drag)*1e4:.2f}")

In [None]:
# Show both allocation summaries, optimized first, each under its own heading.
for heading, summary in (
    ('Optimized Allocation:', opt_summary),
    ('Naive Allocation:', naive_summary),
):
    print(heading)
    display(summary)

In [None]:
# Quick bar chart for tax drag comparison
import matplotlib.pyplot as plt
# Two bars, naive first, with drag converted to basis points.
_drag_labels = ['Naive', 'Optimized']
_drag_bps = [naive_total_drag*1e4, opt_total_drag*1e4]
plt.bar(_drag_labels, _drag_bps)
plt.ylabel('Tax drag (bps)')
plt.title('Tax drag comparison')
plt.show()

## Interactive Dashboard (ipywidgets)

In [None]:
import ipywidgets as widgets

def run_analysis(equity_pct=70, tilt_strength=1.0, alt_pct=5):
    """Re-run optimisation and tax location for one set of dashboard inputs.

    Builds a fresh target spec from the defaults, applies the requested asset
    mix and SMB/HML tilt, then re-estimates factors, solves for weights, runs
    the tax-location step and displays tables, a text summary and two charts.

    Args:
        equity_pct: Target equity share in percent (0-100).
        tilt_strength: Multiplier applied to the default SMB and HML targets.
        alt_pct: Target alternatives share in percent. If equity + alts would
            exceed 100%, the alts share is reduced to the remaining room and a
            note is printed; bonds receive whatever is left (never negative).

    Returns:
        dict with keys "weights", "portfolio_factors" and "opt_drag_bps".

    Reads the notebook globals FUND_UNIVERSE, factor_df, investor and accounts.
    Passes FUND_UNIVERSE to apply_factor_estimation_to_all_funds on every call
    (presumably updating the funds' loadings in place — confirm in src.factors).
    """

    def _weighted_drag(summary):
        # Tax drag weighted by each row's '%Total' column.
        return (summary['tax_drag'] * summary['%Total']).sum()

    # Update target spec based on sliders
    ts = default_target_portfolio_spec()
    ts.weight_floor = 0.01  # enforce 1% minimum weight

    equity = equity_pct / 100
    alts = alt_pct / 100
    # The slider ranges permit equity + alts > 100% (e.g. 100% + 30%). Clamping
    # only the bond share would leave targets summing to more than 1, so trim
    # alts to the room left after equity. The tolerance avoids a spurious note
    # from floating-point rounding when the inputs sum to exactly 100%.
    if equity + alts > 1.0 + 1e-9:
        alts = max(0.0, 1.0 - equity)
        print(
            f"Note: alt_pct reduced from {alt_pct}% to {alts * 100:.0f}% "
            "so equity + alts stays within 100%"
        )
    bond = max(0.0, 1.0 - equity - alts)
    ts.equity_weight = equity
    ts.bond_weight = bond
    ts.alts_weight = alts

    # Scale SMB/HML tilt. Work on a copy so the spec's original targets object
    # (which may be shared with the defaults) is never mutated.
    base_targets = ts.factor_targets
    ts.factor_targets = base_targets.copy()
    for fac in ("SMB", "HML"):
        if fac in base_targets:
            ts.factor_targets[fac] = base_targets[fac] * tilt_strength

    # Re-run factor estimation and optimization
    apply_factor_estimation_to_all_funds(FUND_UNIVERSE, factor_df, ts.factor_list)
    weights = solve_pre_tax_optimization(FUND_UNIVERSE, ts)
    portfolio_factors = compute_portfolio_factor_loadings(weights, FUND_UNIVERSE, ts.factor_list)
    asset_breakdown = compute_portfolio_asset_class_breakdown(weights, FUND_UNIVERSE)

    # Tax location step
    tax_drag_df = build_tax_drag_table(FUND_UNIVERSE, investor, accounts)
    opt_alloc = solve_tax_location_problem(weights, FUND_UNIVERSE, accounts, tax_drag_df)
    naive_alloc = build_naive_allocation(weights, accounts)
    opt_summary = summarize_tax_drag(opt_alloc, tax_drag_df, FUND_UNIVERSE, accounts)
    naive_summary = summarize_tax_drag(naive_alloc, tax_drag_df, FUND_UNIVERSE, accounts)
    opt_drag = _weighted_drag(opt_summary)
    naive_drag = _weighted_drag(naive_summary)

    display(pd.DataFrame({
        'Ticker': [f.ticker for f in FUND_UNIVERSE],
        'Manager': [f.manager for f in FUND_UNIVERSE],
        'AssetClass': [f.asset_class for f in FUND_UNIVERSE],
        'Weight': weights,
    }))

    comp = pd.concat([
        pd.Series({f: ts.factor_targets.get(f, 0.0) for f in ts.factor_list}, name='Target'),
        pd.Series(portfolio_factors, name='Portfolio'),
    ], axis=1)
    display(comp)

    print(f"Optimized tax drag (bps): {opt_drag*1e4:.2f}")
    print(f"Naive tax drag (bps): {naive_drag*1e4:.2f}")
    print(f"Difference (bps): {(naive_drag-opt_drag)*1e4:.2f}")

    summary_text = format_run_summary(
        config={
            'data_mode': DATA_MODE,
            'start_date': DATA_START_DATE,
            'frequency': DATA_FREQUENCY,
            'factor_data_path': FACTOR_DATA_PATH,
            'weight_floor': ts.weight_floor,
        },
        target_spec=ts,
        weights=weights,
        funds=FUND_UNIVERSE,
        portfolio_factors=portfolio_factors,
        asset_breakdown=asset_breakdown,
        tax_stats={
            'optimized_bps': opt_drag*1e4,
            'naive_bps': naive_drag*1e4,
        },
    )
    print(summary_text)

    comp.plot(kind='bar', figsize=(8,4))
    plt.title('Factor Target vs Portfolio')
    plt.tight_layout()
    plt.show()

    plt.bar(['Naive','Optimized'], [naive_drag*1e4, opt_drag*1e4])
    plt.ylabel('Tax drag (bps)')
    plt.title('Tax drag comparison')
    plt.tight_layout()
    plt.show()

    return {"weights": weights, "portfolio_factors": portfolio_factors, 'opt_drag_bps': opt_drag*1e4}


In [None]:
# widgets.interact(
#     run_analysis,
#     equity_pct=widgets.IntSlider(description='Equity %', min=40, max=100, step=5, value=70),
#     tilt_strength=widgets.FloatSlider(description='Tilt Strength', min=0.0, max=2.0, step=0.1, value=1.0),
#     alt_pct=widgets.IntSlider(description='Alt %', min=0, max=30, step=1, value=5),
# )


## Monte Carlo Simulation (After-Tax Wealth)

In [None]:
import numpy as np
from src.monte_carlo import simulate_factor_premia, simulate_portfolio_paths, simulate_after_tax_wealth

# Simulation parameters.
# NOTE(review): mu, cov and rf look like per-period (monthly) figures given the
# 12-periods-per-year horizon below — confirm units against src.monte_carlo.
# NOTE(review): HORIZON_YEARS is 10 while the investor profile uses a 20-year
# time horizon; confirm the difference is intentional.
HORIZON_YEARS = 10
N_SIMS = 2000
rf = 0.001
factor_list_mc = ['MKT_RF', 'SMB']
mu = np.array([0.005, 0.002])
cov = np.diag([0.0004, 0.0001])

# Use latest portfolio_factors from optimization above (fallback to zeros)
if 'portfolio_factors' not in globals():
    portfolio_factors = dict.fromkeys(factor_list_mc, 0.0)

# Only the factors in factor_list_mc are simulated; loadings on any other
# factor are ignored here.
_n_periods = HORIZON_YEARS*12
_mc_loadings = {name: portfolio_factors.get(name, 0.0) for name in factor_list_mc}
factor_sims = simulate_factor_premia(T=_n_periods, n_sims=N_SIMS, factor_list=factor_list_mc, mu=mu, cov=cov)
portfolio_paths = simulate_portfolio_paths(factor_sims, _mc_loadings, rf)

# Build allocation from last optimized weights if available.
tax_drag_df = build_tax_drag_table(FUND_UNIVERSE, investor, accounts)
if 'weights' in globals():
    opt_alloc = solve_tax_location_problem(weights, FUND_UNIVERSE, accounts, tax_drag_df)
else:
    # NOTE(review): an all-ones (funds x accounts) matrix is not normalised;
    # verify that simulate_after_tax_wealth tolerates it as a placeholder.
    opt_alloc = np.ones((len(FUND_UNIVERSE), len(accounts)))
total_value = sum(a.current_value for a in accounts)

wealth = simulate_after_tax_wealth(total_value, opt_alloc, tax_drag_df, FUND_UNIVERSE, accounts, portfolio_paths)

import matplotlib.pyplot as plt
plt.hist(wealth, bins=20, alpha=0.7)
plt.title('Terminal After-Tax Wealth Distribution')
plt.xlabel('Wealth')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()

# Tabulate selected percentiles of terminal wealth.
_pct_levels = [5, 25, 50, 75, 95]
quantiles = np.percentile(wealth, _pct_levels)
quantile_df = pd.DataFrame({'percentile': _pct_levels, 'wealth': quantiles})
display(quantile_df)

### Factor Visuals: Bar + Radar

In [None]:
import numpy as np
from math import pi

def plot_factor_radar(targets, actuals):
    """Draw target and portfolio factor loadings on one polar (radar) chart.

    Args:
        targets: Mapping of factor name to target loading; its keys define the
            spokes and their order.
        actuals: Mapping of factor name to achieved loading; factors missing
            from it are plotted as 0.0.
    """
    labels = list(targets.keys())
    spoke_count = len(labels)
    target_vals = [targets[name] for name in labels]
    actual_vals = [actuals.get(name, 0.0) for name in labels]
    spoke_angles = [k / float(spoke_count) * 2 * pi for k in range(spoke_count)]

    # Repeat the first point at the end so each outline closes on itself.
    closed_angles = spoke_angles + spoke_angles[:1]
    closed_series = (
        (target_vals + target_vals[:1], 'Target'),
        (actual_vals + actual_vals[:1], 'Portfolio'),
    )

    plt.figure(figsize=(5, 5))
    ax = plt.subplot(111, polar=True)
    for values, legend_label in closed_series:
        ax.plot(closed_angles, values, linewidth=2, label=legend_label)
        ax.fill(closed_angles, values, alpha=0.1)
    # Tick once per factor (the duplicated closing angle is not labelled).
    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(labels)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.title('Factor Radar')
    plt.show()

# Radar of the target spec's factor targets (0.0 where a factor has no target)
# against the portfolio loadings currently held in portfolio_factors.
plot_factor_radar({f: target_spec.factor_targets.get(f,0.0) for f in factor_list}, portfolio_factors)

### Asset/Manager/Region Breakdown

In [None]:
# Bar chart of the asset-class breakdown, if an optimisation has been run.
if 'asset_breakdown' in globals():
    breakdown_df = pd.DataFrame(asset_breakdown)
else:
    breakdown_df = pd.DataFrame()

if breakdown_df.empty:
    print('Run optimization first to populate asset_breakdown')
else:
    breakdown_df.plot(kind='bar', figsize=(8,4), rot=0)
    plt.title('Allocation Breakdown')
    plt.tight_layout()
    plt.show()

### Regression Diagnostics

In [None]:
# Actual-vs-predicted scatter and residual histogram for the FIRST fund that has
# both return history and factor loadings (the loop stops after one fund).
for f in FUND_UNIVERSE:
    if f.historical_returns is None or not f.factor_loadings:
        continue

    # Align fund returns with the factor data on shared dates; column 0 is the fund.
    aligned = pd.concat([f.historical_returns, factor_df], axis=1, join='inner').dropna()
    fund_returns = aligned.iloc[:, 0]
    # Subtract the risk-free column when the factor data provides one.
    excess = fund_returns - aligned['RF'] if 'RF' in aligned.columns else fund_returns

    # Skip funds whose aligned data lacks any of the target factors.
    if not set(target_spec.factor_list).issubset(aligned.columns):
        continue

    factors_mat = aligned[target_spec.factor_list].to_numpy()
    betas = np.array([f.factor_loadings.get(fac, 0.0) for fac in target_spec.factor_list])
    intercept = 0.0 if f.alpha is None else f.alpha
    pred_vals = factors_mat @ betas + intercept

    plt.scatter(excess, pred_vals, alpha=0.5)
    plt.xlabel('Actual Excess Return')
    plt.ylabel('Predicted Excess Return')
    plt.title(f'Actual vs Predicted: {f.ticker}')
    # 45-degree reference line: points on it are predicted exactly.
    plt.axline((0,0),(1,1), color='gray', linestyle='--')
    plt.tight_layout()
    plt.show()

    plt.hist(excess - pred_vals, bins=20, alpha=0.7)
    plt.title(f'Residuals: {f.ticker}')
    plt.tight_layout()
    plt.show()
    break

### Tax Drag Heatmap and Allocation Bars

In [None]:
# Heatmap of tax drag with funds as rows and account types as columns.
if 'tax_drag_df' not in globals():
    tax_drag_df = build_tax_drag_table(FUND_UNIVERSE, investor, accounts)

if tax_drag_df.empty:
    heat_df = pd.DataFrame()
else:
    # Reshape the long table into a Fund x AccountType grid.
    heat_df = tax_drag_df.reset_index().pivot(index='Fund', columns='AccountType', values='tax_drag')

if heat_df.empty:
    print('No tax drag data to plot')
else:
    plt.imshow(heat_df, cmap='Reds')
    plt.xticks(range(len(heat_df.columns)), heat_df.columns)
    plt.yticks(range(len(heat_df.index)), heat_df.index)
    plt.colorbar(label='Tax drag')
    plt.title('Tax Drag Heatmap')
    plt.tight_layout()
    plt.show()

if {'opt_summary', 'naive_summary'} <= globals().keys():
    def plot_alloc(summary, title):
        """Stacked bar per account showing each fund's '%Account' share."""
        by_account = summary.pivot(index='Account', columns='Fund', values='%Account')
        by_account = by_account.fillna(0)
        by_account.plot(kind='bar', stacked=True, figsize=(8,4))
        plt.title(title)
        plt.ylabel('% of Account')
        plt.tight_layout()
        plt.show()

    # Optimized first, then the naive baseline.
    for _alloc_summary, _alloc_title in (
        (opt_summary, 'Optimized Allocation by Account'),
        (naive_summary, 'Naive Allocation by Account'),
    ):
        plot_alloc(_alloc_summary, _alloc_title)
else:
    print('Run tax location section to see allocation bars')

### Monte Carlo Wealth Violin / Fan Chart

In [None]:
if 'wealth' not in globals():
    print('Run Monte Carlo section first')
else:
    # Distribution of terminal wealth across simulations.
    plt.violinplot(wealth, showmeans=True)
    plt.title('Terminal Wealth Violin')
    plt.ylabel('Wealth')
    plt.tight_layout()
    plt.show()

    if 'portfolio_paths' in globals():
        # Compound per-period returns along axis 1, then take percentiles
        # across axis 0 at each step.
        # NOTE(review): assumes portfolio_paths is (n_sims, n_periods) — confirm.
        cum_paths = (1 + portfolio_paths).cumprod(axis=1)
        pct = np.percentile(cum_paths, [5,25,50,75,95], axis=0)
        p05, p25, p50, p75, p95 = pct
        periods = range(cum_paths.shape[1])
        plt.fill_between(periods, p05, p95, color='lightblue', alpha=0.4, label='5-95%')
        plt.fill_between(periods, p25, p75, color='blue', alpha=0.4, label='25-75%')
        plt.plot(p50, color='navy', label='Median')
        plt.title('Wealth Path Fan Chart (relative growth)')
        plt.xlabel('Period')
        plt.ylabel('Growth')
        plt.legend()
        plt.tight_layout()
        plt.show()