# Corporate Actions and Missing Data Policy Demo

This notebook demonstrates:
1. How finbot handles corporate actions (splits, dividends)
2. How to configure missing data policies
3. Practical examples with real data

**Last Updated:** 2026-02-16

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

from finbot.core.contracts import BacktestRunRequest, MissingDataPolicy
from finbot.services.backtesting.adapters.backtrader_adapter import BacktraderAdapter

# Display settings
# Show every column when a DataFrame is printed instead of eliding the middle ones.
pd.set_option('display.max_columns', None)
# Print floating-point values with two decimal places.
pd.set_option('display.precision', 2)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Part 1: Understanding Adjusted Prices

Let's examine how adjusted prices handle a real stock split.

In [None]:
# Apple had a 4:1 stock split on August 31, 2020.
# auto_adjust=False is required to get a separate "Adj Close" column: recent
# yfinance releases default to auto_adjust=True, which folds the adjustment
# into "Close" and drops "Adj Close" (the column selection below would then
# raise KeyError).
aapl = yf.download(
    "AAPL",
    start="2020-07-01",
    end="2020-10-01",
    progress=False,
    auto_adjust=False,
)

# Newer yfinance versions return (field, ticker) MultiIndex columns even for a
# single symbol; keep only the field level so aapl["Close"] is a plain Series.
if isinstance(aapl.columns, pd.MultiIndex):
    aapl.columns = aapl.columns.get_level_values(0)

# Display data around the split date (five calendar days either side)
split_date = pd.Timestamp("2020-08-31")
window = aapl.loc[split_date - pd.Timedelta(days=5):split_date + pd.Timedelta(days=5)]

print("Apple data around 4:1 split (2020-08-31):")
print(window[["Open", "High", "Low", "Close", "Adj Close", "Volume"]].round(2))

In [None]:
# Visualize the difference between the raw traded price and Adj Close.
#
# NOTE: Yahoo's "Close" column is already adjusted for splits (only dividends
# are left unadjusted), so plotting it directly shows no jump at all. To show
# what the unadjusted series looks like, rebuild the price that actually traded
# by undoing the 4:1 adjustment on every row before the split date.
SPLIT_RATIO = 4.0
presplit_factor = pd.Series(
    np.where(aapl.index < split_date, SPLIT_RATIO, 1.0),
    index=aapl.index,
)
# .mul(..., axis=0) aligns on the date index and works whether aapl["Close"]
# is a Series or a single-column DataFrame.
raw_close = aapl["Close"].mul(presplit_factor, axis=0)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# Unadjusted price - shows the split jump
ax1.plot(aapl.index, raw_close, label="Close (Unadjusted)", linewidth=2)
ax1.axvline(split_date, color='red', linestyle='--', label='Split Date')
ax1.set_title("Unadjusted Close Price - Shows 4:1 Split Jump", fontsize=14)
ax1.set_ylabel("Price ($)")
ax1.legend()
ax1.grid(True, alpha=0.3)

# Adjusted Close - smooth, accounts for split
ax2.plot(aapl.index, aapl["Adj Close"], label="Adj Close (Split-Adjusted)",
         color='green', linewidth=2)
ax2.axvline(split_date, color='red', linestyle='--', label='Split Date')
ax2.set_title("Adjusted Close Price - Continuous, No Jump", fontsize=14)
ax2.set_ylabel("Price ($)")
ax2.set_xlabel("Date")
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nKey Observation:")
print("- Unadjusted Close shows ~75% drop on split date (4x reduction in price)")
print("- Yahoo's Close column is already split-adjusted; the unadjusted series "
      "above is rebuilt as Close x 4 before the split")
print("- Adjusted Close is smooth and continuous")
print("- Finbot uses Adj Close for accurate backtesting")

In [None]:
# Buy-and-hold AAPL over a window that contains the split. The adapter is
# given the downloaded price history directly.
adapter = BacktraderAdapter(price_histories={"AAPL": aapl})

request = BacktestRunRequest(
    symbols=("AAPL",),
    strategy_name="NoRebalance",
    parameters={"equity_proportions": [1.0]},
    initial_cash=10000.0,
    start=None,
    end=None,
)

result = adapter.run(request)

print("\nBacktest Results (across split):")
# (prefix, metrics key, format spec) for each summary line, in display order.
for prefix, metric_key, spec in (
    ("Starting Value: $", "starting_value", ",.2f"),
    ("Ending Value:   $", "ending_value", ",.2f"),
    ("ROI:            ", "roi", ".2%"),
    ("CAGR:           ", "cagr", ".2%"),
):
    print(f"{prefix}{result.metrics[metric_key]:{spec}}")
print("\n✅ Split handled automatically via adjusted prices")

## Part 2: Missing Data Policies

Let's create synthetic data with gaps and test different policies.

In [None]:
# Create sample data with intentional gaps
def create_data_with_gaps(seed=42):
    """Build a synthetic 100-business-day OHLCV frame with two NaN gaps.

    Prices follow a random walk starting near 100. All price columns
    (Open, High, Low, Close, Adj Close) are set to NaN on rows 30-34 and
    rows 70-72; Volume is left intact on every row.

    Args:
        seed: Seed for the random walk. The default (42) reproduces the same
            prices as seeding NumPy's global generator with 42.

    Returns:
        pandas.DataFrame indexed by business days starting 2020-01-01 with
        columns Open, High, Low, Close, Adj Close, Volume.
    """
    dates = pd.bdate_range("2020-01-01", periods=100)

    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.randn, but does not reseed the global generator
    # that other notebook cells may rely on.
    rng = np.random.RandomState(seed)
    prices = 100 + np.cumsum(rng.randn(100) * 0.5)

    df = pd.DataFrame(
        {
            "Open": prices * 0.99,
            "High": prices * 1.01,
            "Low": prices * 0.98,
            "Close": prices,
            "Adj Close": prices,
            "Volume": 1000000,
        },
        index=dates,
    )

    # Introduce two gaps in the price columns only
    gap_cols = ["Open", "High", "Low", "Close", "Adj Close"]
    df.loc[df.index[30:35], gap_cols] = np.nan  # 5-day gap
    df.loc[df.index[70:73], gap_cols] = np.nan  # 3-day gap

    return df

df_with_gaps = create_data_with_gaps()

# Summarise where values are missing: per column first, then per row
missing_mask = df_with_gaps.isnull()
null_counts = missing_mask.sum()
print("Missing data summary:")
print(null_counts[null_counts > 0])
print(f"\nTotal rows: {len(df_with_gaps)}")
print(f"Rows with missing data: {missing_mask.any(axis=1).sum()}")

In [None]:
# Plot the Close series; matplotlib leaves a break wherever the value is NaN
plt.figure(figsize=(12, 6))
plt.plot(
    df_with_gaps.index,
    df_with_gaps["Close"],
    marker='o',
    linestyle='-',
    markersize=4,
    label="Close Price",
)
plt.title("Price Data with Missing Values (Gaps)", fontsize=14)
plt.xlabel("Date")
plt.ylabel("Price ($)")
plt.legend()
plt.grid(True, alpha=0.3)

# Mark every date whose Close is missing with a vertical line
gap_mask = df_with_gaps["Close"].isnull()
gap_dates = df_with_gaps.index[gap_mask]
for date in gap_dates:
    plt.axvline(date, linestyle='--', alpha=0.3, color='red')

plt.tight_layout()
plt.show()

print("Red dashed lines indicate missing data points")

### Policy 1: FORWARD_FILL (Default)

In [None]:
# Test FORWARD_FILL policy
adapter_ffill = BacktraderAdapter(
    price_histories={"STOCK": df_with_gaps},
    missing_data_policy=MissingDataPolicy.FORWARD_FILL,
)

# Apply policy to see the effect. A copy is passed so df_with_gaps keeps its
# gaps for the later cells.
# NOTE(review): this calls a private adapter method; presumably it returns the
# frame after the configured policy has been applied - confirm against the adapter.
df_ffill = adapter_ffill._apply_missing_data_policy(df_with_gaps.copy(), "STOCK")

print("FORWARD_FILL Policy:")
print(f"Original rows: {len(df_with_gaps)}")
print(f"After fill rows: {len(df_ffill)}")
print(f"Null values remaining: {df_ffill.isnull().sum().sum()}")

# Show filled values
print("\nGap 1 (rows 30-34):")
print(df_ffill.iloc[28:37][["Close"]].round(2))

# Report the value actually carried forward instead of a hard-coded number,
# so the message cannot drift from the table printed above.
fill_source_row = 29  # last populated row before the gap at rows 30-34
fill_value = df_ffill["Close"].iloc[fill_source_row]
print(f"Notice: Gap filled with value from row {fill_source_row} ({fill_value:.2f})")

### Policy 2: DROP

In [None]:
# DROP policy: build an adapter configured to drop, then apply the policy to
# a copy of the gappy frame so the original stays untouched.
adapter_drop = BacktraderAdapter(
    missing_data_policy=MissingDataPolicy.DROP,
    price_histories={"STOCK": df_with_gaps},
)

df_drop = adapter_drop._apply_missing_data_policy(df_with_gaps.copy(), "STOCK")

print("DROP Policy:")
rows_before = len(df_with_gaps)
print(f"Original rows: {rows_before}")
rows_after = len(df_drop)
print(f"After drop rows: {rows_after}")
print(f"Rows removed: {rows_before - rows_after}")
print(f"Null values remaining: {df_drop.isna().sum().sum()}")

print("\nEffect: Rows with any missing values are completely removed")
print("Result: Clean data but reduced dataset size")

### Policy 3: INTERPOLATE

In [None]:
# INTERPOLATE policy: same recipe as the other policies - configure an
# adapter, then apply it to a copy of the gappy frame.
adapter_interp = BacktraderAdapter(
    missing_data_policy=MissingDataPolicy.INTERPOLATE,
    price_histories={"STOCK": df_with_gaps},
)

df_interp = adapter_interp._apply_missing_data_policy(df_with_gaps.copy(), "STOCK")

print("INTERPOLATE Policy:")
print(f"Original rows: {len(df_with_gaps)}")
print(f"After interpolation rows: {len(df_interp)}")
print(f"Null values remaining: {df_interp.isna().sum().sum()}")

# Show the rows surrounding the first gap, including one populated row on each side
first_gap_view = df_interp.iloc[28:37][["Close"]]
print("\nGap 1 (rows 30-34):")
print(first_gap_view.round(2))
print("Notice: Values smoothly interpolated between row 29 and row 35")

In [None]:
# Compare policies visually: one stacked panel per series, sharing the x-axis
fig, axes = plt.subplots(3, 1, figsize=(12, 10), sharex=True)

# Focus on first gap area
focus_range = slice(25, 40)

# (frame, legend label, line colour or None for the default, panel title)
panels = [
    (df_with_gaps, "Original (with gaps)", None, "Original Data with Gaps"),
    (df_ffill, "Forward Fill", "orange", "FORWARD_FILL Policy - Flat During Gap"),
    (df_interp, "Interpolate", "green", "INTERPOLATE Policy - Smooth Linear Fill"),
]

for ax, (frame, legend_label, line_color, panel_title) in zip(axes, panels):
    # Only pass a colour when one is specified, so the first panel keeps
    # matplotlib's default line colour.
    color_kwargs = {} if line_color is None else {"color": line_color}
    ax.plot(
        frame.index[focus_range],
        frame["Close"].iloc[focus_range],
        'o-',
        label=legend_label,
        markersize=6,
        **color_kwargs,
    )
    ax.set_title(panel_title, fontsize=12)
    ax.set_ylabel("Price ($)")
    ax.legend()
    ax.grid(True, alpha=0.3)

# Only the bottom panel carries the x-axis label
axes[-1].set_xlabel("Date")

plt.tight_layout()
plt.show()

### Policy 4: ERROR

In [None]:
# ERROR policy: running a backtest on the gappy frame is expected to raise
adapter_error = BacktraderAdapter(
    missing_data_policy=MissingDataPolicy.ERROR,
    price_histories={"STOCK": df_with_gaps},
)

request = BacktestRunRequest(
    symbols=("STOCK",),
    strategy_name="NoRebalance",
    parameters={"equity_proportions": [1.0]},
    initial_cash=10000.0,
    start=None,
    end=None,
)

try:
    result = adapter_error.run(request)
except ValueError as e:
    # The ValueError is the demonstration, so show it rather than re-raise
    print("ERROR Policy Result:")
    print("❌ Backtest failed as expected")
    print(f"\nError message:\n{e}")
    print("\nThis policy is useful for catching data quality issues early")
else:
    print("Unexpected: Backtest succeeded despite missing data")

## Part 3: Backtest Comparison Across Policies

In [None]:
# Run the same buy-and-hold backtest once per policy and tabulate the metrics
policies_to_test = [
    (MissingDataPolicy.FORWARD_FILL, "Forward Fill"),
    (MissingDataPolicy.DROP, "Drop"),
    (MissingDataPolicy.INTERPOLATE, "Interpolate"),
]

# Table column header -> key in result.metrics, in display order
metric_columns = {
    "Ending Value": "ending_value",
    "ROI": "roi",
    "CAGR": "cagr",
    "Sharpe": "sharpe",
    "Max Drawdown": "max_drawdown",
}

results_comparison = []

for policy, name in policies_to_test:
    adapter = BacktraderAdapter(
        missing_data_policy=policy,
        price_histories={"STOCK": df_with_gaps},
    )

    request = BacktestRunRequest(
        symbols=("STOCK",),
        strategy_name="NoRebalance",
        parameters={"equity_proportions": [1.0]},
        initial_cash=10000.0,
        start=None,
        end=None,
    )

    result = adapter.run(request)

    row = {"Policy": name}
    for column, metric_key in metric_columns.items():
        row[column] = result.metrics[metric_key]
    results_comparison.append(row)

comparison_df = pd.DataFrame(results_comparison)
rule = "=" * 80
print("\nBacktest Results Comparison:")
print(rule)
print(comparison_df.to_string(index=False))
print(rule)
print("\nObservations:")
for observation in (
    "- Different policies can produce different results",
    "- Forward Fill assumes price stays constant during gaps",
    "- Drop removes data points entirely",
    "- Interpolate assumes linear price movement",
    "- Choice of policy depends on your data quality and research requirements",
):
    print(observation)

## Part 4: Real-World Example with Dividends

Let's look at a high-dividend stock to see dividend adjustment in action.

In [None]:
# AT&T (T) pays quarterly dividends.
# actions=True adds the "Dividends" column (yf.download omits it by default,
# which would always send this cell to the "not available" branch), and
# auto_adjust=False keeps "Adj Close" alongside the dividend-unadjusted "Close".
t_stock = yf.download(
    "T",
    start="2020-01-01",
    end="2020-12-31",
    progress=False,
    actions=True,
    auto_adjust=False,
)

# Newer yfinance versions return (field, ticker) MultiIndex columns even for a
# single symbol; keep only the field level so the row filter below works.
if isinstance(t_stock.columns, pd.MultiIndex):
    t_stock.columns = t_stock.columns.get_level_values(0)

# Every column used below must exist, and there must be at least one row for
# the first-day comparison.
required_cols = {"Close", "Adj Close", "Dividends"}

if not t_stock.empty and required_cols.issubset(t_stock.columns):
    # Rows on which a dividend was recorded
    dividend_dates = t_stock[t_stock["Dividends"] > 0]
    print("AT&T Dividend Payments in 2020:")
    print(dividend_dates[["Close", "Adj Close", "Dividends"]].round(2))

    # Calculate total dividends
    total_div = dividend_dates["Dividends"].sum()
    print(f"\nTotal dividends paid: ${total_div:.2f} per share")

    # Check adjustment: compare Close and Adj Close on the first row
    start_close = t_stock.iloc[0]["Close"]
    start_adj = t_stock.iloc[0]["Adj Close"]

    print("\nPrice adjustment for dividends:")
    print(f"First day Close: ${start_close:.2f}")
    print(f"First day Adj Close: ${start_adj:.2f}")
    print(f"Difference: ${start_close - start_adj:.2f}")
    print("\nAdj Close accounts for dividends paid throughout the year")
else:
    print("Dividend data not available in dataset")

## Summary

### Key Takeaways

1. **Adjusted Prices**: Always use adjusted prices for accurate backtesting
   - Accounts for splits and dividends automatically
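   - yfinance returns a separate `Adj Close` column when called with `auto_adjust=False` (recent releases default to `auto_adjust=True`, which folds the adjustment into `Close`)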
   - Finbot uses `Adj Close` when available

2. **Missing Data Policies**: Choose based on your needs
   - `FORWARD_FILL` (default): Good for most cases, assumes price holds
   - `DROP`: Strictest data quality, may lose data
   - `ERROR`: Catch issues early, fails on any gap
   - `INTERPOLATE`: Smoother fills, assumes linear movement
   - `BACKFILL`: Rarely used, introduces look-ahead bias

3. **Best Practices**:
   - Always check for missing data before backtesting
   - Document which policy you use
   - Understand the assumptions each policy makes
   - Validate results make sense given corporate actions

### Further Reading

- [Corporate Actions and Data Quality User Guide](../docs/user-guides/corporate-actions-and-data-quality.md)
- [BacktraderAdapter API Reference](../docs/api/backtesting/backtrader_adapter.md)
- [Parity Testing Documentation](../docs/research/adapter-migration-status-2026-02-16.md)