In [32]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import vectorbt as vbt
from scipy.signal import argrelextrema

# Smoke test: this line is only reached (and printed) if every statement above it in the cell ran without raising
print('hello world')

ImportError: Numba needs NumPy 2.1 or less. Got NumPy 2.2.

In [None]:
# Read the S&P 500 price history and parse 'Date' into real datetimes
df_sp500 = pd.read_csv('data/sp500_2015-2025.csv')
df_sp500['Date'] = pd.to_datetime(df_sp500['Date'])

# Order rows oldest-first, then cast every price column to float in one pass
df_sp500 = df_sp500.sort_values('Date').astype({
    'Close/Last': float,
    'Open': float,
    'High': float,
    'Low': float,
})

# Preview the first few rows
df_sp500.head()

In [None]:
# Simple moving averages, their first and second differences, and the local minima found from them

window_sizes = [30, 200]
colors = ['red', 'blue']

# Rolling mean of the close for each window length
for window in window_sizes:
    df_sp500[f'{window}d_ma'] = df_sp500['Close/Last'].rolling(window=window).mean()

# First difference of each moving average, then the difference of that difference
for source_suffix, target_suffix in (('d_ma', 'd_ma_derivative'),
                                     ('d_ma_derivative', 'd_ma_derivative2')):
    for window in window_sizes:
        df_sp500[f'{window}{target_suffix}'] = df_sp500[f'{window}{source_suffix}'].diff()

# Flag a local minimum where the first difference flips from negative to positive
# while the second difference is positive
for window in window_sizes:
    slope = df_sp500[f'{window}d_ma_derivative']
    previous_slope = slope.shift(1)
    curvature = df_sp500[f'{window}d_ma_derivative2']

    # The lagged slope is kept as its own column, followed by the boolean minima flag
    df_sp500[f'{window}d_ma_derivative_shifted'] = previous_slope
    df_sp500[f'{window}d_ma_minima'] = (previous_slope < 0) & (slope > 0) & (curvature > 0)


def _mark_minima(ax, window, color, column, label=None):
    """Scatter an 'x' on `ax` at every flagged minimum of the `window`-day MA, reading y from `column`."""
    flagged = df_sp500[df_sp500[f'{window}d_ma_minima']]
    ax.scatter(flagged['Date'], flagged[column],
               color=color, s=25, marker='x',
               label=label)


# Three stacked panels sharing the date axis; the price panel is twice as tall
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(14, 15), sharex=True, gridspec_kw={'height_ratios': [2, 1, 1]})

# Panel 1: close price, moving averages, and the minima on each average
ax1.plot(df_sp500['Date'], df_sp500['Close/Last'], label='Close Price', color='black', alpha=0.7)
for window, color in zip(window_sizes, colors):
    ax1.plot(df_sp500['Date'], df_sp500[f'{window}d_ma'], label=f'{window}d MA', color=color, alpha=0.7)
    _mark_minima(ax1, window, color, f'{window}d_ma', label=f'{window}d MA Local Minima')

ax1.set_title('S&P 500 Close/Last Value and Simple Moving Averages with Local Minima')
ax1.set_ylabel('Value')
ax1.grid(True, alpha=0.3)
ax1.legend()

# Panels 2 and 3: first and second differences, each with a zero reference line
derivative_panels = (
    (ax2, 'd_ma_derivative', 'Derivative', 'Derivatives of Moving Averages', 'Velocity'),
    (ax3, 'd_ma_derivative2', '2nd Derivative', '2nd Derivatives of Moving Averages', 'Acceleration'),
)
for ax, suffix, series_label, panel_title, y_label in derivative_panels:
    for window, color in zip(window_sizes, colors):
        column = f'{window}{suffix}'
        ax.plot(df_sp500['Date'], df_sp500[column], label=f'{window}d MA {series_label}', color=color)
        # Same minima dates, drawn at the height of this panel's series
        _mark_minima(ax, window, color, column)

    ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    ax.set_title(panel_title)
    ax.set_xlabel('Date')
    ax.set_ylabel(y_label)
    ax.grid(True, alpha=0.3)
    ax.legend()

# Render the figure
plt.tight_layout()
plt.show()

In [None]:
# Simulate a dollar-cost-averaging (DCA) strategy: a lump sum on the first Friday in the
# data, then a fixed contribution on every later Friday in the data.

# Keep only Friday rows (weekday 4 is Friday). A week with no Friday row in the data
# simply receives no contribution.
fridays = df_sp500[df_sp500['Date'].dt.weekday == 4]
if fridays.empty:
    raise ValueError("No Friday rows found in df_sp500; cannot simulate the DCA strategy")

# Strategy parameters
initial_investment = 5000
weekly_investment = 100

friday_prices = fridays['Close/Last'].to_numpy()

# Cash put in on each Friday: the lump sum first, the weekly amount afterwards
contributions = np.full(len(friday_prices), weekly_investment, dtype=float)
contributions[0] = initial_investment

# Running totals computed in one pass instead of a row-by-row Python loop
shares_held = np.cumsum(contributions / friday_prices)   # shares owned after each purchase
investment_cost = np.cumsum(contributions)               # cash invested so far
investment_value = shares_held * friday_prices           # holdings valued at that day's close

# Final scalars used by the summary below
shares_owned = shares_held[-1]
total_invested = investment_cost[-1]

# Create a dataframe to hold our results
dca_results = pd.DataFrame({
    'Date': fridays['Date'].to_numpy(),
    'Total Invested': investment_cost,
    'Portfolio Value': investment_value,
})
dca_results['Profit/Loss'] = dca_results['Portfolio Value'] - dca_results['Total Invested']
dca_results['Return %'] = dca_results['Profit/Loss'] / dca_results['Total Invested'] * 100

# Plot total invested vs portfolio value
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(dca_results['Date'], dca_results['Total Invested'], label='Total Invested ($)', color='blue')
ax.plot(dca_results['Date'], dca_results['Portfolio Value'], label='Portfolio Value ($)', color='green')

# Shade the gap between the two curves: green while ahead of cost, red while behind
ax.fill_between(dca_results['Date'], dca_results['Total Invested'], dca_results['Portfolio Value'],
                where=(dca_results['Portfolio Value'] >= dca_results['Total Invested']),
                color='green', alpha=0.3, label='Profit Region')
ax.fill_between(dca_results['Date'], dca_results['Total Invested'], dca_results['Portfolio Value'],
                where=(dca_results['Portfolio Value'] <= dca_results['Total Invested']),
                color='red', alpha=0.3, label='Loss Region')

# Customize the plot
ax.set_title('Dollar Cost Averaging: (5,000 + 100x) Investment', fontsize=16)
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Value ($)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.legend()

# Show the plot
plt.tight_layout()
plt.show()

# Display summary statistics
final_invested = dca_results['Total Invested'].iloc[-1]
final_value = dca_results['Portfolio Value'].iloc[-1]
profit_loss = final_value - final_invested

print("DCA Strategy Summary:")
print(f"Start Date: {dca_results['Date'].min().strftime('%Y-%m-%d')}")
print(f"End Date: {dca_results['Date'].max().strftime('%Y-%m-%d')}")
print(f"Total Amount Invested: ${final_invested:.2f}")
print(f"Final Portfolio Value: ${final_value:.2f}")
print(f"Total Profit/Loss: ${profit_loss:.2f} ({profit_loss/final_invested*100:.2f}%)")
print(f"Total Number of Shares Owned: {shares_owned:.2f}")
print(f"Average Cost Per Share: ${final_invested/shares_owned:.2f}")
print(f"Final Share Price: ${fridays.iloc[-1]['Close/Last']:.2f}")

In [None]:
# Implement and backtest the MA strategy with vectorbt

def backtest_ma_strategy(
    df: pd.DataFrame,
    short_window: int = 20,
    long_window: int = 50,
    derivative_window: int = 1,
    local_min_order: int = 5,
    drawdown_limit: float = 0.05
) -> vbt.Portfolio:
    """
    Backtests a moving average crossover strategy with a slope filter and a stop-loss.

    Parameters:
    - df: DataFrame with a 'Close' column; its index is reused for all signals.
    - short_window: lookback period for the short moving average.
    - long_window: lookback period for the long moving average.
    - derivative_window: lag used when differencing the short MA to get its slope.
    - local_min_order: number of neighbours on each side that a point must be below
      to count as a local minimum of the short MA.
    - drawdown_limit: stop-loss level as a decimal; handed to vectorbt as a
      non-trailing `sl_stop`.

    Returns:
    - A vectorbt Portfolio object with backtest results.
    """
    close = df['Close']

    # Compute moving averages
    ma_short = close.rolling(short_window).mean()
    ma_long = close.rolling(long_window).mean()

    # Slope of the short MA (difference over `derivative_window` rows)
    first_derivative = ma_short.diff(derivative_window)

    # Detect local minima on short MA and turn the positions into a boolean mask
    local_min_idx = argrelextrema(ma_short.to_numpy(), np.less, order=local_min_order)[0]
    local_min = pd.Series(False, index=df.index)
    local_min.iloc[local_min_idx] = True

    # Entry signal: short MA above long MA, positive slope, not at a local minimum
    entries = (ma_short > ma_long) & (first_derivative > 0) & (~local_min)

    # Exit signal: short MA below long MA or negative slope
    exits = (ma_short < ma_long) | (first_derivative < 0)

    # from_signals names its stop arguments `sl_stop` / `sl_trail`;
    # `stop_loss` / `trailing_stop` are not parameters it defines.
    pf = vbt.Portfolio.from_signals(
        close=close,
        entries=entries,
        exits=exits,
        freq='D',
        init_cash=10_000,
        fees=0.001,
        sl_stop=drawdown_limit,
        sl_trail=False
    )

    return pf

# Build the strategy input from a copy of the price frame: the close column is
# renamed to 'Close' and the dates become the index
strategy_df = (
    df_sp500
    .copy()
    .rename(columns={'Close/Last': 'Close'})
    .set_index('Date')
)

# Window lengths to combine
short_windows = [20, 30]
long_windows = [50, 200]

# Backtest output keyed by strategy name
results = {}

for short_window in short_windows:
    for long_window in long_windows:
        # Only run pairs where the short window is strictly shorter than the long one
        if short_window < long_window:
            strategy_name = f"MA_{short_window}_{long_window}"
            results[strategy_name] = backtest_ma_strategy(
                strategy_df,
                short_window=short_window,
                long_window=long_window,
                derivative_window=1,
                local_min_order=5,
                drawdown_limit=0.05
            )

            # Report the statistics of the run that was just stored
            print(f"\n--- Strategy: {strategy_name} ---")
            print(results[strategy_name].stats())

# Plot the equity curves for comparison.
# vectorbt's plot_value() builds a Plotly figure and has no `ax` argument, so each
# portfolio's value series is drawn on the Matplotlib axes directly.
fig, ax = plt.subplots(figsize=(14, 8))

for strategy_name, pf in results.items():
    equity = pf.value()  # portfolio value over time, indexed like the close series
    ax.plot(equity.index, equity.values, label=strategy_name)

ax.set_title('MA Crossover Strategy Comparison')
ax.set_xlabel('Date')
ax.set_ylabel('Portfolio Value ($)')
ax.grid(True, alpha=0.3)
ax.legend()
plt.tight_layout()
plt.show()

# Find the best strategy based on Sharpe ratio.
# The Series returned by stats() is indexed by display names (e.g. 'Sharpe Ratio',
# 'Total Return [%]'), so the metrics are read from the portfolio's metric methods.
def _sharpe_for_ranking(item):
    """Return the Sharpe ratio of a (name, portfolio) pair, ranking NaN below every real value."""
    sharpe = item[1].sharpe_ratio()
    return -np.inf if np.isnan(sharpe) else sharpe

best_strategy = max(results.items(), key=_sharpe_for_ranking)
best_strategy_name, best_pf = best_strategy

print(f"Best strategy: {best_strategy_name}")
print(f"Sharpe ratio: {best_pf.sharpe_ratio():.4f}")
print(f"Total return: {best_pf.total_return():.2%}")
print(f"Max drawdown: {best_pf.max_drawdown():.2%}")

# Extract the parameters from the strategy name ("MA_<short>_<long>")
short_window, long_window = map(int, best_strategy_name.split('_')[1:])

# Recreate the signals for visualization. The lag of 1 and order of 5 mirror the
# derivative_window / local_min_order values passed to backtest_ma_strategy above.
ma_short = strategy_df['Close'].rolling(short_window).mean()
ma_long = strategy_df['Close'].rolling(long_window).mean()
first_derivative = ma_short.diff(1)
second_derivative = first_derivative.diff(1)

local_min_idx = argrelextrema(ma_short.to_numpy(), np.less, order=5)[0]
local_min = pd.Series(False, index=strategy_df.index)
local_min.iloc[local_min_idx] = True

entries = (ma_short > ma_long) & (first_derivative > 0) & (~local_min)
exits = (ma_short < ma_long) | (first_derivative < 0)

# Create a figure with 4 stacked subplots sharing the date axis
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(14, 20), sharex=True,
                                         gridspec_kw={'height_ratios': [2, 1, 1, 2]})

# Plot 1: Price and moving averages with entry/exit signal bars
ax1.plot(strategy_df.index, strategy_df['Close'], label='Close Price', color='black', alpha=0.7)
ax1.plot(strategy_df.index, ma_short, label=f'{short_window}d MA', color='red', alpha=0.7)
ax1.plot(strategy_df.index, ma_long, label=f'{long_window}d MA', color='blue', alpha=0.7)

# Mark every bar on which the raw entry / exit condition is true
# (these are signal bars, not the trades the portfolio actually executed)
entry_points = strategy_df.index[entries]
exit_points = strategy_df.index[exits]
ax1.scatter(entry_points, strategy_df.loc[entry_points, 'Close'], color='green', s=50, marker='^', label='Buy Signal')
ax1.scatter(exit_points, strategy_df.loc[exit_points, 'Close'], color='red', s=50, marker='v', label='Sell Signal')

ax1.set_title(f'S&P 500 with {short_window}/{long_window} MA Crossover Strategy')
ax1.set_ylabel('Price')
ax1.grid(True, alpha=0.3)
ax1.legend()

# Plot 2: First derivative
ax2.plot(strategy_df.index, first_derivative, label='First Derivative', color='purple')
ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
ax2.set_title('First Derivative of Short MA')
ax2.set_ylabel('Velocity')
ax2.grid(True, alpha=0.3)

# Plot 3: Second derivative
ax3.plot(strategy_df.index, second_derivative, label='Second Derivative', color='orange')
ax3.axhline(y=0, color='black', linestyle='-', alpha=0.3)
ax3.set_title('Second Derivative of Short MA')
ax3.set_ylabel('Acceleration')
ax3.grid(True, alpha=0.3)

# Plot 4: Portfolio value.
# plot_value() is a Plotly plotter with no `ax` argument, so the value series is
# drawn on the Matplotlib axes directly.
portfolio_value = best_pf.value()
ax4.plot(portfolio_value.index, portfolio_value.values, label='Portfolio Value')
ax4.set_title('Portfolio Performance')
ax4.set_ylabel('Value ($)')
ax4.set_xlabel('Date')
ax4.grid(True, alpha=0.3)
ax4.legend()

# Show the plot
plt.tight_layout()
plt.show()

# Display detailed trade analysis
print("\nDetailed Trade Analysis:")
trades = best_pf.trades

# trades.returns / trades.duration are vectorbt mapped arrays; take the underlying
# NumPy arrays so boolean masks and Matplotlib histograms work on them.
trade_returns = trades.returns.values
trade_durations = trades.duration.values
winning_returns = trade_returns[trade_returns > 0]
losing_returns = trade_returns[trade_returns < 0]


def _mean_or_nan(values):
    """Mean of a NumPy array, or NaN for an empty one (avoids the empty-slice warning)."""
    return values.mean() if len(values) else float('nan')


# Ratio of summed winning returns to summed losing returns; with no losing trades the
# ratio is unbounded (or undefined when there are no winning trades either)
if len(losing_returns):
    profit_factor = abs(winning_returns.sum() / losing_returns.sum())
else:
    profit_factor = float('inf') if len(winning_returns) else float('nan')

print(f"Number of trades: {len(trade_returns)}")
# win_rate is a method and must be called before formatting
print(f"Win rate: {trades.win_rate():.2%}")
print(f"Average trade return: {_mean_or_nan(trade_returns):.2%}")
print(f"Average winning trade: {_mean_or_nan(winning_returns):.2%}")
print(f"Average losing trade: {_mean_or_nan(losing_returns):.2%}")
print(f"Profit factor: {profit_factor:.2f}")

# Plot the trade durations and returns
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

ax1.hist(trade_durations, bins=20)
ax1.set_title('Trade Duration Distribution')
ax1.set_xlabel('Duration (days)')
ax1.set_ylabel('Frequency')

# Returns are stored as fractions; scale to percent to match the axis label
ax2.hist(trade_returns * 100, bins=20)
ax2.set_title('Trade Returns Distribution')
ax2.set_xlabel('Return (%)')
ax2.set_ylabel('Frequency')
ax2.axvline(x=0, color='black', linestyle='--')

plt.tight_layout()
plt.show()