In [1]:
import pandas as pd
import yfinance as yf
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns

In [None]:
# Step 1: Read the CSV file for stock tickers
csv_path = "results/top10portfoliopick.csv"
stock_list = pd.read_csv(csv_path)

# Extract stock tickers from the first column of the CSV.
# Blank cells, surrounding whitespace and repeated symbols are removed so that
# every symbol is requested exactly once and len(tickers) counts real symbols.
first_column = stock_list.iloc[:, 0].dropna().astype(str).str.strip()
tickers = first_column[first_column != ""].drop_duplicates().tolist()

# Fail early with a clear message instead of sending an empty request downstream.
if not tickers:
    raise ValueError(f"No tickers found in the first column of {csv_path}")

# Step 2: Define the periods
# Each entry is a (start_date, end_date) pair of ISO-formatted date strings.
periods = [
    ("2015-04-24", "2017-04-24"),
    ("2013-10-09", "2015-10-08"),
    ("2012-07-06", "2014-07-09"),
    ("2016-07-05", "2018-07-03"),
    ("2016-11-22", "2018-11-21"),
    ("2015-11-23", "2017-11-21"),
    ("2011-05-12", "2013-05-14"),
    ("2010-11-17", "2012-11-17"),
    ("2016-08-19", "2018-08-20"),
    ("2015-05-15", "2017-05-15")
]

# Step 3: Prepare an empty list to hold one result dict per period
all_results = []

# Helper functions
def calculate_returns(prices):
    """Compute simple period-over-period returns from a price series or table.

    Args:
        prices: pandas Series or DataFrame of prices, ordered in time.

    Returns:
        The fractional change between consecutive rows, with every row that
        contains a missing value removed (this always includes the first row,
        which has no predecessor).
    """
    pct_changes = prices.pct_change()
    return pct_changes.dropna()

def calculate_portfolio_value(weights, returns):
    """Compound weighted asset returns into a cumulative portfolio value path.

    The same weight vector is applied to every row of ``returns``, so the
    result corresponds to a portfolio held at constant weights in each period.

    Args:
        weights: One weight per column of ``returns``.
        returns: pandas DataFrame of per-period asset returns.

    Returns:
        Series with the running product of ``1 + portfolio return``; its first
        element is one plus the first period's portfolio return.
    """
    weighted_returns = returns.dot(weights)
    growth_factors = 1 + weighted_returns
    return growth_factors.cumprod()

def calculate_sharpe_ratio(returns, risk_free_rate=0, periods_per_year=252):
    """Compute the annualised Sharpe ratio of a return series.

    Args:
        returns: pandas Series (or DataFrame) of per-period returns.
        risk_free_rate: Risk-free return expressed per period, i.e. at the same
            frequency as ``returns``; it is subtracted from the per-period mean
            before annualising.
        periods_per_year: Number of return observations per year used for
            annualisation. Defaults to 252 (daily data); pass e.g. 52 or 12
            for weekly or monthly returns.

    Returns:
        ``(mean - risk_free_rate) / std * sqrt(periods_per_year)``.
    """
    excess_mean = returns.mean() - risk_free_rate
    return excess_mean / returns.std() * np.sqrt(periods_per_year)

def calculate_max_drawdown(portfolio_value):
    """Return the deepest peak-to-trough decline of a portfolio value path.

    Args:
        portfolio_value: pandas Series of portfolio values ordered in time.

    Returns:
        The most negative fractional drop from a running maximum
        (e.g. -0.25 for a 25% drawdown); 0 when the path never falls below
        a previous peak.
    """
    running_peak = portfolio_value.cummax()
    drawdown = portfolio_value / running_peak - 1
    return drawdown.min()

# Custom functions to replace R functionalities
def backtest_selector(result, measures):
    """Python equivalent of backtestSelector.

    Draws one vertically stacked panel per requested measure of a single
    backtest result and shows the figure.

    Args:
        result: Mapping from measure name to its value. A value may be a
            series (drawn as a line) or a single number (drawn as a bar).
        measures: Sequence of keys of ``result`` to display, one panel each.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    n_panels = len(measures)
    if n_panels == 0:
        # plt.subplots rejects zero rows, and there is nothing to draw anyway.
        return

    # squeeze=False always yields a 2-D array of Axes, so indexing also works
    # when only one measure is requested.
    fig, axes = plt.subplots(n_panels, 1, figsize=(10, 5*n_panels), squeeze=False)
    for ax, measure in zip(axes[:, 0], measures):
        value = result[measure]
        if np.ndim(value) == 0:
            # A lone number passed to plot() is a single marker-less point,
            # which renders as an empty panel; a bar makes it visible.
            ax.bar([str(measure)], [value])
            ax.axhline(0, color='black', linewidth=0.8)
        else:
            ax.plot(value)
            # Assumes series measures are indexed by date, as the label implies.
            ax.set_xlabel('Date')
        ax.set_title(measure)
        ax.set_ylabel('Value')
    plt.tight_layout()
    plt.show()

def backtest_table(results, measures):
    """Python equivalent of backtestTable.

    Collects the selected measures of every backtest result into one table,
    prints it, and hands it back to the caller.

    Args:
        results: Sequence of result mappings; each must contain a 'period'
            key plus every key listed in ``measures``.
        measures: Names of the measures to tabulate, one column each.

    Returns:
        DataFrame with one row per result (indexed by its 'period' value) and
        one column per measure.
    """
    columns = {}
    for name in measures:
        columns[name] = [entry[name] for entry in results]

    row_labels = [entry['period'] for entry in results]

    table = pd.DataFrame(columns, index=row_labels)
    print(table)
    return table

In [None]:
# Step 4: Loop through each period and perform the backtest
# Start from a clean list so that re-running this cell does not append
# duplicate rows to results left over from a previous run.
all_results = []

for start_date, end_date in periods:
    period_label = f"{start_date}_to_{end_date}"

    # Fetch historical data for the stocks in the given period.
    # auto_adjust=False is passed explicitly because the 'Adj Close' column is
    # only returned when prices are not auto-adjusted, and recent yfinance
    # releases changed the default of this flag.
    data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)['Adj Close']

    # A single-ticker request may come back as a Series; the portfolio maths
    # below needs one column per asset.
    if isinstance(data, pd.Series):
        data = data.to_frame()

    # Drop tickers with no prices at all in this window. Otherwise the row-wise
    # dropna() in calculate_returns would discard every row of the period.
    data = data.dropna(axis=1, how='all')

    # Calculate returns
    returns = calculate_returns(data)
    if returns.empty:
        print(f"Skipping {period_label}: no usable price data")
        continue

    # Equal weights over the assets that actually have data. Applying constant
    # weights to per-period returns means the portfolio is rebalanced to equal
    # weights every period (this is not a drifting buy-and-hold).
    n_assets = returns.shape[1]
    weights = np.full(n_assets, 1 / n_assets)

    # Calculate portfolio value
    portfolio_value = calculate_portfolio_value(weights, returns)

    # Calculate performance metrics.
    # Sharpe uses the portfolio returns directly; deriving them from
    # portfolio_value.pct_change() would silently drop the first observation.
    portfolio_returns = returns.dot(weights)
    sharpe_ratio = calculate_sharpe_ratio(portfolio_returns)
    max_drawdown = calculate_max_drawdown(portfolio_value)

    # Store results
    all_results.append({
        'period': period_label,
        'portfolio_value': portfolio_value,
        'Sharpe ratio': sharpe_ratio,
        'max drawdown': max_drawdown
    })

# Nothing downstream is meaningful without at least one evaluated period.
if not all_results:
    raise RuntimeError("No backtest period produced usable price data")

# Step 5: View backtest results for all periods
plotted_measures = ['portfolio_value', 'Sharpe ratio', 'max drawdown']
for result in all_results:
    period_header = f"\nBacktest results for period: {result['period']}"
    print(period_header)
    print(f"Sharpe Ratio: {result['Sharpe ratio']:.4f}")
    print(f"Max Drawdown: {result['max drawdown']:.4f}")

    # One figure per period (equivalent to backtestSelector in R)
    backtest_selector(result, measures=plotted_measures)

# Tabulate the scalar metrics of every period (equivalent to backtestTable in R)
summary_table = backtest_table(all_results, measures=['Sharpe ratio', 'max drawdown'])

# Step 6: Calculate and display the median Sharpe ratio and max drawdown
# The per-period metric lists are gathered once and reused for the summary below.
period_labels = [entry['period'] for entry in all_results]
sharpe_values = [entry['Sharpe ratio'] for entry in all_results]
drawdown_values = [entry['max drawdown'] for entry in all_results]

median_sharpe = np.median(sharpe_values)
median_drawdown = np.median(drawdown_values)

print(f"\nMedian Sharpe Ratio across all periods: {median_sharpe:.4f}")
print(f"Median Max Drawdown across all periods: {median_drawdown:.4f}")

# Step 7: Summarize results for all periods (one row per period)
summary_df = pd.DataFrame({
    'period': period_labels,
    'sharpe_ratio': sharpe_values,
    'max_drawdown': drawdown_values
})

# Persist the summary without the positional index column
summary_df.to_csv("results/backtest_results.csv", index=False)

print("\nBacktest summary:")
print(summary_df)

# Additional visualization: heatmap of performance metrics across periods
heatmap_data = summary_df.set_index('period')[['sharpe_ratio', 'max_drawdown']]
plt.figure(figsize=(12, 8))
sns.heatmap(heatmap_data, annot=True, cmap='coolwarm')
plt.title('Performance Metrics Across Periods')
plt.show()