In [1]:
import warnings
warnings.filterwarnings('ignore')
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the tickers listed in a file, download their prices and clean the result
def get_tickers(filepath, start_date = "2015-01-01", end_date = "2023-12-15"):
    """Download adjusted close prices for every ticker listed in ``filepath``.

    The file is read line by line; each line, stripped of surrounding
    whitespace, is used as one ticker symbol.

    Parameters
    ----------
    filepath : path of a text file with one ticker per line.
    start_date, end_date : forwarded to ``yf.download`` as ``start`` / ``end``.

    Returns
    -------
    The 'Adj Close' part of the download, sorted by index, with rows that are
    entirely NaN removed and, after that, every column that still contains
    a NaN removed.
    """
    with open(filepath) as handle:
        symbols = [raw_line.strip() for raw_line in handle]

    downloaded = yf.download(symbols, start = start_date, end = end_date)
    adj_close = downloaded.sort_index()['Adj Close']

    # A row survives unless every single column in it is missing
    missing_per_row = adj_close.isna().sum(axis = 1)
    has_any_price = missing_per_row != adj_close.shape[1]
    prices = adj_close[has_any_price]

    # Tickers that still have gaps are dropped entirely
    return prices.dropna(axis = 1)

In [3]:
# Define a function to get the n least volatile stocks in a DataFrame over the past t months
def get_n_least_volatile(stock_data, date, t = 36, n = 10):
    """Return the ``n`` column labels with the lowest volatility up to ``date``.

    Volatility is the population standard deviation (numpy default, ddof=0)
    of overlapping 5-row percentage changes, measured on the rows of
    ``stock_data`` whose index lies between ``date - t months`` and ``date``
    (both ends inclusive, because the slice is label based).

    Parameters
    ----------
    stock_data : DataFrame of prices, one column per ticker, indexed by date.
    date : anything ``pd.to_datetime`` accepts; end of the look-back window.
    t : length of the look-back window in months.
    n : number of tickers to return (a count, not a percentage).

    Returns
    -------
    list of at most ``n`` column labels, least volatile first. Tickers whose
    volatility cannot be computed (NaN, e.g. missing prices or a window with
    fewer than six rows) are ranked after all tickers with a valid value.
    ``stock_data`` itself is never modified.
    """
    date = pd.to_datetime(date)
    start_date = date - pd.DateOffset(months=t)

    # Nothing below writes to the window, so slicing is enough; copying the
    # whole price history on every call is unnecessary.
    window = stock_data.loc[start_date:date]

    ticker_vols = {}
    for ticker in window.columns:
        # The first 5 entries of a 5-period pct_change are always NaN
        five_row_returns = window[ticker].pct_change(periods=5).to_numpy()[5:]
        ticker_vols[ticker] = five_row_returns.std() if five_row_returns.size else np.nan

    def _rank_key(ticker):
        # NaN does not order against numbers, so rank it explicitly last
        vol = ticker_vols[ticker]
        missing = bool(np.isnan(vol))
        return (missing, 0.0 if missing else vol)

    # sorted() is stable: ties keep the column order of stock_data
    return sorted(ticker_vols, key=_rank_key)[:n]


In [28]:
def construct_portfolio(stock_data, start_date = "2021-01-01", end_date = "2023-12-15", percentage_of_all_stocks_to_buy = 0.1):
    """Build a daily weight table for an equal-weighted low-volatility portfolio.

    For every index date of ``stock_data`` between ``start_date`` and
    ``end_date`` the selected tickers receive weight ``1 / num_stocks`` and all
    other tickers 0. The selection is refreshed with ``get_n_least_volatile``
    on the first date seen in the window and then whenever the calendar month
    of the date differs from the month of the previous selection.

    Parameters
    ----------
    stock_data : DataFrame of prices, one column per ticker, indexed by date.
    start_date, end_date : window bounds, anything ``pd.to_datetime`` accepts.
    percentage_of_all_stocks_to_buy : fraction of the columns to hold;
        ``num_stocks = int(n_columns // (1 / fraction))``.

    Returns
    -------
    DataFrame with the columns of ``stock_data`` and one row per index date
    inside the window, holding the weights.

    Raises
    ------
    ValueError
        If the fraction is not positive, or if it selects zero stocks (the
        weight ``1 / num_stocks`` would be undefined).
    """
    if not percentage_of_all_stocks_to_buy > 0:
        raise ValueError("percentage_of_all_stocks_to_buy must be greater than 0")

    num_stocks = int(stock_data.shape[1] // (1 / percentage_of_all_stocks_to_buy))
    if num_stocks < 1:
        raise ValueError(
            f"percentage_of_all_stocks_to_buy={percentage_of_all_stocks_to_buy} selects no stocks "
            f"out of {stock_data.shape[1]} available"
        )
    weight = 1 / num_stocks

    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    portfolio = pd.DataFrame(0.0, index = stock_data.index, columns = stock_data.columns)

    # None never equals a month number, so the first date always triggers a selection
    current_month = None
    stocks_in_portfolio = []
    for date in stock_data[start_date : end_date].index:
        if date.month != current_month:
            stocks_in_portfolio = get_n_least_volatile(stock_data = stock_data, date = date, n = num_stocks)
            current_month = date.month
        # One vectorised write per day instead of one write per ticker
        portfolio.loc[date, stocks_in_portfolio] = weight
    return portfolio[start_date : end_date]
        

In [65]:
def backtest(stock_data, portfolio, start_date = "2021-01-01", end_date = "2023-12-15"):
    """Compute daily and cumulative returns of a weight table and of all stocks.

    Each date's weights are multiplied by the percentage change from that
    date to the next row of ``stock_data`` (the daily changes are shifted back
    by one row). Rows of the return table that contain any NaN are dropped,
    which includes the final row since it has no following price.

    Parameters
    ----------
    stock_data : DataFrame of prices, one column per ticker, indexed by date.
    portfolio : DataFrame of weights with the same columns, indexed by date.
    start_date, end_date : window bounds, anything ``pd.to_datetime`` accepts.

    Returns
    -------
    DataFrame indexed like ``portfolio`` with the columns
    ``daily_returns``, ``cumulative_returns``, ``cumulative_%_returns``
    (the strategy) and ``overall_returns``, ``overall_cumulative_returns``,
    ``overall_cumulative_%_returns`` (the plain average over all columns).
    """
    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)

    # Return earned from each date to the following row
    forward_returns = stock_data.pct_change().shift(-1)
    forward_returns = forward_returns[window_start : window_end].dropna()

    strategy_daily = (forward_returns * portfolio).sum(axis=1)
    average_daily = forward_returns.sum(axis=1) / forward_returns.shape[1]

    results = pd.DataFrame(index = portfolio.index)

    # Strategy columns
    results["daily_returns"] = strategy_daily
    results["cumulative_returns"] = (1 + results["daily_returns"]).cumprod() - 1
    results["cumulative_%_returns"] = results["cumulative_returns"] * 100

    # Equal-weighted average of every stock, used as the comparison line
    results["overall_returns"] = average_daily
    results["overall_cumulative_returns"] = (1 + results["overall_returns"]).cumprod() - 1
    results["overall_cumulative_%_returns"] = results["overall_cumulative_returns"] * 100

    return results


In [84]:
# Function to Calculate Drawdowns
def calculate_drawdown(cumulative_returns):
    """Compute the drawdown series (in %) of a cumulative-return Series.

    The cumulative returns are turned into a growth factor (``1 + r``) and
    compared with its running maximum; the running maximum is floored at 1,
    so a series that starts below its initial value is measured against the
    starting level.

    Parameters
    ----------
    cumulative_returns : pandas Series of cumulative returns as fractions
        (0.10 means +10%). It is not modified.

    Returns
    -------
    (max_drawdown, drawdown) : the most negative drawdown value and the full
    drawdown Series, both in percent. Positions that are NaN in the input
    stay NaN in the Series.
    """
    # Build a new Series: an in-place ``+= 1`` would shift the caller's data
    # by 1 on every call.
    growth = cumulative_returns + 1
    running_max = growth.dropna().cummax().clip(lower=1)
    drawdown = (growth / running_max - 1) * 100
    return drawdown.min(), drawdown

# Function to Plot Returns
def plot_returns(returns):
    """Plot the two cumulative % return columns of ``returns`` on one chart.

    Reads the columns ``cumulative_%_returns`` and
    ``overall_cumulative_%_returns`` and shows the figure; nothing is returned.
    """
    # (column, legend label, colour) for each line, in drawing order
    curves = [
        ('cumulative_%_returns', 'Cumulative % Returns of the Least Volatile Stocks', 'forestgreen'),
        ('overall_cumulative_%_returns', 'Cumulative % Returns of all Stocks', 'firebrick'),
    ]

    plt.figure(figsize=(12, 6))
    for column, legend_label, colour in curves:
        plt.plot(returns[column], label=legend_label, color=colour, linewidth=2, linestyle='-')

    plt.title('Cumulative Strategy vs Index Returns Over Time', fontsize=16)
    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Cumulative Returns (%)', fontsize=14)
    plt.legend()
    plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
    plt.show()

# Function to Plot Drawdowns
def plot_drawdowns(returns):
    """Plot the drawdown of the strategy and of all stocks on one chart.

    The drawdown series come from ``calculate_drawdown`` applied to the
    ``cumulative_returns`` and ``overall_cumulative_returns`` columns of
    ``returns``. The figure is shown; nothing is returned.
    """
    # (column, colour, legend label) for each curve, in drawing order
    curve_specs = (
        ('cumulative_returns', 'forestgreen', 'Drawdown of the Least Volatile Stocks'),
        ('overall_cumulative_returns', 'firebrick', 'Drawdown of all Stocks'),
    )

    # Compute both drawdown series before any drawing starts
    drawdown_curves = []
    for column, colour, legend_label in curve_specs:
        _, series = calculate_drawdown(returns[column])
        drawdown_curves.append((series, colour, legend_label))

    fig, ax = plt.subplots(figsize=(12, 6))

    # Line plus translucent fill for each curve
    for series, colour, legend_label in drawdown_curves:
        ax.plot(series.index, series.values, color=colour, label=legend_label)
        ax.fill_between(series.index, series.values, color=colour, alpha=0.3)

    # Setting title, labels, and grid
    ax.set_title('Comparison of Drawdowns', fontsize=16)
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Drawdown(%)', fontsize=12)
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, labelsize=10)
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.legend()

    plt.tight_layout()
    plt.show()

In [None]:
def print_metrics(returns):
    """Print the Sharpe ratio of the strategy and of the all-stock average.

    Each ratio is ``mean / std`` of the daily return column multiplied by
    ``sqrt(252)`` (the usual trading-days-per-year scaling) and rounded to
    two decimals. No risk-free rate is subtracted.

    Parameters
    ----------
    returns : DataFrame with the columns ``daily_returns`` (strategy) and
        ``overall_returns`` (average of all stocks).
    """
    sharpe1 = round(returns['daily_returns'].mean() / returns['daily_returns'].std() * np.sqrt(252), 2)
    sharpe2 = round(returns['overall_returns'].mean() / returns['overall_returns'].std() * np.sqrt(252), 2)
    # Actually report the values; previously they were computed and discarded
    print(f"Sharpe ratio (least volatile stocks): {sharpe1:.2f}")
    print(f"Sharpe ratio (all stocks): {sharpe2:.2f}")

In [None]:
# Get the Data
# Read the ticker symbols from the local text file and download/clean their
# price history with get_tickers, using its default start and end dates.
filepath = './s&p500.txt'
stock_data = get_tickers(filepath)

In [67]:
# Run the strategy
# The window is set explicitly here and passed to both calls, overriding the
# "2021-01-01" default start date of construct_portfolio and backtest.
start_date = "2020-01-01"
end_date = "2023-12-15"
# Weight table: equal weights on the least volatile 10% of the columns
portfolio = construct_portfolio(stock_data, start_date = start_date, end_date = end_date, percentage_of_all_stocks_to_buy = 0.1)
# Daily and cumulative returns of that weight table and of the all-stock average
returns = backtest(stock_data, portfolio, start_date = start_date, end_date = end_date)

[*********************100%***********************]  500 of 500 completed

100 Failed downloads:
- WLP: No data found for this date range, symbol may be delisted
- COH: No data found for this date range, symbol may be delisted
- NBL: No data found, symbol may be delisted
- SIAL: No data found for this date range, symbol may be delisted
- FISV: No data found, symbol may be delisted
- MOLX: No data found for this date range, symbol may be delisted
- CFN: No data found for this date range, symbol may be delisted
- LTD: No data found for this date range, symbol may be delisted
- LLL: No data found, symbol may be delisted
- WIN: No data found, symbol may be delisted
- DISCA: No data found, symbol may be delisted
- QEP: No data found, symbol may be delisted
- BRCM: No data found for this date range, symbol may be delisted
- KFT: No data found for this date range, symbol may be delisted
- BRK.B: No data found, symbol may be delisted
- SPLS: No data found for this date range, symbol may be deli

In [None]:
# Only the drawdown comparison is drawn; the other two report calls are disabled.
# plot_returns(returns)
plot_drawdowns(returns)
# print_metrics(returns)

In [69]:
# Display the backtest result table (strategy and all-stock return columns)
returns

Unnamed: 0_level_0,daily_returns,cumulative_returns,cumulative_%_returns,overall_returns,overall_cumulative_returns,overall_cumulative_%_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,0.001145,0.001145,0.114450,-0.005613,-0.005613,-0.561270
2020-01-03,0.001510,0.002657,0.265652,0.001704,-0.003918,-0.391812
2020-01-06,-0.005327,-0.002685,-0.268471,-0.000356,-0.004272,-0.427238
2020-01-07,0.001725,-0.000964,-0.096429,0.001620,-0.002659,-0.265883
2020-01-08,0.006632,0.005662,0.566159,0.003160,0.000492,0.049237
...,...,...,...,...,...,...
2023-12-08,0.007966,0.214159,21.415860,0.008533,0.666257,66.625661
2023-12-11,0.002552,0.217257,21.725685,-0.001159,0.664325,66.432529
2023-12-12,0.020018,0.241623,24.162342,0.022954,0.702528,70.252833
2023-12-13,-0.015572,0.222289,22.228904,0.015629,0.729138,72.913782


In [88]:
# Sharpe ratios: mean / std of the daily returns, scaled by sqrt(252) and
# rounded to two decimals; no risk-free rate is subtracted.
# sharpe1 uses the strategy column, sharpe2 the all-stock average column.
sharpe1 = round(returns['daily_returns'].mean() / returns['daily_returns'].std() * np.sqrt(252), 2)
sharpe2 = round(returns['overall_returns'].mean() / returns['overall_returns'].std() * np.sqrt(252), 2)
# sharpe1 is printed, sharpe2 is shown as the cell's output value
print(sharpe1)
sharpe2

0.35


0.67