In [105]:
# import packages that will be used for analysis
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

# Seed every random number generator the notebook draws from.
# The sampling code below uses np.random.choice, so seeding only Python's
# `random` module would leave the stochastic results non-reproducible.
SEED = 4
random.seed(SEED)
np.random.seed(SEED)

# Determining which functions to use for different demands

### Stock Loading Functions

In [106]:
import yfinance as yf
missing_data_tickers = []  # tickers whose download failed or came back empty (one entry per failed call)


def get_data_from_start_to_end(ticker, start_date, end_date):
    """Download price history for one ticker between two dates.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download``.
    start_date, end_date :
        Forwarded unchanged as ``start=`` / ``end=`` to ``yf.download``.

    Returns
    -------
    The object returned by ``yf.download`` when it is non-empty, otherwise
    ``None``.

    Side effects
    ------------
    On any failure (exception or empty result) a message is printed and the
    ticker is appended exactly once to the module-level
    ``missing_data_tickers`` list.
    """
    try:
        stock_data = yf.download(ticker, start=start_date, end=end_date)
        if stock_data.empty:
            # Only raise here; the handler below does the bookkeeping, so the
            # ticker is recorded once rather than once here and once there.
            raise ValueError(f"Stock data for ticker {ticker} during the period from {start_date} to {end_date} was not found.")
        return stock_data
    except Exception as e:
        print(f"An error occurred for ticker {ticker}: {e}")
        missing_data_tickers.append(ticker)
        return None


In [107]:
# For each period, download data for every ticker in the list
def download_stock_data_for_periods(tickers, periods):
    """Fetch data for every ticker in every period.

    ``periods`` maps a period label to a ``(start_date, end_date)`` pair.
    The result maps each label to a dict of ``ticker -> downloaded data``;
    tickers for which ``get_data_from_start_to_end`` returned ``None`` are
    left out of that period's dict.
    """
    downloads_by_period = {}

    for label, date_bounds in periods.items():
        begin, finish = date_bounds
        # Lazily pair each symbol with its download so the calls happen in ticker order.
        fetched = ((symbol, get_data_from_start_to_end(symbol, begin, finish)) for symbol in tickers)
        downloads_by_period[label] = {
            symbol: frame for symbol, frame in fetched if frame is not None
        }

    return downloads_by_period

In [108]:
import pandas as pd

# Get the adjusted close prices
# Module-level store of adjusted-close tables, keyed by period label.
adj_close_sector_etf = {}


def get_adjusted_closed_price(nested_dict, tickers, periods):
    """Build one adjusted-close table per period and store it in ``adj_close_sector_etf``.

    ``nested_dict[period][ticker]`` must be a frame with an ``'Adj Close'``
    column. For each period a new DataFrame is created with one column per
    ticker (in the order given), saved under that period's key in the
    module-level ``adj_close_sector_etf`` dict, and that same dict is returned.
    """
    def _adj_close_table(label):
        # Columns are inserted one ticker at a time into a fresh, empty frame.
        table = pd.DataFrame()
        for symbol in tickers:
            table[symbol] = nested_dict[label][symbol]['Adj Close']
        return table

    for label in periods:
        adj_close_sector_etf[label] = _adj_close_table(label)

    return adj_close_sector_etf

### Bollinger Data

In [109]:
# create bollinger bands
import scipy.stats as stats
def add_bollinger_data(data,window,conf_int):
    """Add Bollinger-band columns and a trading signal to ``data`` in place.

    ``data`` must have an ``'Adj Close'`` column. The columns
    ``middle_band`` (rolling mean), ``upper_band`` / ``lower_band`` (mean
    plus/minus a multiple of the rolling standard deviation) and ``Signal``
    are written onto ``data``, which is also returned.

    ``Signal`` is ``'Buy'`` where the price is below the lower band,
    ``'Sell'`` where it is above the upper band, and ``'Hold'`` otherwise
    (including rows where the bands are still NaN).
    """
    # Two-sided normal quantile for the requested confidence level.
    band_multiplier = stats.norm.ppf(1 - (1 - conf_int) / 2)

    closes = data['Adj Close']
    rolling_closes = closes.rolling(window)

    data['middle_band'] = rolling_closes.mean()
    band_offset = band_multiplier * rolling_closes.std()
    data['upper_band'] = data['middle_band'] + band_offset
    data['lower_band'] = data['middle_band'] - band_offset

    # The first matching condition wins, so 'Buy' is checked before 'Sell'.
    data['Signal'] = np.select(
        [closes < data['lower_band'], closes > data['upper_band']],
        ['Buy', 'Sell'],
        default='Hold',
    )

    return data

In [110]:
# create Bollinger data for multiple time periods and multiple tickers
def bollinger_data_multiple_periods_tickers(periods,tickers,data,window,confidence_period):
    """Apply ``add_bollinger_data`` to every available (period, ticker) frame.

    ``data`` is a nested mapping ``data[period][ticker] -> frame``. Frames are
    updated by ``add_bollinger_data`` and the same ``data`` object is returned.
    A period/ticker combination that is absent from ``data`` is reported with a
    printed message and skipped.

    Only the lookup is guarded: a ``KeyError`` raised inside
    ``add_bollinger_data`` (for example a missing ``'Adj Close'`` column)
    propagates instead of being misreported as missing data.
    """
    for period in periods:
        for ticker in tickers:
            try:
                frame = data[period][ticker]
            except KeyError:
                print(f'Data for {ticker} does not exist during {period}')
                continue
            add_bollinger_data(frame, window, confidence_period)
    return data

### Signals

In [111]:
def list_signals(stock_data_with_signals, tickers, periods):
    """List the dates of every Buy and Sell signal per period and ticker.

    ``stock_data_with_signals[period][ticker]`` must be a frame with a
    ``'Signal'`` column. Returns
    ``{period: {ticker: {'Buy': [...], 'Sell': [...]}}}`` where each list
    holds the index labels of the matching rows, converted with
    ``pd.to_datetime`` and kept in row order.
    """
    signals_data = {}

    for period in periods:
        signals_data[period] = {}
        for ticker in tickers:
            frame = stock_data_with_signals[period][ticker]
            signal_column = frame['Signal']
            # A boolean mask selects the matching rows in one pass, avoiding a
            # Python-level iterrows() loop over every row.
            signals_data[period][ticker] = {
                side: list(pd.to_datetime(frame.index[(signal_column == side).to_numpy()]))
                for side in ('Buy', 'Sell')
            }

    return signals_data

In [112]:
def collect_signals(nested_dict, periods, tickers):
    """Gather each ticker's ``'Signal'`` column into one table per period.

    ``nested_dict[period][ticker]`` must be a frame with a ``'Signal'``
    column. Returns a new dict mapping each period to a DataFrame whose
    columns are the tickers (in the order given).
    """
    def _signal_table(label):
        # Start from an empty frame that already has the ticker columns, then fill them.
        table = pd.DataFrame(columns=tickers)
        for symbol in tickers:
            table[symbol] = nested_dict[label][symbol]['Signal']
        return table

    return {label: _signal_table(label) for label in periods}

### Load Data

In [113]:
# Economic-cycle windows to analyse, as (start_date, end_date) strings keyed by period label
economic_cycle_periods = {
    "trough": ("2008-10-01", "2009-06-01"),
    "expansion": ("2012-01-01", "2015-01-01"),
    "peak": ("2019-06-01", "2020-02-01"),
    "contraction": ("2007-12-01", "2008-10-01"),
    "all_data": ("2005-01-01", "2024-06-01"),
}

# Period labels in dict order; derived from the dict so the two can never drift apart
economic_cycle_periods_list = list(economic_cycle_periods)

In [114]:
# ETF used for each sector; the ticker list below keeps this order
sector_etf_by_sector = {
    'materials': 'XLB',
    'industrials': 'XLI',
    'financials': 'XLF',
    'information technology': 'XLK',
    'consumer discretionary': 'XLY',
    'consumer staples': 'XLP',
    'energy': 'XLE',
    'healthcare': 'XLV',
    'communication services': 'VOX',
    'utilities': 'XLU',
    'real estate': 'IYR',
}
sector_etf_tickers = list(sector_etf_by_sector.values())

In [115]:
# Download every sector ETF for every economic-cycle period.
# Result layout: sector_etf_data[period][ticker] -> downloaded frame
sector_etf_data = download_stock_data_for_periods(
    tickers=sector_etf_tickers,
    periods=economic_cycle_periods,
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

#### Create Bollinger Data

In [116]:
# Bollinger settings: 20-day moving average and a 95% confidence band
# (roughly 2 standard deviations either side of the mean)
sector_etf_data = bollinger_data_multiple_periods_tickers(
    periods=economic_cycle_periods_list,
    tickers=sector_etf_tickers,
    data=sector_etf_data,
    window=20,
    confidence_period=0.95,
)

In [117]:
# Spot-check one enriched frame: the band columns stay NaN until the rolling
# window has filled, and those rows carry the default 'Hold' signal.
sector_etf_data['trough']['XLV']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,middle_band,upper_band,lower_band,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2008-10-01,30.100000,30.480000,30.100000,30.250000,22.927490,6053600,,,,Hold
2008-10-02,30.250000,30.590000,29.930000,30.299999,22.965384,6353400,,,,Hold
2008-10-03,30.600000,30.600000,29.650000,29.650000,22.472729,6814400,,,,Hold
2008-10-06,29.400000,29.879999,27.410000,28.540001,21.631418,8545000,,,,Hold
2008-10-07,28.719999,28.780001,27.389999,27.850000,21.108452,5060200,,,,Hold
...,...,...,...,...,...,...,...,...,...,...
2009-05-22,25.280001,25.400000,25.070000,25.290001,19.404524,3655700,19.163589,20.034961,18.292217,Hold
2009-05-26,25.190001,25.660000,24.889999,25.520000,19.580994,4412900,19.221135,20.041099,18.401171,Hold
2009-05-27,25.549999,25.600000,25.219999,25.260000,19.381495,4591100,19.265253,20.016435,18.514072,Hold
2009-05-28,25.209999,25.590000,25.139999,25.389999,19.481239,5720000,19.310907,19.994306,18.627507,Hold


##### Get signals

In [200]:
# One table of Buy/Hold/Sell signals per period, with a column per ticker
bb_signals = collect_signals(
    nested_dict=sector_etf_data,
    periods=economic_cycle_periods_list,
    tickers=sector_etf_tickers,
)

#### Get Adjusted Close Price

In [118]:
# Builds the per-period adjusted-close tables; the call also fills the
# module-level adj_close_sector_etf dict and returns that same dict.
sector_etf_closed_price = get_adjusted_closed_price(
    nested_dict=sector_etf_data,
    tickers=sector_etf_tickers,
    periods=economic_cycle_periods_list,
)


In [120]:
# Inspect the adjusted-close table for the trough period. adj_close_sector_etf is
# the module-level dict filled by get_adjusted_closed_price (the same object that
# call returned as sector_etf_closed_price).
adj_close_sector_etf['trough']

Unnamed: 0_level_0,XLB,XLI,XLF,XLK,XLY,XLP,XLE,XLV,VOX,XLU,IYR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2008-10-01,23.119263,21.858328,12.413446,15.649681,22.589594,18.009504,37.393242,22.927490,36.648029,18.666502,34.011753
2008-10-02,21.458336,20.549883,11.794874,15.037063,21.771839,17.782763,35.261120,22.965384,35.561867,18.395733,31.759171
2008-10-03,21.247202,20.222773,11.278403,14.822244,21.010752,17.640238,34.840694,22.472729,35.039406,18.119310,30.064144
2008-10-06,20.198565,19.692123,10.689858,14.002767,20.419695,17.128462,32.966839,21.631418,33.396393,17.233654,29.512156
2008-10-07,19.156969,19.037903,9.560817,13.286716,19.108046,16.584290,31.159023,21.108452,31.966496,16.528519,27.019815
...,...,...,...,...,...,...,...,...,...,...,...
2009-05-22,18.848709,16.073532,7.178293,13.618086,18.520390,15.070184,29.497360,19.404524,33.037399,14.918155,18.541613
2009-05-26,19.300217,16.658966,7.412233,13.971797,19.143745,15.260780,30.122379,19.580994,34.221863,15.351324,19.500565
2009-05-27,18.576372,16.117998,7.190606,13.835135,18.725433,14.873017,29.794714,19.381495,33.881416,15.039444,18.851517
2009-05-28,18.906044,16.273611,7.393763,14.036112,18.651628,15.030755,30.783823,19.481239,34.314068,15.351324,19.208202


In [215]:
from datetime import timedelta
import pandas as pd
import numpy as np

def backtesting_stochastic(signals_nd, adj_close_nd, periods, tickers, n_sample, analysis_type,
                           holding_days=110, rng=None):
    """
    Backtest the Buy/Sell signals over randomly sampled holding windows.

    For each period, ``n_sample`` start dates are drawn (with replacement)
    from the calendar days between the period start and ``holding_days``
    before the period end. Within each window, per ticker:

    - every 'Buy' row records that day's price as one share bought;
    - every 'Sell' row closes the oldest open buy (first in, first out) and
      adds ``sell price - buy price`` to the profit;
    - buys still open when the window ends are ignored.

    Parameters
    - 'signals_nd': dict of period -> DataFrame of signals, one column per ticker
    - 'adj_close_nd': dict of period -> DataFrame of adjusted close prices, one column per ticker
    - 'periods': dict of period -> (start_date, end_date)
    - 'tickers': list of tickers to backtest; columns are looked up by ticker name
    - 'n_sample': number of sampled start dates per period
    - 'analysis_type': statistic to report ('Mean', 'Median', 'Std', 'Variance')
    - 'holding_days': length of each sampled window in days (default 110)
    - 'rng': optional object with a numpy-style ``choice`` method (e.g.
      ``np.random.default_rng(seed)``); the global ``np.random`` is used when omitted

    Returns a DataFrame indexed by ticker with one column per period, holding
    the chosen statistic of the per-window profit (in price units per share).

    Raises ValueError for an unknown ``analysis_type``. The sampler is expected
    to reject a period shorter than ``holding_days`` because the candidate
    range is then empty.

    Rows of the signal and price tables are paired by position within the
    window, so both tables are assumed to share the same index.
    """
    aggregators = {'Mean': 'mean', 'Median': 'median', 'Std': 'std', 'Variance': 'var'}
    if analysis_type not in aggregators:
        raise ValueError(
            f"analysis_type must be one of {sorted(aggregators)}, got {analysis_type!r}"
        )

    holding_window = timedelta(days=holding_days)
    sampler = np.random if rng is None else rng

    profit_samples = {period: {ticker: [] for ticker in tickers} for period in periods}

    for period, (period_start, period_end) in periods.items():
        # A window must fit inside the period, so the last admissible start is
        # holding_days before the period end.
        candidate_starts = pd.date_range(
            start=pd.to_datetime(period_start),
            end=pd.to_datetime(period_end) - holding_window,
        )

        for sampled in sampler.choice(candidate_starts, size=n_sample, replace=True):
            window_start = pd.to_datetime(sampled)
            window_end = window_start + holding_window

            window_prices = adj_close_nd[period].loc[window_start:window_end]
            window_signals = signals_nd[period].loc[window_start:window_end]

            for ticker in tickers:
                # Select columns by ticker name so the result does not depend on
                # the column order of the input tables matching `tickers`.
                open_buys = []
                realized_profit = 0
                for signal, price in zip(window_signals[ticker], window_prices[ticker]):
                    if signal == 'Buy':
                        open_buys.append(price)
                    elif signal == 'Sell' and open_buys:
                        realized_profit += price - open_buys.pop(0)
                profit_samples[period][ticker].append(realized_profit)

    summary = pd.DataFrame(index=tickers, columns=list(periods))
    reducer = aggregators[analysis_type]
    for period in periods:
        for ticker in tickers:
            samples = pd.Series(profit_samples[period][ticker], dtype=float)
            summary.at[ticker, period] = getattr(samples, reducer)()

    return summary

In [216]:
# Mean per-window profit for each sector ETF in each period (100 sampled windows per period)
backtest = backtesting_stochastic(
    signals_nd=bb_signals,
    adj_close_nd=adj_close_sector_etf,
    periods=economic_cycle_periods,
    tickers=sector_etf_tickers,
    n_sample=100,
    analysis_type='Mean',
)
backtest

Unnamed: 0,trough,expansion,peak,contraction,all_data
XLB,2.949972,1.244436,5.257737,2.056277,4.023275
XLI,4.52528,1.681407,6.633517,0.0,2.966904
XLF,2.687358,1.342115,4.231771,0.50078,1.252708
XLK,1.506128,1.625079,7.000872,0.002344,3.59198
XLY,6.646882,3.992257,6.730469,1.785828,4.670649
XLP,1.172693,1.902352,0.263529,1.299937,1.910971
XLE,7.960637,3.641978,6.080458,9.91269,2.906805
XLV,2.72545,5.491496,5.415825,0.342312,3.286235
VOX,11.632711,4.764721,5.044086,1.19472,2.502685
XLU,1.016647,0.77443,2.417908,0.005936,2.431945


In [205]:
from datetime import timedelta
import numpy as np
import pandas as pd

def _simulate_ticker_roi(signals, prices, initial_investment, future_investments, percent_to_buy, percent_to_sell):
    """Run the buy/sell simulation for one ticker over one window and return its ROI in percent."""
    if len(prices) == 0:
        # No trading rows in the sampled window: there is nothing to simulate.
        return float('nan')

    cash = future_investments
    shares = initial_investment / prices.iloc[0]  # initial stake is bought at the first price in the window

    for signal, price in zip(signals, prices):
        if signal == 'Buy':
            amount_to_buy = percent_to_buy * cash
            if cash >= amount_to_buy:
                shares += amount_to_buy / price
                cash -= amount_to_buy
        elif signal == 'Sell' and shares > 0:
            position_value = shares * price
            amount_to_sell = percent_to_sell * position_value
            if position_value >= amount_to_sell:
                shares -= amount_to_sell / price
                cash += amount_to_sell

    # Cash always counts towards the final value, so proceeds from a position
    # that was sold off completely are not discarded.
    holdings_value = shares * prices.iloc[-1] if shares > 0 else 0
    total_value = cash + holdings_value

    capital = initial_investment + future_investments
    if capital == 0:
        return 0  # nothing was invested, so there is no return to measure
    profit = total_value - capital
    return (profit / capital) * 100


def calculate_stock_roi(bb_signals_nd, adj_close_nd, periods_date, periods_list, tickers, n_sample, initial_investment, future_investments, percent_to_buy, percent_to_sell, holding_days=90, rng=None):
    """Simulate a signal-driven trading account over randomly sampled windows.

    For each period, ``n_sample`` start dates are drawn (with replacement)
    from the calendar days between the period start and ``holding_days``
    before the period end. Each ticker gets its own account per window:

    - ``initial_investment`` is converted to shares at the first price in the window;
    - ``future_investments`` is the starting cash balance;
    - a 'Buy' signal spends ``percent_to_buy`` of the current cash on shares;
    - a 'Sell' signal sells ``percent_to_sell`` of the current share value for cash;
    - the final value is cash plus remaining shares at the last price in the window.

    Parameters
    ----------
    bb_signals_nd, adj_close_nd : dict of period -> DataFrame
        Signal and adjusted-close tables with one column per ticker. Columns
        are looked up by ticker name; rows are paired by position, so both
        tables are assumed to share the same index.
    periods_date : dict of period -> (start_date, end_date)
    periods_list : iterable of period labels to simulate
    tickers : list of ticker names
    n_sample : number of sampled start dates per period
    initial_investment, future_investments : starting share stake and cash, per ticker
    percent_to_buy, percent_to_sell : fractions applied on Buy / Sell signals
    holding_days : window length in days (default 90)
    rng : optional object with a numpy-style ``choice`` method (e.g.
        ``np.random.default_rng(seed)``); the global ``np.random`` is used when omitted

    Returns
    -------
    dict ``{period: {ticker: [roi_percent, ...]}}`` with one entry per sampled
    window. A window containing no rows yields NaN for every ticker.
    """
    holding_window = timedelta(days=holding_days)
    sampler = np.random if rng is None else rng

    roi_results = {period: {ticker: [] for ticker in tickers} for period in periods_list}

    for period in periods_list:
        # A window must fit inside the period, so the last admissible start is
        # holding_days before the period end.
        candidate_starts = pd.date_range(
            start=pd.to_datetime(periods_date[period][0]),
            end=pd.to_datetime(periods_date[period][1]) - holding_window,
        )

        for sampled in sampler.choice(candidate_starts, size=n_sample, replace=True):
            window_start = pd.to_datetime(sampled)
            window_end = window_start + holding_window

            window_prices = adj_close_nd[period].loc[window_start:window_end]
            window_signals = bb_signals_nd[period].loc[window_start:window_end]

            for ticker in tickers:
                # Select columns by ticker name so the result does not depend on
                # the column order of the input tables matching `tickers`.
                roi_percent = _simulate_ticker_roi(
                    window_signals[ticker],
                    window_prices[ticker],
                    initial_investment,
                    future_investments,
                    percent_to_buy,
                    percent_to_sell,
                )
                roi_results[period][ticker].append(roi_percent)

    return roi_results

In [208]:
def stochastic_roi(tickers,periods,return_rates_list,analysis_type):
    """Summarise sampled return rates into a ticker-by-period table.

    ``return_rates_list[period][ticker]`` is a sequence of samples. Each cell
    of the returned DataFrame (index ``tickers``, columns ``periods``) holds
    the statistic named by ``analysis_type``: 'Mean', 'Median', 'Std' or
    'Variance'. Any other value leaves the cells unfilled.
    """
    reducer_by_type = {'Mean': 'mean', 'Median': 'median', 'Std': 'std', 'Variance': 'var'}
    reducer_name = reducer_by_type.get(analysis_type)

    summary = pd.DataFrame(index=tickers,columns=periods)
    for period in periods:
        for ticker in tickers:
            samples = pd.Series(return_rates_list[period][ticker])
            if reducer_name is not None:
                summary.at[ticker,period] = getattr(samples, reducer_name)()

    return summary

In [214]:
# NOTE(review): with initial_investment=0 and percent_to_buy=0. no shares are ever
# bought, so calculate_stock_roi as defined in this notebook yields 0 for every
# sample. The non-zero figures shown below were presumably produced with different
# arguments - confirm the intended percent_to_buy.
a = calculate_stock_roi(
    bb_signals_nd=bb_signals,
    adj_close_nd=adj_close_sector_etf,
    periods_date=economic_cycle_periods,
    periods_list=economic_cycle_periods,  # iterating the dict yields the period labels
    tickers=sector_etf_tickers,
    n_sample=10,
    initial_investment=0,
    future_investments=100,
    percent_to_buy=0.,
    percent_to_sell=0.2,
)
# Average the per-ticker mean ROI across tickers, giving one figure per period
stochastic_roi(sector_etf_tickers, economic_cycle_periods_list, a, 'Mean').mean()

trough         3.331125
expansion      0.924380
peak           1.122377
contraction    0.331041
all_data       0.827272
dtype: float64