### Fetching Historical Price Data

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np

In [None]:
stocks = {
    'MSFT': '1986-03-13',
    'AAPL': '1980-12-12',
    'NVDA': '1999-01-22',
    'AMZN': '1997-05-15',
    'GOOG': '2004-08-19',
    'META': '2012-05-18',
    'TSLA': '2010-06-29'
}

historical_data = {}

def fetch_historical_data(stocks):
    # Fetching historical data
    for symbol, start_date in stocks.items():
        stock_data = yf.download(symbol, start=start_date, auto_adjust=True)
        historical_data[symbol] = stock_data

    combined_data = pd.concat(historical_data, axis=1)

    return combined_data

combined_data = fetch_historical_data(stocks)

print(combined_data.head())

# combined_data.to_csv("Historical_Data.csv")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


                           MSFT                            AAPL            \
Price                     Close High  Low Open Volume     Close      High   
Ticker                     MSFT MSFT MSFT MSFT   MSFT      AAPL      AAPL   
Date                                                                        
1980-12-12 00:00:00+00:00   NaN  NaN  NaN  NaN    NaN  0.098834  0.099264   
1980-12-15 00:00:00+00:00   NaN  NaN  NaN  NaN    NaN  0.093678  0.094108   
1980-12-16 00:00:00+00:00   NaN  NaN  NaN  NaN    NaN  0.086802  0.087232   
1980-12-17 00:00:00+00:00   NaN  NaN  NaN  NaN    NaN  0.088951  0.089381   
1980-12-18 00:00:00+00:00   NaN  NaN  NaN  NaN    NaN  0.091530  0.091959   

                                                          ...  META            \
Price                           Low      Open     Volume  ... Close High  Low   
Ticker                         AAPL      AAPL       AAPL  ...  META META META   
Date                                                      ...  

### Calculating Daily RSI

In [None]:
def calculate_rsi(data, period=14):
    delta = data['Close'].diff()
    gain = delta.where(delta > 0, 0).rolling(window=period).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=period).mean()

    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def generate_signals(data, rsi_column='RSI'):
    data['Buy_Signal'] = (data[rsi_column] < 25).shift(1)
    data['Sell_Signal'] = (data[rsi_column] > 75).shift(1)
    return data

signalled_data = {}
OVERBOUGHT = 75
OVERSOLD = 25

# Iterate through each stock in the combined data to calculate RSI and generate buy & sell signals
def calculate_rsi_generate_signals(signalled_data, OVERBOUGHT, OVERSOLD):
    for symbol in combined_data.columns.levels[0]:
        stock_data = combined_data[symbol].dropna()
        
        # Calculate RSI and generate signals
        stock_data['RSI'] = calculate_rsi(stock_data)
        stock_data = generate_signals(stock_data)

        signalled_data[symbol] = stock_data

    combined_data_signalled = pd.concat(signalled_data, axis=1)

    return combined_data_signalled

combined_data_signalled = calculate_rsi_generate_signals(signalled_data, OVERBOUGHT, OVERSOLD)

print(combined_data_signalled)

# combined_data_signalled.to_csv("Historical_Data_Signalled.csv")

                                 MSFT                                      \
Price                           Close        High         Low        Open   
Ticker                           MSFT        MSFT        MSFT        MSFT   
Date                                                                        
1980-12-12 00:00:00+00:00         NaN         NaN         NaN         NaN   
1980-12-15 00:00:00+00:00         NaN         NaN         NaN         NaN   
1980-12-16 00:00:00+00:00         NaN         NaN         NaN         NaN   
1980-12-17 00:00:00+00:00         NaN         NaN         NaN         NaN   
1980-12-18 00:00:00+00:00         NaN         NaN         NaN         NaN   
...                               ...         ...         ...         ...   
2024-11-04 00:00:00+00:00  408.459991  410.420013  405.570007  409.799988   
2024-11-05 00:00:00+00:00  411.459991  414.899994  408.079987  408.369995   
2024-11-06 00:00:00+00:00  420.179993  420.450012  410.519989  412.420013   

### Backtesting using Relative Strength Index

In [None]:
symbol_list = ['MSFT', 'AAPL', 'NVDA', 'AMZN', 'GOOG', 'META', 'TSLA']

# Requirements
INITIAL_CAPITAL = 1_000_000
COMMISSION_RATE = 0.001
SLIPPAGE_RATE = 0.0002
MIN_SHARES = 10
MAX_ALLOCATION = 0.3
RISK_FREE_RATE = 0.02
TRADING_DAYS_PER_YEAR = 252
BACKTEST_START_DATE = '1981-01-01'
BACKTEST_END_DATE = '2023-12-31'

def backtesting_strategy(INITIAL_CAPITAL, COMMISSION_RATE, SLIPPAGE_RATE, MIN_SHARES, MAX_ALLOCATION, BACKTEST_START_DATE, BACKTEST_END_DATE):
    portfolio_value = INITIAL_CAPITAL
    cash = INITIAL_CAPITAL
    portfolio = {}
    portfolio_history = []
    trade_count = 0
    trade_returns = []
    wins = 0
    # Backtesting loop by date
    for date in combined_data_signalled.index[(combined_data_signalled.index >= BACKTEST_START_DATE) & (combined_data_signalled.index <= BACKTEST_END_DATE)]:
        print(f"Current date: {date}")
        buy_stocks = []
        sell_stocks = []

        for symbol in symbol_list:
            if (symbol, 'Buy_Signal') in combined_data_signalled.columns and combined_data_signalled.loc[date, (symbol, 'Buy_Signal')] == True:
                buy_stocks.append(symbol)
            if (symbol, 'Sell_Signal') in combined_data_signalled.columns and combined_data_signalled.loc[date, (symbol, 'Sell_Signal')] == True:
                sell_stocks.append(symbol)

        # print(date, buy_stocks)
        # print(date, sell_stocks)

        # Sell trades
        for stock in sell_stocks:
            print('TIME TO SELL')
            if stock in portfolio:
                sell_price = combined_data_signalled.loc[date, (stock, 'Close')][stock] * (1 - COMMISSION_RATE - SLIPPAGE_RATE)
                print(f"Sell price of {stock}:", sell_price)
                num_shares = portfolio[stock]
                proceeds = sell_price * num_shares
                cash += float(proceeds)
                trade_return = (sell_price * num_shares - INITIAL_CAPITAL / len(portfolio)) / (INITIAL_CAPITAL / len(portfolio))
                trade_returns.append(trade_return)
                wins += trade_return > 0
                del portfolio[stock]
                print(f"Sold {stock} for", proceeds)
                trade_count += 1
            else:
                print(f"{stock} not in portfolio")

        # Buy trades
        for stock in buy_stocks:
            print('TIME TO BUY')
            stock_close_price = combined_data_signalled.loc[date, (stock, 'Close')][stock]
            print(f"Buy price of {stock}:", stock_close_price)
            allocation_per_stock = min(float(cash) / len(buy_stocks), float(portfolio_value) * MAX_ALLOCATION)

            print("Cash on hand:", cash)

            if (stock not in portfolio) and (allocation_per_stock > MIN_SHARES * stock_close_price):
                buy_price = stock_close_price * (1 + COMMISSION_RATE + SLIPPAGE_RATE)
                num_shares = int(allocation_per_stock / buy_price)
                cost = buy_price * num_shares

                if cost <= cash: 
                    portfolio[stock] = num_shares
                    cash -= cost
                    print(f"Bought {stock} for", cost)
                    trade_count += 1

        # Rebalancing portfolio
        total_aum = cash + sum(combined_data_signalled.loc[date, (stock, 'Close')][stock] * shares for stock, shares in portfolio.items())
        for stock in portfolio:
            stock_value = combined_data_signalled.loc[date, (stock, 'Close')][stock] * portfolio[stock]
            if stock_value / total_aum > MAX_ALLOCATION:
                print(f"{stock} has excess shares, reallocating portfolio")
                excess_shares = portfolio[stock] - int(MAX_ALLOCATION * total_aum / combined_data_signalled.loc[date, (stock, 'Close')][stock])
                sell_price = combined_data_signalled.loc[date, (stock, 'Close')][stock] * (1 - COMMISSION_RATE - SLIPPAGE_RATE)
                cash += sell_price * excess_shares
                portfolio[stock] -= excess_shares
                print(f"{excess_shares} of {stock} sold for {sell_price} to rebalance portfolio")

        # Calculate daily portfolio value
        portfolio_value = cash + sum(combined_data_signalled.loc[date, (stock, 'Close')][stock] * shares for stock, shares in portfolio.items())
        portfolio_history.append(portfolio_value)
        print(f"Current portfolio value: {portfolio_value}")

    return portfolio_history, portfolio_value, trade_returns, trade_count, wins

portfolio_history, portfolio_value, trade_returns, trade_count, wins = backtesting_strategy(INITIAL_CAPITAL, COMMISSION_RATE, SLIPPAGE_RATE, MIN_SHARES, MAX_ALLOCATION, BACKTEST_START_DATE, BACKTEST_END_DATE)

Current date: 1981-01-02 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-05 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-06 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-07 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-08 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-09 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-12 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-13 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-14 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-15 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-16 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-19 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-20 00:00:00+00:00
Current portfolio value: 1000000
Current date: 1981-01-21 00:00:00+00:00
Current por

In [85]:
# Calculate performance metrics
portfolio_history = pd.Series(portfolio_history, index=combined_data_signalled.index[(combined_data_signalled.index >= BACKTEST_START_DATE) & (combined_data_signalled.index <= BACKTEST_END_DATE)])
total_return = (portfolio_value - INITIAL_CAPITAL) / INITIAL_CAPITAL
annual_return = (1 + total_return) ** (TRADING_DAYS_PER_YEAR / len(portfolio_history)) - 1
daily_returns = portfolio_history.pct_change().dropna()
annual_volatility = daily_returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)
max_drawdown = (portfolio_history / portfolio_history.cummax() - 1).min()

excess_daily_returns = daily_returns - (RISK_FREE_RATE / TRADING_DAYS_PER_YEAR)
sharpe_ratio = np.sqrt(TRADING_DAYS_PER_YEAR) * (excess_daily_returns.mean() / excess_daily_returns.std())
downside_returns = excess_daily_returns[excess_daily_returns < 0]
sortino_ratio = np.sqrt(TRADING_DAYS_PER_YEAR) * (excess_daily_returns.mean() / downside_returns.std())
average_return_per_trade = np.mean(trade_returns)
win_rate = wins / trade_count if trade_count > 0 else 0
expectancy = np.mean([r for r in trade_returns if r > 0]) * win_rate - (1 - win_rate) * np.mean([r for r in trade_returns if r < 0])

# Print the metrics
print(f"Total Return: {total_return:.2%}")
print(f"Annual Return: {annual_return:.2%}")
print(f"Annual Volatility: {annual_volatility:.2%}")
print(f"Maximum Drawdown: {max_drawdown:.2%}")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
print(f"Sortino Ratio: {sortino_ratio:.2f}")
print(f"Total Number of Trades: {trade_count}")
print(f"Average Return per Trade: {average_return_per_trade:.2f}")
print(f"Win Rate: {win_rate:.2%}")
print(f"Expectancy: {expectancy:.2f}")

Total Return: 3189.11%
Annual Return: 8.46%
Annual Volatility: 17.67%
Maximum Drawdown: -50.85%
Sharpe Ratio: 0.43
Sortino Ratio: 0.58
Total Number of Trades: 627
Average Return per Trade: 4.60
Win Rate: 37.96%
Expectancy: 2.64


## Questions

### 1. Which month did the portfolio have the highest return, and which stock contributed the most to that return? 

In [65]:
# Resample data into monthly returns
monthly_returns = portfolio_history.resample('ME').ffill().pct_change().dropna()

max_return_month = monthly_returns.idxmax()
highest_monthly_return = monthly_returns[max_return_month]

# print(max_return_month)
print(f"The month with the highest return is October 2001, with a return of {highest_monthly_return:.2%}.")

start_date = max_return_month.replace(day=1)
end_date = (start_date + pd.offsets.MonthEnd()).normalize()

stock_returns = {}
for stock in symbol_list:
    start_price = combined_data_signalled.loc[start_date, (stock, 'Close')][stock]
    end_price = combined_data_signalled.loc[end_date, (stock, 'Close')][stock]
    stock_returns[stock] = (end_price - start_price) / start_price if start_price != 0 else 0

top_stock = max(stock_returns, key=stock_returns.get)
print(f"The stock with the highest contribution in October 2001 is {top_stock}, with a return of {stock_returns[top_stock]:.2%}.")

The month with the highest return is October 2001, with a return of 31.06%.
The stock with the highest contribution in October 2001 is NVDA, with a return of 70.49%.


### 2. Did the portfolio outperform the S&P 500? If so, what is your rationale for the outperformance? 

In [76]:
# Fetch S&P 500 data
sp500_data = yf.download('^GSPC', start=BACKTEST_START_DATE, end=BACKTEST_END_DATE)
sp500_returns = sp500_data['Adj Close'].pct_change().dropna()['^GSPC']

[*********************100%***********************]  1 of 1 completed


In [None]:
RISK_FREE_RATE = 0.02
TRADING_DAYS_PER_YEAR = 252

# Total Return
sp500_total_return = (sp500_data['Close'].iloc[-1] / sp500_data['Close'].iloc[0]) - 1
sp500_total_return = sp500_total_return['^GSPC']

# Annual Return
sp500_annual_return = (1 + sp500_total_return) ** (TRADING_DAYS_PER_YEAR / len(sp500_returns)) - 1

# Annual Volatility
sp500_annual_volatility = sp500_returns.std() * np.sqrt(TRADING_DAYS_PER_YEAR)

# Maximum Drawdown
cumulative_returns = (1 + sp500_returns).cumprod()
sp500_max_drawdown = (cumulative_returns / cumulative_returns.cummax() - 1).min()

# Sharpe Ratio
sp500_excess_returns = sp500_returns - (RISK_FREE_RATE / TRADING_DAYS_PER_YEAR)
sp500_sharpe_ratio = np.sqrt(TRADING_DAYS_PER_YEAR) * (sp500_excess_returns.mean() / sp500_excess_returns.std())

# Sortino Ratio
sp500_downside_returns = sp500_excess_returns[sp500_excess_returns < 0]
sp500_sortino_ratio = np.sqrt(TRADING_DAYS_PER_YEAR) * (sp500_excess_returns.mean() / sp500_downside_returns.std())

print(f"S&P 500 Total Return: {sp500_total_return:.2%}")
print(f"S&P 500 Annual Return: {sp500_annual_return:.2%}")
print(f"S&P 500 Annual Volatility: {sp500_annual_volatility:.2%}")
print(f"S&P 500 Maximum Drawdown: {sp500_max_drawdown:.2%}")
print(f"S&P 500 Sharpe Ratio: {sp500_sharpe_ratio:.2f}")
print(f"S&P 500 Sortino Ratio: {sp500_sortino_ratio:.2f}")
print("\n")
print(f"Portfolio Total Return: {total_return:.2%}")
print(f"Portfolio Annual Return: {annual_return:.2%}")
print(f"Portfolio Annual Volatility: {annual_volatility:.2%}")
print(f"Portfolio Maximum Drawdown: {max_drawdown:.2%}")
print(f"Portfolio Sharpe Ratio: {sharpe_ratio:.2f}")
print(f"Portfolio Sortino Ratio: {sortino_ratio:.2f}")

S&P 500 Total Return: 3398.48%
S&P 500 Annual Return: 8.62%
S&P 500 Annual Volatility: 17.97%
S&P 500 Maximum Drawdown: -56.78%
S&P 500 Sharpe Ratio: 0.44
S&P 500 Sortino Ratio: 0.55


Portfolio Total Return: 3189.11%
Portfolio Annual Return: 8.46%
Portfolio Annual Volatility: 17.67%
Portfolio Maximum Drawdown: -50.85%
Portfolio Sharpe Ratio: 0.43
Portfolio Sortino Ratio: 0.58


The portfolio slightly underperformed in terms of total and annual returns but achieved smaller volatility and a smaller maxmimum drawdown, suggesting that the portfolio was more stable than the S&P500. Furthermore, with a higher Sortino ratio, it suggests that the portfolio did not outperform the S&P500 index in absolute terms but showed a better risk profile. The portfolio provides a slightly lower return for a slightly smaller risk, which might be preferable for an investor with a smaller risk appetite.

### 3. How would you evaluate whether this is a profitable strategy, and what tests would you conduct to assess its robustness?

Despite a high total return of 3189.11% over the long backtesting period and a positive expectancy of 2.64, its  annual return (8.46%) is not high given that this value is not inflation adjusted and the benchmark for S&P500 is an inflation-adjusted annual return of about 7%. This suggests that the high returns had occurred over a long period (given that the start of the backtesting date range was 1989-01-01). The non-optimised Sharpe ratio of the portfolio was 0.43, suggests that the ecess return per unit of volatility of the trading strategy is relatively low with the benchmark for Sharpe ratios for good strategies to be above 1.0. Furthermore, the Sortino ratio of 0.58, which is less than 1.0, that focuses on downside risk also highlights that there is high downside volatility. Lastly, with a win rate of 37.95%, it suggests that the strategy loses more often than it wins, suggesting that the strategy is not a profitable strategy in the short run but it might yield small profits in the long run.

To assess its robustness, I would conduct walk-forward analysis where I split the backtesting period into smaller intervals of 5-year periods and backtest the trading strategy over each interval. This is especially because different market conditions affect the robustness of trading strategies. For example, in the context of using RSI as a trading strategy, under economic conditions that might cause a market to have high volatility, the fluctuation of prices might cause the RSI to reach extreme levels more often, giving more frequent buy and sell signals which might increase the likelihood of false signals. Hence, period-specific market conditions might influence the effectiveness of a trading strategy and splitting hisotircal data into different periods will allow for better optimisation of parameters. 

In the same vein, splitting historical data into pre-specified time periods can also allow for stress testing where we examine how the strategy would perform during extreme market conditions like the 2008 Financial Crisis or the COVID-19 pandemic in 2020. A robust strategy should be able to perform well not just in bullish markets, but also be able to handle downturns.

Lastly, sensitivity analysis can also be conducted to identify whether the trading strategy performs well across a few parameters, for example, adjusting the RSI threshold to 70/30 or adjusting the window period. If performance varies greatly with small changes to the parameters, the strategy may be sensitive and more prone to failure under different conditions, making the trading strategy less reliable.

### 4. Do you have any suggestions to improve the current strategy?

We can combine RSI with trend indicators such as moving averages to prevent RSI from giving false signals in trending markets. For example, we can take RSI buy signals only if the stock is above its 100-day moving average, indicating a long-term upward trend or we can take the RSI sell signal only if the stock is below its 100-day moving average, indicating a long-term downward trend. Trend indicators can help to prevent us from entering trades that go against the market trend.

We could also implement dynamic RSI thresholds based on volatility, where we adjust RSI thresholds based on the existing market volatility. For example, during high volatility periods or for stocks that have higher volatility, we can use more stringent RSI thresholds and conversely during low volatility periods or for stocks that have lower volatility, we can use less stringent RSI thresholds. This helps to handle larger price swings, reducing false signals in volatile periods or in volatile stocks. 

Lastly, instead of only relying solely on the RSI to exit trades, we can implement a stop-loss and take-profit mechanism. For example, we can set a stop-loss that is based on a percentage of a stock's average true range to take into account the stock's usual price movement, triggering the stop loss only when the stock fluctuates downwards significantly from the average true range. Similarly, we can set a take-profit level that is based on a percentage of a stock's average true range, to provide room for more volatile stocks to move without triggering an exit prematurely.