In [383]:
pip install -r Project1/requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [384]:
import yfinance as yf
import plotly.graph_objs as go
import pandas as pd

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Data downloading

In [385]:
def get_data(instrument: str,
             start_date: str,
             end_date: str,
             interval: str) -> pd.DataFrame:
    """
        Download price history for a single instrument from Yahoo Finance and strip incomplete rows.

        Parameters:
        instrument (str): Ticker symbol handed to `yf.download`, e.g. 'MSFT'.
        start_date (str): Start of the requested range in 'YYYY-MM-DD' format.
        end_date (str): End of the requested range in 'YYYY-MM-DD' format.
        interval (str): Spacing between data points. Valid intervals are: ['1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h', '1d', '5d', '1wk', '1mo', '3mo'].

        Returns:
        pd.DataFrame: The frame returned by `yf.download`, without any row that holds at least one missing value (NaN).
    """
    request = dict(tickers=instrument,
                   start=start_date,
                   end=end_date,
                   interval=interval)
    raw_history = yf.download(**request)

    # how='any': a single NaN in a row is enough to discard that row
    return raw_history.dropna(how='any')

In [386]:
def add_vix(df: pd.DataFrame, start_date: str, end_date: str, interval: str) -> pd.DataFrame:
    """
        Download '^VIX' from Yahoo Finance and attach its 'Adj Close' series to `df` as a 'VIX' column.

        Parameters:
        df (pd.DataFrame): Market data to extend; rows are matched to the VIX data on the index.
        start_date (str): Start of the VIX download range in 'YYYY-MM-DD' format.
        end_date (str): End of the VIX download range in 'YYYY-MM-DD' format.
        interval (str): The time interval between VIX data points.
            Valid intervals are: ['1m', '2m', '5m', '15m', '30m', '60m', '90m', '1h', '1d', '5d', '1wk', '1mo', '3mo'].

        Returns:
        pd.DataFrame: Result of a left merge on the index: every row of `df` is kept and gains a 'VIX' value
            (NaN where the VIX download has no row with the same index label).
    """
    vix_column = (
        yf.download(tickers='^VIX', start=start_date, end=end_date, interval=interval)
        [['Adj Close']]
        .rename(columns={'Adj Close': 'VIX'})
    )

    # left merge keeps the rows of `df` and only looks up matching VIX values
    return df.merge(vix_column, how='left', left_index=True, right_index=True)


# Indicators

## MACD

In [387]:
def macd(data: pd.DataFrame) -> pd.DataFrame:
    """
        Calculate the MACD (Moving Average Convergence Divergence) and generate crossover buy/sell signals.

        The MACD line is the 12-span EMA of 'Adj Close' minus its 26-span EMA; the signal line is the
        9-span EMA of the MACD line (all computed with adjust=False).
        A buy is flagged on a row where MACD is above the signal line while on the previous row it was
        at or below it; a sell is the mirror image. The first row has no previous row and is never flagged.

        Parameters:
        data (pd.DataFrame): DataFrame containing historical market data with an 'Adj Close' column.
                             It is not modified; the function works on a copy.

        Returns:
        pd.DataFrame: Copy of `data` with the additional columns 'MACD', 'Signal Line',
                      'MACD Buy Signal' and 'MACD Sell Signal', where a 1 indicates a signal and 0 means no signal.
    """

    data = data.copy()

    close = data['Adj Close']
    ema_fast = close.ewm(span=12, adjust=False).mean()
    ema_slow = close.ewm(span=26, adjust=False).mean()

    data['MACD'] = ema_fast - ema_slow  # MACD Line
    data['Signal Line'] = data['MACD'].ewm(span=9, adjust=False).mean()  # Signal Line

    macd_line = data['MACD']
    signal_line = data['Signal Line']

    # Values of the previous row; NaN on the first row, which makes every comparison
    # below False there, so the first row gets no signal.
    prev_macd = macd_line.shift(1)
    prev_signal = signal_line.shift(1)

    # Positional, vectorised crossover detection (no per-row label lookups, so duplicate
    # index labels cannot cause a signal to be written to the wrong rows).
    crossed_above = (macd_line > signal_line) & (prev_macd <= prev_signal)
    crossed_below = (macd_line < signal_line) & (prev_macd >= prev_signal)

    data['MACD Buy Signal'] = crossed_above.astype('int64')
    data['MACD Sell Signal'] = crossed_below.astype('int64')

    return data

## RSI


In [388]:
def rsi(data: pd.DataFrame, period: int = 14) -> pd.DataFrame:
    """
    Calculate the Relative Strength Index (RSI) from 'Adj Close' using simple rolling averages.

    RSI = 100 - 100 / (1 + RS), where RS is the rolling mean of the gains divided by the
    rolling mean of the losses over `period` rows.

    Parameters:
    data (pd.DataFrame): DataFrame containing historical market data with an 'Adj Close' column.
                         It is not modified; the function works on a copy.
    period (int): Number of rows (price changes) in each rolling window, typically 14.

    Returns:
    pd.DataFrame: Copy of `data` with an additional 'RSI' column. The first `period` rows are NaN,
                  because a full window needs `period` price changes and the first row has none.
                  A window without losses yields 100; a window without any price change yields NaN.
    """
    # Work on a copy so the caller's frame is left untouched.
    data = data.copy()

    delta = data['Adj Close'].diff()

    # clip() keeps the NaN of the first row, so the undefined first change is not
    # counted as a zero change inside the first window.
    gains = delta.clip(lower=0)
    losses = delta.clip(upper=0).abs()

    avg_gain = gains.rolling(window=period).mean()
    avg_loss = losses.rolling(window=period).mean()

    # Division by a zero average loss gives inf (or NaN for 0/0) rather than raising.
    rs = avg_gain / avg_loss
    data['RSI'] = 100 - (100 / (1 + rs))

    return data

## ATR

In [389]:
def atr(data: pd.DataFrame, period: int = 14) -> pd.DataFrame:
    """
    Calculate the Average True Range (ATR) as a simple rolling mean of the true range.

    The true range of a row is the largest of: High - Low, |High - previous 'Adj Close'| and
    |Low - previous 'Adj Close'|. On the first row there is no previous close, so only High - Low is used.

    Parameters:
    data (pd.DataFrame): DataFrame containing historical market data with 'High', 'Low' and 'Adj Close' columns.
                         It is not modified; the function works on a copy.
    period (int): Number of rows in each rolling window, typically 14.

    Returns:
    pd.DataFrame: Copy of `data` with an additional 'ATR' column (NaN until a full window is available).
    """
    # Work on a copy so the caller's frame is left untouched.
    data = data.copy()

    # NOTE(review): the previous close is taken from 'Adj Close' while High/Low are used as downloaded;
    # confirm that mixing these price series is intended.
    prev_close = data['Adj Close'].shift()

    candidates = pd.concat(
        [
            data['High'] - data['Low'],
            (data['High'] - prev_close).abs(),
            (data['Low'] - prev_close).abs(),
        ],
        axis=1,
    )

    # max() skips NaN, so the first row falls back to High - Low
    true_range = candidates.max(axis=1)
    data['ATR'] = true_range.rolling(window=period).mean()

    return data

## PCA calculation

In [390]:
def apply_pca_trend_following(data: pd.DataFrame, indicators: list = ['MACD', 'RSI', 'ATR'],
                           n_components: int = 2, buy_threshold: float = 0.5, sell_threshold: float = -0.5) -> pd.DataFrame:
    """
    Apply PCA to reduce specified indicators to principal components and generate buy/sell signals.

    The indicator columns are standardized with StandardScaler, projected with PCA, and the components
    are stored as 'PC1', 'PC2', ... A buy is flagged on every row where PC1 is above `buy_threshold`;
    a sell on every row where PC1 is below `sell_threshold` (a row that satisfies both counts as a buy).

    Parameters:
    data (pd.DataFrame): DataFrame containing calculated indicators. It is not modified; the function works on a copy.
    indicators (list): List of column names to include in PCA (default is ['MACD', 'RSI', 'ATR']).
    n_components (int): Number of principal components to keep (default is 2). Must be at least 1.
    buy_threshold (float): Threshold above which to generate a buy signal for PC1.
    sell_threshold (float): Threshold below which to generate a sell signal for PC1.

    Returns:
    pd.DataFrame: Copy of `data` with additional columns 'PC1' ... 'PC<n_components>',
                  'PCA Buy Signal' and 'PCA Sell Signal' (1 = signal, 0 = no signal).

    Raises:
    ValueError: If an indicator column is missing, if `n_components` is smaller than 1,
                or if the indicator columns contain NaN values.
    """
    # Private copy of the column list; the shared default list is never touched.
    indicators = list(indicators)

    # Ensure that required indicators are in the DataFrame
    for indicator in indicators:
        if indicator not in data.columns:
            raise ValueError(f"Missing indicator {indicator} in data. Please calculate {indicator} first.")

    # Signals are derived from PC1, so at least one component is required.
    if n_components < 1:
        raise ValueError("n_components must be at least 1 because the signals are based on PC1.")

    # Work on a copy so the caller's frame is left untouched.
    data = data.copy()
    features = data[indicators]

    # Fail early with a clear message instead of an error raised inside the PCA fit.
    if features.isna().to_numpy().any():
        raise ValueError("Indicator columns contain NaN values. Please drop or fill them before applying PCA.")

    # Standardize the data for the selected indicators
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(features)

    # Apply PCA
    pca = PCA(n_components=n_components)
    principal_components = pca.fit_transform(data_scaled)

    # Add the principal components to the DataFrame
    for i in range(n_components):
        data[f'PC{i+1}'] = principal_components[:, i]

    # Generate buy/sell signals based on the first principal component (PC1).
    # The rule needs no look-back, so every row (including the first) is evaluated.
    pc1 = data['PC1']
    is_buy = pc1 > buy_threshold
    is_sell = (pc1 < sell_threshold) & ~is_buy  # buy takes precedence if the thresholds overlap

    data['PCA Buy Signal'] = is_buy.astype('int64')
    data['PCA Sell Signal'] = is_sell.astype('int64')

    return data

## Trend-following strategy

In [391]:
def trend_following_strategy(data: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate buy/sell signals based on MACD, RSI, and ATR indicators for trend-following strategy.

    Buy:  MACD above its signal line, RSI above 50 and ATR above the mean ATR of the whole frame.
    Sell: MACD below its signal line, RSI below 50 and ATR above the mean ATR of the whole frame.
    Rows where any of the compared values is NaN get no signal.

    Parameters:
    data (pd.DataFrame): DataFrame containing the columns 'MACD', 'Signal Line', 'RSI' and 'ATR'.
                         It is not modified; the function works on a copy.

    Returns:
    pd.DataFrame: Copy of `data` with additional columns 'Trend Buy Signal' and 'Trend Sell Signal'
                  (1 = signal, 0 = no signal).
    """
    data = data.copy()

    macd_bullish = data['MACD'] > data['Signal Line']
    macd_bearish = data['MACD'] < data['Signal Line']

    # The ATR mean is computed once over the full frame and reused for every row.
    # NOTE(review): a full-sample mean includes rows that lie after the row being evaluated.
    high_volatility = data['ATR'] > data['ATR'].mean()

    # The rules need no look-back, so every row (including the first) is evaluated.
    # Buy and sell cannot both be true because the MACD conditions exclude each other.
    data['Trend Buy Signal'] = (macd_bullish & (data['RSI'] > 50) & high_volatility).astype('int64')
    data['Trend Sell Signal'] = (macd_bearish & (data['RSI'] < 50) & high_volatility).astype('int64')

    return data

## Trend-reversal strategy

In [None]:
def trend_reversal_strategy(data: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate buy/sell signals based on MACD, RSI, and VIX indicators for a trend reversal strategy.

    Buy:  RSI below 40, MACD above its signal line and VIX above the mean VIX of the whole frame.
    Sell: RSI above 60, MACD below its signal line and VIX below the mean VIX of the whole frame.
    Rows where any of the compared values is NaN get no signal.

    Parameters:
    data (pd.DataFrame): DataFrame containing historical market data with columns for 'MACD', 'Signal Line', 'RSI', and 'VIX'.
                         It is not modified; the function works on a copy.

    Returns:
    pd.DataFrame: Copy of `data` with additional columns 'Trend Buy Signal' and 'Trend Sell Signal'
                  (1 = signal, 0 = no signal).
    """
    data = data.copy()

    macd_bullish = data['MACD'] > data['Signal Line']
    macd_bearish = data['MACD'] < data['Signal Line']

    # The VIX mean is computed once over the full frame and reused for every row.
    # NOTE(review): a full-sample mean includes rows that lie after the row being evaluated.
    vix_mean = data['VIX'].mean()

    # The rules need no look-back, so every row (including the first) is evaluated.
    # Buy and sell cannot both be true because the MACD conditions exclude each other.
    buy = (data['RSI'] < 40) & macd_bullish & (data['VIX'] > vix_mean)
    sell = (data['RSI'] > 60) & macd_bearish & (data['VIX'] < vix_mean)

    data['Trend Buy Signal'] = buy.astype('int64')
    data['Trend Sell Signal'] = sell.astype('int64')

    return data

In [393]:
# Registry of the selectable strategies: display name -> function that is called with the frame as `data=`.
strategies = dict([
    ('MACD', macd),
    ('PCA-trend-following', apply_pca_trend_following),
    ('Trend-Following', trend_following_strategy),
    ('Trend-Reversal', trend_reversal_strategy),
])

In [394]:
def run_strategy(data: pd.DataFrame, strategy: str) -> pd.DataFrame:
    """
        Executes the specified trading strategy on the given data.

        Parameters:
        data (pd.DataFrame): DataFrame containing historical market data on which the strategy will be applied.
                             The DataFrame must have relevant columns for the selected strategy.

        strategy (str): The name of the strategy to be applied; must be a key of the `strategies` registry.

        Returns:
        pd.DataFrame: DataFrame with the strategy applied, including any newly added columns like buy/sell signals.

        Raises:
        ValueError: If `strategy` is not a key of `strategies`. Failing here avoids handing back an
                    unchanged frame that only breaks later, when the signal columns are missing.
    """

    if strategy not in strategies:
        valid_names = ", ".join(repr(name) for name in strategies)
        raise ValueError(f"Invalid strategy selected! Got {strategy!r}; expected one of: {valid_names}.")

    # every registered strategy accepts the frame through the `data` keyword
    return strategies[strategy](data=data)

# Plots

## Buy and sell signal plot

In [395]:
def plot_buy_sell_signal(data: pd.DataFrame,
                         instrument: str,
                         buy_signal_column: str,
                         sell_signal_column: str,
                         title: str):
    """
        Draws the 'Adj Close' price line and overlays markers at the rows flagged as buy or sell.

        Parameters:
        data (pd.DataFrame): Historical market data. Needs the 'Adj Close' column plus the two
                            signal columns named below; the index is used as the x axis.

        instrument (str): Ticker symbol or name shown at the start of the chart title.

        buy_signal_column (str): Name of the column with buy signals (e.g., 'MACD Buy Signal').
                                Rows where it equals 1 get a green upward triangle.

        sell_signal_column (str): Name of the column with sell signals (e.g., 'MACD Sell Signal').
                                Rows where it equals 1 get a red downward triangle.

        title (str): Text placed after the instrument name in the chart title.
    """
    fig = go.Figure()

    # price line
    price_trace = go.Scatter(
        x=data.index,
        y=data['Adj Close'],
        mode='lines',
        name='Price',
        line={'color': 'blue'},
    )
    fig.add_trace(price_trace)

    # one marker trace per signal column: buy first, then sell
    marker_specs = (
        (buy_signal_column, 'green', 'triangle-up'),
        (sell_signal_column, 'red', 'triangle-down'),
    )
    for signal_column, colour, symbol in marker_specs:
        flagged_dates = data[data[signal_column] == 1].index
        fig.add_trace(go.Scatter(
            x=flagged_dates,
            y=data.loc[flagged_dates, 'Adj Close'],
            mode='markers',
            name=signal_column,
            marker={'color': colour, 'symbol': symbol, 'size': 10},
        ))

    layout_options = {
        'title': f"{instrument} - {title}",
        'xaxis_title': "Date",
        'yaxis_title': "Price (USD)",
        'xaxis_rangeslider_visible': False,
        'hovermode': "x unified",
    }
    fig.update_layout(**layout_options)

    fig.show()

# Performance Evaluation

## Simulate Trading

In [396]:
def simulate_trading(data: pd.DataFrame, buy_signal_column: str, sell_signal_column: str) -> pd.DataFrame:
    """
    Simulates trading based on buy and sell signals and returns a DataFrame with trade details.

    At most one position is held at a time: a buy signal opens a position when none is open, and a sell
    signal closes the open position. Buy signals while a position is open and sell signals while none is
    open are ignored. Every row is evaluated, including the first one. A position that is still open
    after the last row is not recorded as a trade. Each entry and exit is printed.

    Parameters:
    - data (pd.DataFrame): A DataFrame containing market data, including the 'Adj Close' column
                           and the buy/sell signal columns specified by `buy_signal_column` and `sell_signal_column`.
    - buy_signal_column (str): The name of the column containing buy signals (1 = signal).
    - sell_signal_column (str): The name of the column containing sell signals (1 = signal).

    Returns:
    - trades (pd.DataFrame): One row per completed trade with the columns
                                ['Entry Date', 'Entry Price', 'Exit Date', 'Exit Price', 'Profit/Loss', 'Profit/Loss (%)'].
                             The columns are present even when no trade was completed.
    """
    trade_columns = ['Entry Date', 'Entry Price', 'Exit Date', 'Exit Price', 'Profit/Loss', 'Profit/Loss (%)']

    open_position = False  # track if there is an open position
    entry_date = None
    entry_price = None
    trades = []

    # Iterate over plain arrays once instead of doing an .iloc lookup per row and column.
    rows = zip(
        data.index,
        data['Adj Close'].to_numpy(),
        data[buy_signal_column].to_numpy(),
        data[sell_signal_column].to_numpy(),
    )

    for date, price, buy_signal, sell_signal in rows:
        if buy_signal == 1 and not open_position:  # buy
            open_position = True  # open the position
            entry_price = price
            entry_date = date
            print(f"Buying at {entry_date}: {entry_price:.2f}")
        elif sell_signal == 1 and open_position:  # sell
            exit_price = price
            exit_date = date
            profit_loss = exit_price - entry_price
            profit_loss_percent = (profit_loss / entry_price) * 100
            trades.append({
                'Entry Date': entry_date,
                'Entry Price': entry_price,
                'Exit Date': exit_date,
                'Exit Price': exit_price,
                'Profit/Loss': profit_loss,
                'Profit/Loss (%)': profit_loss_percent
            })
            print(f"Selling at {exit_date}: {exit_price:.2f} | Profit/Loss: {profit_loss:.2f} ({profit_loss_percent:.2f}%)")
            open_position = False  # close the position

    # Passing the column list keeps the schema stable for an empty result, so code that
    # reads trades['Profit/Loss'] does not fail with a KeyError when nothing was traded.
    return pd.DataFrame(trades, columns=trade_columns)


## Win Rate

In [397]:
def calculate_win_rate(trades: pd.DataFrame) -> float:
    """
    Calculates the win rate from the trades DataFrame, prints it and returns it.

    A trade counts as a win when its 'Profit/Loss' is strictly positive.

    Parameters:
    - trades (pd.DataFrame): A DataFrame containing trade details from simulate_trading
                             (one row per trade, with a 'Profit/Loss' column).

    Returns:
    - win_rate (float): Share of winning trades in percent (0-100); 0.0 when there are no trades.
    """
    total_trades = len(trades)

    if total_trades == 0:
        # No trades: nothing to count; also covers an empty frame without a 'Profit/Loss' column.
        win_rate = 0.0
    else:
        wins = int((trades['Profit/Loss'] > 0).sum())
        win_rate = (wins / total_trades) * 100

    print(f"Win Rate: {win_rate:.2f}%")
    return float(win_rate)

## Avg Gain/Loss Ratio

In [398]:
def calculate_avg_gain_loss_ratio(trades: pd.DataFrame) -> float:
    """
    Calculates the Average Gain/Loss Ratio from the trades DataFrame, prints it and returns it.

    The ratio is the mean 'Profit/Loss' of the winning trades (> 0) divided by the absolute mean
    'Profit/Loss' of the losing trades (< 0).

    Parameters:
    - trades (pd.DataFrame): A DataFrame containing 'Profit/Loss' for all trades.

    Returns:
    - avg_gain_loss_ratio (float): The ratio; inf when there are winning trades but no losing trades;
                                   NaN when there are neither winning nor losing trades.
    """
    if len(trades) == 0:
        # No trades: also covers an empty frame without a 'Profit/Loss' column.
        profit_loss = pd.Series(dtype='float64')
    else:
        profit_loss = trades['Profit/Loss']

    winning_trades = profit_loss[profit_loss > 0]
    losing_trades = profit_loss[profit_loss < 0]

    avg_gain = winning_trades.mean() if not winning_trades.empty else 0.0
    avg_loss = abs(losing_trades.mean()) if not losing_trades.empty else 0.0

    if avg_loss > 0:
        avg_gain_loss_ratio = avg_gain / avg_loss
    elif avg_gain > 0:
        # gains without a single loss: the ratio is unbounded
        avg_gain_loss_ratio = float('inf')
    else:
        # neither gains nor losses: 0/0 is undefined, so do not report it as infinite
        avg_gain_loss_ratio = float('nan')

    print(f"Average Gain/Loss Ratio: {avg_gain_loss_ratio:.2f}")
    return float(avg_gain_loss_ratio)

# Results

In [None]:
# Load data: monthly AAPL bars for 2000-01-01 .. 2024-01-01 (rows containing NaN are removed by get_data)
instrument='AAPL'
start_date='2000-01-01'
end_date='2024-01-01'
interval='1mo'

data = get_data(instrument=instrument, start_date=start_date, end_date=end_date, interval=interval)

# Calculate the indicators the strategy reads: 'MACD' / 'Signal Line', 'RSI' and 'ATR'.
# RSI and ATR use their default period of 14 rows, i.e. 14 monthly bars at this interval.
data = macd(data)
data = rsi(data)
data = atr(data)
# The rolling RSI/ATR windows are NaN at the start of the frame; drop those rows before generating signals.
data = data.dropna()

# Adds the 'Trend Buy Signal' / 'Trend Sell Signal' columns
data = run_strategy(data=data, strategy='Trend-Following')

# Plot the price line with the buy/sell markers
plot_buy_sell_signal(data=data, instrument=instrument, buy_signal_column='Trend Buy Signal', sell_signal_column='Trend Sell Signal', title = 'Price with Buy/Sell Signals')

# Simulate one-position-at-a-time trading on the signals and print the performance metrics
trades = simulate_trading(data=data, buy_signal_column='Trend Buy Signal', sell_signal_column='Trend Sell Signal')
calculate_win_rate(trades=trades)
calculate_avg_gain_loss_ratio(trades=trades)


[*********************100%***********************]  1 of 1 completed


Buying at 2018-06-01 00:00:00: 43.89
Selling at 2018-12-01 00:00:00: 37.67 | Profit/Loss: -6.23 (-14.19%)
Buying at 2019-09-01 00:00:00: 54.12
Selling at 2022-09-01 00:00:00: 136.54 | Profit/Loss: 82.41 (152.28%)
Buying at 2023-06-01 00:00:00: 192.51
Win Rate: 50.00%
Average Gain/Loss Ratio: 13.23


In [None]:
# Load data: monthly GS bars for 2000-01-01 .. 2024-01-01, plus the VIX for the same range and interval
instrument='GS'
start_date='2000-01-01'
end_date='2024-01-01'
interval='1mo'

data = get_data(instrument=instrument, start_date=start_date, end_date=end_date, interval=interval)
# Left-merges the '^VIX' adjusted close onto the price frame as a 'VIX' column
data = add_vix(df = data, start_date=start_date, end_date=end_date, interval=interval)

# Calculate the indicators: 'MACD' / 'Signal Line', 'RSI' and 'ATR'.
# RSI and ATR use their default period of 14 rows, i.e. 14 monthly bars at this interval.
data = macd(data)
data = rsi(data)
data = atr(data)
# Drop rows with NaN in any column (start of the rolling windows, dates without a VIX match)
data = data.dropna()

# Adds the 'Trend Buy Signal' / 'Trend Sell Signal' columns
data = run_strategy(data=data, strategy='Trend-Reversal')

# Plot the price line with the buy/sell markers
plot_buy_sell_signal(data=data, instrument=instrument, buy_signal_column='Trend Buy Signal', sell_signal_column='Trend Sell Signal', title = 'Price with Buy/Sell Signals')

# Simulate one-position-at-a-time trading on the signals and print the performance metrics
trades = simulate_trading(data=data, buy_signal_column='Trend Buy Signal', sell_signal_column='Trend Sell Signal')
calculate_win_rate(trades=trades)
calculate_avg_gain_loss_ratio(trades=trades)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Buying at 2009-06-01 00:00:00: 113.63
Selling at 2018-07-01 00:00:00: 205.34 | Profit/Loss: 91.71 (80.71%)
Win Rate: 100.00%
Average Gain/Loss Ratio: inf
