In [None]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
import json
import plotly.graph_objects as go

# Part 1: Fetching Stock Data

In [None]:
def fetch_stock_data(ticker_list, years=5):
    """Download daily closing prices for several tickers into one DataFrame.

    Parameters
    ----------
    ticker_list : iterable of str
        Ticker symbols passed one by one to ``yf.Ticker``.
    years : int or float, default 5
        Length of the look-back window, counted as ``years * 365`` days
        ending at the moment of the call.

    Returns
    -------
    pandas.DataFrame
        One column per ticker (named after the ticker) holding that ticker's
        'Close' series, outer-joined on the history index and sorted by it.
        An empty DataFrame is returned when ``ticker_list`` is empty.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=years * 365)

    close_columns = []
    for ticker in ticker_list:
        # `interval` sets the bar size. The previous `period='1d'` requested a
        # one-day look-back, which contradicts the explicit start/end window.
        hist_data = yf.Ticker(ticker).history(interval='1d', start=start_date, end=end_date)
        close_columns.append(hist_data['Close'].rename(ticker))

    if not close_columns:
        # pd.concat refuses an empty list; keep the original "empty frame" result.
        return pd.DataFrame()

    # One concat instead of a merge per ticker, which re-copied the growing
    # frame on every iteration.
    return pd.concat(close_columns, axis=1, join='outer').sort_index()

# Download the closing-price history for the tickers of interest
ticker_list = [
    'AAPL', 'AMZN', 'MSFT', 'GOOGL', 'META',
    'TSLA', 'NVDA', 'ADBE', 'NFLX', 'INTC',
]
years = 5
daily_data = fetch_stock_data(ticker_list=ticker_list, years=years)

daily_data

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOGL,META,TSLA,NVDA,ADBE,NFLX,INTC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-09-12 00:00:00-04:00,52.931549,99.500000,105.837601,58.580002,162.000000,19.369333,66.468536,267.790009,369.950012,39.068569
2018-09-13 00:00:00-04:00,54.210121,99.493500,106.974503,59.106998,161.360001,19.297333,67.246727,268.519989,368.149994,39.625080
2018-09-14 00:00:00-04:00,53.594776,98.509499,107.410316,58.898998,162.320007,19.680000,68.508194,274.690002,364.559998,39.598984
2018-09-17 00:00:00-04:00,52.167751,95.401497,106.244987,57.991501,160.580002,19.656000,67.888626,268.250000,350.350006,39.494640
2018-09-18 00:00:00-04:00,52.253948,97.052498,107.258720,58.355499,160.300003,18.997334,67.167412,270.790009,367.649994,40.085926
...,...,...,...,...,...,...,...,...,...,...
2023-09-05 00:00:00-04:00,189.699997,137.270004,333.549988,135.770004,300.149994,256.489990,485.440033,564.880005,448.679993,36.709999
2023-09-06 00:00:00-04:00,182.910004,135.360001,332.880005,134.460007,299.170013,251.919998,470.609985,561.940002,445.760010,36.980000
2023-09-07 00:00:00-04:00,177.559998,137.850006,329.910004,135.259995,298.670013,251.490005,462.410004,560.460022,443.140015,38.180000
2023-09-08 00:00:00-04:00,178.179993,138.229996,334.269989,136.380005,297.890015,248.500000,455.720001,560.359985,442.799988,38.009998


# Part 2: Momentum Strategy Simulation

In [None]:
# Resample data to different frequencies: daily, weekly, monthly
def resample_data(data, period):
    """Return `data` at the requested sampling frequency.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Time-indexed data; it must support ``.resample`` for 'W' and 'M'.
    period : {'D', 'W', 'M'}
        'D' returns `data` unchanged; 'W' and 'M' keep the last observation
        of each week / month.

    Returns
    -------
    Same type as `data`.

    Raises
    ------
    ValueError
        If `period` is not one of 'D', 'W', 'M'. Previously the function fell
        through and returned None, which only failed later in the caller.
    """
    if period == 'D':
        return data
    if period == 'W':
        return data.resample('W').last()
    if period == 'M':
        try:
            # 'ME' is the month-end alias in newer pandas, where 'M' is deprecated.
            return data.resample('ME').last()
        except ValueError:
            # Older pandas rejects 'ME' as an invalid frequency and only knows 'M'.
            return data.resample('M').last()
    raise ValueError(f"Unsupported period {period!r}; expected 'D', 'W' or 'M'")

In [None]:
# Simulate a simple momentum strategy based on log returns
def simulate_momentum_strategy(data, initial_amount, top_n, tax_rate, period='M'):
    """Back-test a "hold last period's winners" strategy.

    After each period the `top_n` columns with the highest log return are
    ranked, those with a non-positive return are discarded, and the current
    cash is split equally among the survivors for the following period. When
    nothing qualifies the cash is carried over unchanged. A profit made in a
    period is taxed at `tax_rate`; losses are not taxed.

    Parameters
    ----------
    data : pandas.DataFrame
        Price history, one column per stock.
    initial_amount : number
        Starting cash.
    top_n : int
        Maximum number of stocks held at once.
    tax_rate : float
        Fraction of each period's positive profit that is deducted.
    period : str, default 'M'
        Passed to ``resample_data`` before returns are computed.

    Returns
    -------
    pandas.DataFrame
        Indexed like the resampled data, with columns 'Selected Stocks'
        (JSON list of tickers), 'Profit Before Tax', 'Tax Paid' and
        'Portfolio Value'. Monetary figures are rounded to 2 decimals, and the
        rounded portfolio value is what gets reinvested next period.
    """
    prices = resample_data(data, period)
    log_returns = np.log(prices / prices.shift(1))
    report_columns = ['Selected Stocks', 'Profit Before Tax', 'Tax Paid', 'Portfolio Value']
    simulation_details = pd.DataFrame(index=log_returns.index, columns=report_columns)
    cash = initial_amount

    # `pos` is the row used for ranking; `next_date` labels the row right after
    # it, i.e. the period during which the chosen stocks are held.
    for pos, next_date in enumerate(log_returns.index[1:]):
        ranked = log_returns.iloc[pos].sort_values(ascending=False)
        winners = ranked.head(top_n)
        # Only keep stocks that actually gained (this also drops NaN returns)
        winners = winners[winners > 0]

        if winners.empty:
            # Nothing to buy: the cash sits idle for this period
            simulation_details.loc[next_date, 'Portfolio Value'] = cash
        else:
            picks = winners.index.tolist()
            simulation_details.loc[next_date, 'Selected Stocks'] = json.dumps(picks)
            # Equal split of the available cash across the picks
            stake = cash / len(picks)
            # Grow every stake by the following period's return of its stock
            new_value = sum(stake * np.exp(log_returns.loc[next_date, ticker]) for ticker in picks)
            profit = new_value - cash
            simulation_details.loc[next_date, 'Profit Before Tax'] = round(profit, 2)

            # Tax applies to gains only
            if profit > 0:
                tax = profit * tax_rate
                new_value -= tax
                simulation_details.loc[next_date, 'Tax Paid'] = round(tax, 2)
            simulation_details.loc[next_date, 'Portfolio Value'] = round(new_value, 2)

        # The recorded (rounded) value is the cash available for the next round
        cash = simulation_details.loc[next_date, 'Portfolio Value']

    # The first row has no preceding period, so it simply shows the starting capital
    simulation_details.loc[log_returns.index[0], 'Portfolio Value'] = initial_amount
    return simulation_details

# Momentum-strategy settings: starting capital, basket size,
# tax rate applied to gains, and rebalancing frequency
initial_amount = 100000
top_n = 3
tax_rate = 0.15
frequency = 'M'
simulation_details = simulate_momentum_strategy(
    data=daily_data,
    initial_amount=initial_amount,
    top_n=top_n,
    tax_rate=tax_rate,
    period=frequency,
)

simulation_details

Unnamed: 0_level_0,Selected Stocks,Profit Before Tax,Tax Paid,Portfolio Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-09-30 00:00:00-04:00,,,,100000
2018-10-31 00:00:00-04:00,,,,100000
2018-11-30 00:00:00-05:00,"[""TSLA""]",3901.34,585.2,103316.14
2018-12-31 00:00:00-05:00,"[""INTC"", ""AMZN"", ""MSFT""]",-8391.44,,94924.7
2019-01-31 00:00:00-05:00,,,,94924.7
...,...,...,...,...
2023-05-31 00:00:00-04:00,"[""META"", ""MSFT"", ""GOOGL""]",44876.21,6731.43,462416.15
2023-06-30 00:00:00-04:00,"[""NVDA"", ""TSLA"", ""NFLX""]",79589.96,11938.49,530067.61
2023-07-31 00:00:00-04:00,"[""TSLA"", ""ADBE"", ""NVDA""]",42972.46,6445.87,566594.2
2023-08-31 00:00:00-04:00,"[""ADBE"", ""META"", ""GOOGL""]",-3998.75,,562595.45


# Part 3: Simulating Individual Stock Investments

In [None]:
# Simulate how each individual stock would have performed over the same period
def track_individual_investments(data, initial_amount, simulation_details, period='W'):
    """Value over time of `initial_amount` invested in each stock on its own.

    Parameters
    ----------
    data : pandas.DataFrame
        Price history, one column per stock.
    initial_amount : number
        Amount notionally invested in every stock at the first row.
    simulation_details : pandas.DataFrame
        Strategy result providing a 'Portfolio Value' column. It is aligned on
        the index, so `period` should be the one the simulation was run with;
        rows without a matching date end up as 0.
    period : str, default 'W'
        Passed to ``resample_data``.

    Returns
    -------
    pandas.DataFrame of int
        One column per stock, plus 'Portfolio Value' (the strategy) and
        'Baseline' (row-wise mean of the stock columns). The first row is set
        to `initial_amount` everywhere; missing values become 0 and all values
        are truncated to integers.
    """
    prices = resample_data(data, period)

    # Compound the period returns of all stocks at once instead of column by column
    individual_investments = (1 + prices.pct_change()).cumprod() * initial_amount

    # Average over the stock columns only, taken before any extra column is
    # added so the result cannot depend on column position.
    baseline = individual_investments.mean(axis=1)

    # The simulation stores this column with object dtype; convert it to float
    # up front so the fillna below works on numeric data only.
    individual_investments['Portfolio Value'] = simulation_details['Portfolio Value'].astype(float)
    individual_investments['Baseline'] = baseline

    # Every series starts from the invested amount (the first return is NaN)
    individual_investments.iloc[0, :] = initial_amount
    return individual_investments.fillna(0).astype(int)

individual_investments_df = track_individual_investments(
    data=daily_data,
    initial_amount=initial_amount,
    simulation_details=simulation_details,
    period=frequency,
)

individual_investments_df

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOGL,META,TSLA,NVDA,ADBE,NFLX,INTC,Portfolio Value,Baseline
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-09-30 00:00:00-04:00,100000,100000,100000,100000,100000,100000,100000,100000,100000,100000,100000,100000
2018-10-31 00:00:00-04:00,96952,79780,93389,90348,92295,127401,75023,91039,80661,99133,100000,92602
2018-11-30 00:00:00-05:00,79384,84381,97376,91928,85497,132371,58214,92939,76478,104927,103316,90350
2018-12-31 00:00:00-05:00,70120,74986,89191,86569,79709,125694,47553,83808,71541,99863,94924,82903
2019-01-31 00:00:00-05:00,73988,85807,91703,93273,101355,115957,51204,91802,90743,100267,94924,89610
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-31 00:00:00-04:00,327496,120399,302420,203582,160963,1155323,543134,154765,105639,76183,462416,314990
2023-06-30 00:00:00-04:00,358389,130164,313609,198329,174498,1483004,607338,181140,117737,81030,530067,364524
2023-07-31 00:00:00-04:00,362971,133479,309355,219902,193724,1515069,670897,202322,117330,86676,566594,381173
2023-08-31 00:00:00-04:00,347587,137803,302479,225618,179916,1462099,708599,207201,115916,85455,562595,377267


# Part 4: Calculating Metrics

In [None]:
from scipy.stats import ttest_1samp

def calculate_sharpe_ratio(returns, annual_risk_free_rate=0.01, frequency='D'):
    """Per-period Sharpe ratio of a return series (not annualized).

    Parameters
    ----------
    returns : pandas.Series
        Simple returns sampled at `frequency`.
    annual_risk_free_rate : float, default 0.01
        Annual risk-free rate; converted to a per-period rate by compounding.
    frequency : {'D', 'W', 'M'}, default 'D'
        Sampling frequency of `returns` (252, 52 or 12 periods per year).

    Returns
    -------
    float
        Mean excess return divided by the standard deviation of the excess
        returns.

    Raises
    ------
    ValueError
        If `frequency` is not 'D', 'W' or 'M'. Previously this surfaced as an
        UnboundLocalError.
    """
    periods_per_year = {'D': 252, 'W': 52, 'M': 12}
    if frequency not in periods_per_year:
        raise ValueError(f"Unsupported frequency {frequency!r}; expected 'D', 'W' or 'M'")

    # Per-period rate that compounds to the annual rate
    adjusted_rfr = (1 + annual_risk_free_rate) ** (1 / periods_per_year[frequency]) - 1

    excess_returns = returns - adjusted_rfr
    return excess_returns.mean() / excess_returns.std()

def t_test_portfolio_returns(portfolio_returns, bench_annual_rate=0.1, frequency='D'):
    """Two-sided one-sample t-test of the returns against a benchmark rate.

    Parameters
    ----------
    portfolio_returns : array-like of float
        Per-period simple returns, e.g. the output of ``pct_change``. NaN
        entries (such as the leading one produced by ``pct_change``) are
        ignored wherever they occur.
    bench_annual_rate : float, default 0.1
        Annual benchmark return; converted to a per-period rate by compounding.
    frequency : {'D', 'W', 'M'}, default 'D'
        Sampling frequency of the returns (252, 52 or 12 periods per year).

    Returns
    -------
    (t_stat, p_value)
        As returned by ``scipy.stats.ttest_1samp`` (two-sided p-value).

    Raises
    ------
    ValueError
        If `frequency` is not 'D', 'W' or 'M'. Previously this surfaced as an
        UnboundLocalError.
    """
    periods_per_year = {'D': 252, 'W': 52, 'M': 12}
    if frequency not in periods_per_year:
        raise ValueError(f"Unsupported frequency {frequency!r}; expected 'D', 'W' or 'M'")

    # Per-period benchmark rate that compounds to the annual rate
    benchmark_rate = (1 + bench_annual_rate) ** (1 / periods_per_year[frequency]) - 1

    # Drop NaN by value rather than slicing off the first element: this neither
    # discards a valid first observation nor lets a later NaN turn the whole
    # statistic into NaN.
    observed = np.asarray(portfolio_returns, dtype=float)
    observed = observed[~np.isnan(observed)]

    t_stat, p_value = ttest_1samp(observed, benchmark_rate)
    return t_stat, p_value

def calculate_metrics(dataframe, initial_amount, bench_annual_rate, frequency='D'):
    """Summarize the investment value series in `dataframe`.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Value over time, one column per investment; it must contain a
        'Portfolio Value' column.
    initial_amount : number
        Amount each series started from.
    bench_annual_rate : float
        Annual return the portfolio returns are tested against.
    frequency : {'D', 'W', 'M'}, default 'D'
        Sampling frequency of the rows (252, 52 or 12 periods per year).

    Returns
    -------
    tuple
        ``(final_values, relative_values, mean_returns, sharpes, t_stat, p_value)``:
        last row of `dataframe`; growth relative to `initial_amount`;
        annualized mean returns; annualized Sharpe ratios; t statistic of the
        portfolio returns against the benchmark; one-sided p-value for
        "portfolio return is greater than the benchmark".

    Raises
    ------
    ValueError
        If `frequency` is not 'D', 'W' or 'M'. Previously this surfaced as an
        UnboundLocalError.
    """
    periods_per_year = {'D': 252, 'W': 52, 'M': 12}
    if frequency not in periods_per_year:
        raise ValueError(f"Unsupported frequency {frequency!r}; expected 'D', 'W' or 'M'")
    annualization_factor = periods_per_year[frequency]

    # Final value of every series and its growth relative to the starting amount
    final_values = dataframe.iloc[-1]
    relative_values = final_values / initial_amount - 1

    returns = dataframe.pct_change()

    # Compound the mean per-period return over one year
    mean_returns = (1 + returns.mean()) ** annualization_factor - 1

    # calculate_sharpe_ratio yields a per-period ratio; scale it by
    # sqrt(periods per year) so the values match the "Annualized" label they
    # are reported under.
    per_period_sharpes = returns.apply(calculate_sharpe_ratio, annual_risk_free_rate=0.01, frequency=frequency)
    sharpes = per_period_sharpes * annualization_factor ** 0.5

    # Test whether the portfolio returns exceed the per-period benchmark rate
    t_stat, p_value = t_test_portfolio_returns(returns['Portfolio Value'], bench_annual_rate, frequency=frequency)

    # Convert the two-sided p-value to the one-sided "greater" alternative.
    # Halving is only valid when the statistic is on the tested side.
    one_sided_p_value = p_value / 2 if t_stat > 0 else 1 - p_value / 2

    return final_values, relative_values, mean_returns, sharpes, t_stat, one_sided_p_value

# Annual return the portfolio returns are tested against
bench_annual_rate = 0.1

# Calculate the metrics
(final_values, relative_values, mean_returns,
 sharpes, t_stat, p_value) = calculate_metrics(
    dataframe=individual_investments_df,
    initial_amount=initial_amount,
    bench_annual_rate=bench_annual_rate,
    frequency=frequency,
)

# Part 5: Visualization

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_combined_charts(dataframe, final_values, relative_values, sharpes, mean_returns,
                         t_stat=None, p_value=None, bench_annual_rate=None):
    """Render a 3x2 overview figure of the investment results.

    Layout: portfolio value over time (row 1, left), a text panel with the
    t-test result (row 1, right), then bar charts of final values, relative
    growth, Sharpe ratios and mean returns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Value series over time; its 'Portfolio Value' column is plotted.
    final_values, relative_values, sharpes, mean_returns : pandas.Series
        Per-investment metrics; the bar labels come from `final_values.index`.
    t_stat, p_value : float, optional
        Result of the significance test shown in the text panel.
    bench_annual_rate : float, optional
        Benchmark named in the "significantly different" message.

    Notes
    -----
    The three optional arguments used to be read implicitly from module-level
    variables. Passing them explicitly is preferred; when one is left as None
    the function still falls back to the module-level variable of the same
    name, so existing calls keep working.

    Raises
    ------
    NameError
        If an optional value is needed, was not passed, and no module-level
        variable of that name exists.
    """
    def module_default(name):
        # Legacy path: look the value up in the module namespace
        try:
            return globals()[name]
        except KeyError:
            raise NameError(f"name '{name}' is not defined") from None

    if t_stat is None:
        t_stat = module_default('t_stat')
    if p_value is None:
        p_value = module_default('p_value')

    labels = final_values.index
    colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A']

    fig = make_subplots(rows=3, cols=2,
                        subplot_titles=('Portfolio Value Over Time',
                                        '',
                                        'Final Investment Values',
                                        'Relative Investment Growth',
                                        'Annualized Sharpe Ratios',
                                        'Annualized Mean Returns'),
                        vertical_spacing=0.08)

    # Portfolio Value line chart
    fig.add_trace(go.Scatter(x=dataframe.index,
                             y=dataframe['Portfolio Value'],
                             mode='lines',
                             name='Portfolio Value',
                             line=dict(color=colors[0], width=2.5)),
                  row=1, col=1)

    # T-test and P-value, shown as text in the otherwise empty second panel
    significance_text = f"<b>T-test:</b> {t_stat:.2f}<br><b>P-value:</b> {p_value:.5f}"
    if t_stat > 2 and p_value < 0.05:
        # The benchmark is only needed for this message, so resolve it lazily
        if bench_annual_rate is None:
            bench_annual_rate = module_default('bench_annual_rate')
        significance_text += f"<br><b>Significantly different from {bench_annual_rate:.0%} per year!</b>"

    fig.add_annotation(
        text=significance_text,
        showarrow=False,
        xref="x2", yref="y2",
        x=0.5, y=0.5,
        font=dict(size=15),
        bgcolor="white",
        align="center"
    )

    # The four bar charts differ only in data, trace name, label format,
    # color and grid position.
    bar_panels = [
        (final_values, 'Final Values ($)', "${:,.2f}", colors[1], 2, 1),
        (relative_values, 'Relative Growth', "{:.2%}", colors[2], 2, 2),
        (sharpes, 'Annualized Sharpe Ratio', "{:.2f}", colors[3], 3, 1),
        (mean_returns, 'Annualized Mean Returns', "{:.2%}", colors[4], 3, 2),
    ]
    for series, trace_name, label_format, color, row, col in bar_panels:
        fig.add_trace(go.Bar(x=labels,
                             y=series.values,
                             name=trace_name,
                             text=[label_format.format(v) for v in series.values],
                             textposition='outside',
                             marker_color=color),
                      row=row, col=col)

    # Update layout
    fig.update_layout(title_text="Investment Results Overview",
                      title_font=dict(size=24, color='black', family="Arial Black"),
                      title_pad=dict(t=10),
                      showlegend=False,
                      height=1500,
                      title_x=0.5,
                      bargap=0.05,
                      )

    fig.show()

plot_combined_charts(
    dataframe=individual_investments_df,
    final_values=final_values,
    relative_values=relative_values,
    sharpes=sharpes,
    mean_returns=mean_returns,
)