In [1]:
import yfinance as yf
import pandas as pd
# import plotly.express as px
# from sklearn.linear_model import LinearRegression
import numpy as np

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import ruptures as rpt

import os

In [9]:
# Load the portfolio definition; the `stock` column holds the tickers to download.
stocks = pd.read_csv('snp500_portfolio.csv')

# SPY (the S&P 500 index ETF) is appended so it can serve as the market benchmark.
tickers = stocks['stock'].tolist() + ['SPY']

# One daily closing-price series per ticker, each named after its ticker.
close_columns = []
for ticker in tickers:
    column_name = "{}".format(ticker)
    history = yf.download(tickers=ticker, period="2y", interval='1d', progress=False).dropna()
    history = history.rename(columns={"Close": column_name})
    close_columns.append(history[column_name])

# 2 years of closing prices, one column per ticker, aligned on the date index
Data = pd.concat(close_columns, axis=1)

# 1 year of closing prices: the most recent half of the rows
halfway = Data.shape[0] // 2
data = Data.iloc[halfway:]

In [13]:
# Cumulative returns (%) of every ticker over the 1-year window, in long format:
# one row per (Date, stock) with the cumulative return in `value`.
# Gaps are forward-filled explicitly: this is exactly what pct_change() used to do
# through its implicit fill_method='pad' default, which pandas has deprecated.
daily_returns = data.ffill().pct_change(fill_method=None)
cumulative_returns = ((1 + daily_returns).cumprod() - 1) * 100
df = cumulative_returns.reset_index().melt(id_vars=["Date"], var_name='stock')

# Attach each ticker's sector. The join key is spelled out so an extra shared column in
# the portfolio file cannot silently become part of the key. Tickers that are not listed
# in `stocks` keep a NaN sector and are therefore left out of the sector aggregation.
df = pd.merge(df, stocks, how='left', on='stock')
df['Date'] = df['Date'].dt.date

# Sector-based cumulative returns.
# Each sector only contains the stocks listed in the portfolio file, and every stock counts
# once: the sector value for a date is the MEDIAN of its members' cumulative returns.
# The sector name is stored in the `stock` column so sectors can be looked up like tickers.
sector_pct_chng = (
    df.groupby(['sector', 'Date'])
    .agg({'value': 'median'})
    .reset_index()
    .rename(columns={'sector': 'stock'})
)

df = pd.concat([df, sector_pct_chng], ignore_index=True)

In [14]:
# Why PolyFit and not other linear estimation algorithms?
# PolyFit is much faster than OLS or LinearRegress
# Any lack of performance it may have in dealing with outliers doesn't matter much if used for preliminary visualization
def getTrend(x):
    """Fit a straight line (degree-1 least squares) through a 1-D series.

    The sample positions 0..n-1 are used as the x-coordinates, so the returned
    polynomial is meant to be evaluated on ``np.arange(n)``.

    Non-finite samples (NaN/inf) are ignored during the fit but keep their
    position, so a series with missing values at the start or in the middle
    still yields a line aligned with the original index. Without this,
    ``np.polyfit`` cannot produce a usable fit as soon as one value is NaN.

    Parameters
    ----------
    x : array-like (e.g. pandas Series, numpy array, list) of numbers

    Returns
    -------
    numpy.poly1d
        Callable trend line ``slope * position + intercept``.

    Raises
    ------
    Whatever ``np.polyfit`` raises when no finite sample is left to fit
    (e.g. an empty or all-NaN input).
    """
    y = np.asarray(x, dtype=float)
    positions = np.arange(y.size)
    # Fit only on real observations, at their original positions.
    usable = np.isfinite(y)
    z = np.polyfit(positions[usable], y[usable], deg=1)
    return np.poly1d(z)

In [None]:
# fig  : per-stock closing price + linear trend line (2 traces per stock)
# fig2 : per-stock cumulative returns vs. its sector and vs. SPY (3 traces per stock)
# Every trace starts hidden; a dropdown button per stock reveals that stock's traces.
fig = make_subplots(rows=1, cols=1, shared_xaxes=True)
fig2 = make_subplots(rows=1, cols=1, shared_xaxes=True)

button_options1 = []
button_options2 = []

n_stocks = stocks.shape[0]

# Split the long-format returns frame once instead of re-scanning it for every trace.
returns_by_name = {name: group for name, group in df.groupby('stock', sort=False)}
no_returns = df.iloc[0:0]  # empty frame with the right columns, for names without data
spy_returns = returns_by_name.get("SPY", no_returns)  # loop-invariant

# `position` (not the DataFrame index label) drives the visibility masks, so the masks stay
# aligned with the order in which traces are added even if `stocks` has a non-default index.
for position, (_, row) in enumerate(stocks.iterrows()):
    ticker = row['stock']
    sector = row['sector']

    vis1 = [False] * (n_stocks * 2)
    vis1[2 * position: 2 * position + 2] = [True] * 2

    vis2 = [False] * (n_stocks * 3)
    vis2[3 * position: 3 * position + 3] = [True] * 3

    button_options1.append(dict(label = ticker, method = 'update', args = [{'visible': vis1}]))
    button_options2.append(dict(label = ticker, method = 'update', args = [{'visible': vis2}]))

    # Cumulative returns for the past 1 year, in a fixed order per stock:
    #   1. the stock itself
    #   2. the sector to which the stock belongs
    #   3. SPY
    returns_traces = (
        (ticker, returns_by_name.get(ticker, no_returns)),
        (sector, returns_by_name.get(sector, no_returns)),
        ("SPY", spy_returns),
    )
    for label, returns in returns_traces:
        # add_trace(row=, col=) is the supported replacement for the deprecated append_trace
        fig2.add_trace(
            go.Scatter(
                x = returns['Date'],
                y = returns['value'],
                name = "{}".format(label),
                visible = False),
            row = 1, col = 1
        )

    close = Data[ticker]

    # Plot stock price chart
    fig.add_trace(
        go.Scatter(
            x = Data.index,
            y = close,
            name = "{} Observed Closing Price".format(ticker),
            visible = False),
        row = 1, col = 1
    )

    # Plot the stock's overall linear trend estimate.
    # NOTE: the fit runs over `Data`, i.e. the full downloaded window, not only the last year.
    fig.add_trace(
        go.Scatter(
            x = Data.index,
            y = getTrend(close)(np.arange(close.size)),
            name = "{} Trend".format(ticker),
            visible = False),
        row = 1, col = 1
    )

# The first dropdown entry ('None') hides every trace again.
fig.update_layout(
    yaxis=dict(title="USD"),
    title = "Stock Performance TrendLine",
    template = 'plotly_dark',
    updatemenus=[go.layout.Updatemenu(
        active=0,
        buttons=[dict(label = 'None', method = 'update', args = [{'visible': [False] * (n_stocks * 2)}])] + button_options1
        )
    ])

fig2.update_layout(
    template = 'plotly_dark',
    title='Performance - Cumulative Returns using Closing Prices of Daily Chart',
    yaxis=dict(title="Cumulative Returns (%)"),
    updatemenus=[go.layout.Updatemenu(
        active=0,
        buttons=[dict(label = 'None', method = 'update', args = [{'visible': [False] * (n_stocks * 3)}])] + button_options2
        )
    ])

# SPY (S&P 500) benchmark chart: observed closing price plus its linear trend line.
# NOTE: both traces use `Data`, i.e. the full downloaded window, not only the last year.
spy_close = Data["SPY"]

fig1 = make_subplots(rows=1, cols=1, shared_xaxes = True)

# add_trace(row=, col=) is the supported replacement for the deprecated append_trace
fig1.add_trace(
    go.Scatter(x=Data.index, y=spy_close, name = "S&P500 Observed Closing Price"),
    row = 1, col = 1
)

# SPY's overall linear price trend estimate
fig1.add_trace(
    go.Scatter(x=Data.index, y=getTrend(spy_close)(np.arange(spy_close.size)), name = "S&P500 Trend"),
    row = 1, col = 1
)

# Titled like the other two figures so the chart stands on its own in the report.
fig1.update_layout(
    yaxis=dict(title="USD"),
    title = "S&P500 (SPY) Performance TrendLine",
    template = 'plotly_dark',
)

# Write the three figures into ONE well-formed HTML document.
# - Mode 'w' already truncates an existing file, so no prior exists()/remove() step is needed.
# - Each figure is emitted as a <div> fragment (full_html=False) instead of a complete
#   <html> document; concatenating full documents yields invalid HTML.
# - plotly.js is embedded once, with the first figure, and reused by the following ones,
#   which keeps the report self-contained (works offline) without tripling its size.
with open('snp500_stock_performance.html', 'w', encoding = 'utf-8') as f:
    f.write('<html>\n<head><meta charset="utf-8" /></head>\n<body>\n')
    f.write(fig2.to_html(full_html=False, include_plotlyjs=True))
    f.write(fig.to_html(full_html=False, include_plotlyjs=False))
    f.write(fig1.to_html(full_html=False, include_plotlyjs=False))
    f.write('\n</body>\n</html>\n')

Before running the Mann-Kendall trend test, we ensure that our time-series data:
1. Isn’t collected seasonally 
2. Does not have any covariates
3. Has only one data point per time period

To show the merits of using the Mann-Kendall test for trend analysis (see AMZN and TSLA over the past 1 year):
1. If we look solely at these stocks' initial and final values, both show negative returns.
2. See their LinearTrend in the visual: LinearTrend shows TSLA as slightly decreasing and AMZN as increasing.
3. The difference is that LinearTrend draws the straight line that lies closest to most points of the time series,
while Mann-Kendall looks only at the distribution of Count(+ Change) and Count(- Change) to predict the trend.
4. Thus, Mann-Kendall predicts TSLA as static, but predicts AMZN as increasing.

In [None]:
import pymannkendall as mk

# Mann-Kendall trend test on TSLA's closing prices over the 1-year window (`data`).
# The prices are handed over as a single-column 2-D array.
tsla_close = data['TSLA'].values.reshape(-1, 1)
mk.original_test(tsla_close)

In [None]:
# Mann-Kendall trend test on AMZN's closing prices over the 1-year window (`data`),
# to compare with the TSLA result. Previously this cell only dumped the raw price array
# and never ran the test. Relies on `mk` (pymannkendall) imported in the TSLA cell.
mk.original_test((data['AMZN'].values).reshape(-1, 1))

In [None]:

    # DISABLED DRAFT - changepoint detection with `ruptures` (imported as `rpt`).
    # Kept as real comments rather than bare string literals, so the cell no longer
    # evaluates (and echoes) a string when it is run.
    # TODO: either finish this or delete it. It presumably belongs inside the per-stock
    # plotting loop (it uses `row` and `fig`), and `change_point_df` is not defined
    # anywhere in the visible notebook - confirm before re-enabling.
    #
    # # predict changepoints
    # algo = rpt.Pelt(model="rank", min_size=60)
    # algo.fit(Data[row['stock']].values)
    # result = algo.predict(pen=1)
    #
    # # add date where changepoint is predicted
    # for indices in result:
    #     fig.add_vline(x=change_point_df.index[indices - 1], line_dash="dot", visible=False)