In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import random
import seaborn as sns
import matplotlib.pyplot as plt

import helper.utils as utils
import helper.data as get
import helper.decompose as decomp

from pprint import pprint

sns.set_style('whitegrid')
plt.rcParams['axes.edgecolor'] = 'k'
plt.rcParams['lines.linewidth'] = 1

rs = 88

In [2]:
start_date = '2018-01-01'
end_date = '2023-08-29'

snp_tickers = get.snp_tickers_random(random_state=rs, sample=26)
snp_prices = get.fetch_stock_data(
    snp_tickers, 
    start_date, 
    end_date,
    col='Adj Close'
)

snp_prices.head(3)

Unnamed: 0_level_0,ACN,ADBE,BA,BDX,BLK,CAH,CBOE,CE,CFG,DGX,...,JPM,LLY,MCD,NXPI,ON,ORLY,TROW,UAL,WST,YUM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02,141.472977,177.699997,282.886383,197.490311,441.443451,52.057705,121.389282,94.114975,33.833683,88.085724,...,91.350372,76.347435,152.204285,109.646767,21.809999,249.259995,85.742859,68.940002,97.52095,73.644981
2018-01-03,142.125885,181.039993,283.801239,199.439468,446.101044,51.753082,122.190308,94.485229,34.222118,88.397446,...,91.443451,76.762161,151.562897,109.767601,22.780001,253.839996,86.915718,68.489998,97.63855,73.581825
2018-01-04,143.808777,183.220001,282.724396,200.3461,451.990112,52.411732,122.664986,94.538116,34.626411,88.24604,...,92.753433,77.104767,152.626083,109.572411,22.780001,257.679993,87.597809,69.260002,97.609161,74.33091


In [3]:
# Compute stock returns

snp_returns = snp_prices.pct_change()

snp_returns.head(3)

Unnamed: 0_level_0,ACN,ADBE,BA,BDX,BLK,CAH,CBOE,CE,CFG,DGX,...,JPM,LLY,MCD,NXPI,ON,ORLY,TROW,UAL,WST,YUM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02,,,,,,,,,,,...,,,,,,,,,,
2018-01-03,0.004615,0.018796,0.003234,0.00987,0.010551,-0.005852,0.006599,0.003934,0.011481,0.003539,...,0.001019,0.005432,-0.004214,0.001102,0.044475,0.018374,0.013679,-0.006527,0.001206,-0.000858
2018-01-04,0.011841,0.012042,-0.003794,0.004546,0.013201,0.012727,0.003885,0.00056,0.011814,-0.001713,...,0.014326,0.004463,0.007015,-0.001778,0.0,0.015128,0.007848,0.011243,-0.000301,0.01018


### Indices (4 features)

In [4]:
# Engineer stock features

indices = ['VTI', 'DBC', 'AGG', '^VIX']
index_prices = get.fetch_stock_data(
    indices, 
    start_date, 
    end_date, 
    col='Adj Close'
)

index_prices.head(3)

Unnamed: 0_level_0,AGG,DBC,VTI,^VIX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-02,95.199577,16.118902,125.981003,9.77
2018-01-03,95.208282,16.215483,126.710159,9.15
2018-01-04,95.147232,16.196165,127.193207,9.22


In [5]:
# 31-day index trend

window = 30

index_return_trend = np.sign(index_prices.pct_change(window))
for col in index_return_trend:
    print(index_return_trend[col].value_counts())

 1.0    810
-1.0    583
Name: AGG, dtype: int64
 1.0    845
-1.0    544
 0.0      4
Name: DBC, dtype: int64
 1.0    931
-1.0    461
 0.0      1
Name: VTI, dtype: int64
-1.0    749
 1.0    641
 0.0      3
Name: ^VIX, dtype: int64


In [30]:
index_return_trend = index_return_trend.replace(0, np.nan)
for col in index_return_trend:
    print(index_return_trend[col].value_counts())

 1.0    810
-1.0    583
Name: AGG, dtype: int64
 1.0    845
-1.0    544
Name: DBC, dtype: int64
 1.0    931
-1.0    461
Name: VTI, dtype: int64
-1.0    749
 1.0    641
Name: ^VIX, dtype: int64


In [7]:
index_return_trend.tail(3)

Unnamed: 0_level_0,AGG,DBC,VTI,^VIX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-24,-1.0,1.0,-1.0,1.0
2023-08-25,-1.0,1.0,-1.0,1.0
2023-08-28,-1.0,1.0,-1.0,1.0


### S&P Trend (5 features)

In [8]:
# 30-day stock trend

snp_return_trend = np.sign(snp_prices.pct_change(window).mean(axis=1))
snp_return_trend.value_counts()

 1.0    920
-1.0    473
dtype: int64

In [9]:
snp_return_trend.tail(3)

Date
2023-08-24   -1.0
2023-08-25   -1.0
2023-08-28   -1.0
dtype: float64

In [10]:
# 30-day-Volatility Trend

snp_std_trend = np.sign(snp_returns.rolling(window).std().pct_change().mean(axis=1))

snp_std_trend.value_counts()

 1.0    698
-1.0    694
dtype: int64

In [11]:
snp_std_trend.tail(3)

Date
2023-08-24    1.0
2023-08-25   -1.0
2023-08-28   -1.0
dtype: float64

In [12]:
# 30-day Average Volume Trend

snp_volume = get.fetch_stock_data(
    snp_tickers,
    start_date,
    end_date,
    col='Volume'
)

snp_volume.head(3)

Unnamed: 0_level_0,ACN,ADBE,BA,BDX,BLK,CAH,CBOE,CE,CFG,DGX,...,JPM,LLY,MCD,NXPI,ON,ORLY,TROW,UAL,WST,YUM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02,3061900,2432800,2978900,2756533,526400,2410000,841900,669000,5073100,874800,...,13578800,3456700,3696900,1784200,7027400,1423500,1603500,3390600,335000,1747800
2018-01-03,2064200,2561200,3211200,1792828,448700,1785000,1010500,558200,6281200,855100,...,11901000,3078400,3789600,2873200,7889700,1358000,1169800,4590900,194400,2554900
2018-01-04,1777000,2211400,4171700,1364993,389800,2728700,917900,671700,6740100,933100,...,12953700,2462500,2756400,2153500,5719700,892200,773500,3922900,367500,1971200


In [13]:
snp_volume_trend = np.sign(snp_volume.pct_change(window).mean(axis=1))
snp_volume_trend.value_counts()

 1.0    878
-1.0    515
dtype: int64

In [14]:
snp_volume_trend.tail(3)

Date
2023-08-24   -1.0
2023-08-25    1.0
2023-08-28   -1.0
dtype: float64

In [15]:
# 30-day relative strength trend

gains = snp_returns.where(snp_returns > 0, 0)
losses = snp_returns.where(snp_returns < 0, 0)
avg_gain = gains.rolling(window=window).mean().mean(axis=1)
avg_loss = losses.rolling(window=window).mean().mean(axis=1).abs()
rel_strength = (avg_gain / avg_loss).pct_change(window)
snp_rs_trend = np.sign(rel_strength)

snp_rs_trend.value_counts()

-1.0    718
 1.0    646
dtype: int64

In [16]:
# 30-day turnover trend

turnover = snp_volume * snp_prices
snp_turnover_trend = np.sign(turnover.pct_change(window).mean(axis=1))
snp_turnover_trend.value_counts()

 1.0    924
-1.0    469
dtype: int64

In [17]:
snp_turnover_trend.tail(3)

Date
2023-08-24   -1.0
2023-08-25    1.0
2023-08-28   -1.0
dtype: float64