The cells below preprocess the data so that it is ready for risk model evaluation. The
steps are:
1. Check for stationarity using the Augmented Dickey-Fuller (ADF) and Kwiatkowski-Phillips-Schmidt-Shin (KPSS) tests
2. If the data is non-stationary, plot the data to see if it has a trend, seasonal component, or cyclic component
3. If the data has a trend, remove the trend
4. If the data has a seasonal component, remove the seasonal component
5. If the data has a cyclic component, remove the cyclic component
6. After applying the transformations, re-run the stationarity tests to confirm that the data is now stationary
7. If the transformations were successful, standardize the data
8. Evaluate the risk models

In [None]:
# load stock prices
import yfinance as yf

tickers = [
    "MSFT", "AMZN", "KO", "MA", "COST",
    "LUV", "XOM", "PFE", "JPM", "UNH",
    "ACN", "DIS", "GILD", "F", "TSLA",
]

# Download the full available history. auto_adjust=False is passed so that the
# separate 'Adj Close' field selected below is part of the result.
df = yf.download(tickers, period="max", auto_adjust=False)

# Keep only the adjusted closing prices, restricted to rows labelled 1990 or later.
df = df['Adj Close'].loc["1990":]

# Discard every row in which at least one ticker has no value, so the
# remaining rows have a price for all tickers.
df = df.dropna(axis=0, how='any')

# Bare expression as the last statement so the notebook renders the frame.
df

In [None]:
# import functions from utils
import sys
import os

# Resolve the absolute path of the sibling ``src`` directory (one level up).
# NOTE: os.path.dirname('__file__') with a *quoted* name always returns '', so
# the path would silently be resolved against the current working directory.
# Use the real __file__ when it is defined (plain script execution) and fall
# back to the working directory where it is not (e.g. inside a Jupyter kernel),
# which keeps the notebook behaviour unchanged.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()
src_path = os.path.abspath(os.path.join(base_dir, os.pardir, 'src'))

# Only add to path if not already there
if src_path not in sys.path:
    sys.path.append(src_path)

# This import has to run after the sys.path update above; ``utils`` is
# presumably located in the src directory (confirm against the repo layout).
from utils import test_stationarity

# Print the path to make sure src is included
print(sys.path)

# Print the docstring of test_stationarity to confirm the import succeeded
print(test_stationarity.__doc__)

In [None]:
# Baseline: run the stationarity checks on the untransformed price levels.
# test_stationarity is imported from the project's utils module; per the
# notebook intro it is expected to apply ADF and KPSS tests (confirm in utils).
results = test_stationarity(df)

# Bare expression as the last statement so the notebook renders the result.
results

In [None]:
# First attempt: first-difference the prices, then drop every row that
# contains a NaN (the first row has no predecessor to subtract).
df_diff = df.diff()
df_diff = df_diff.dropna()

# Re-run the stationarity checks on the differenced data and print the whole
# table, rendering float values with six decimal places.
results = test_stationarity(df_diff)
print(
    results.to_string(
        float_format=lambda value: value if not isinstance(value, float) else '%.6f' % value
    )
)

In [None]:
import numpy as np

# Second attempt: natural-log transform of the price levels (no differencing).
df_log = np.log(df)

# Re-run the stationarity checks on the logged data and print the whole
# table, rendering float values with six decimal places.
results = test_stationarity(df_log)
print(
    results.to_string(
        float_format=lambda value: value if not isinstance(value, float) else '%.6f' % value
    )
)

In [None]:
# Third attempt: log returns for every column, i.e. the difference between
# consecutive log prices. Rows containing a NaN (the first row) are dropped.
df_log_returns = (
    np.log(df)
    .diff()
    .dropna()
)

# Check stationarity of the log returns for all tickers and print the whole
# table, rendering float values with six decimal places.
results = test_stationarity(df_log_returns)
print(
    results.to_string(
        float_format=lambda value: value if not isinstance(value, float) else '%.6f' % value
    )
)