# Applying Indicators to Price Data
Takes the output of `iq_feed_cleaning.ipynb` and adds indicator columns to every row. \
This is preferred over calculating the indicators separately for each timestamp/date, as it saves 
a lot of computation. \
It does require us to do a look-up afterwards, but so does the alternative.

In [None]:
import pandas as pd
import numpy as np
import pytz
eastern = pytz.timezone('US/Eastern')
from src.config import config
from concurrent.futures import ThreadPoolExecutor
import os
from src.utils.tickers import get_tickers

In [None]:
# Collect the items to process from the cleaned daily IQFeed location
# (presumably ticker symbols -- `get_tickers` lives in src.utils.tickers; confirm there).
tickers = get_tickers(config.data.iqfeed.daily.cleaned)

---------

In [None]:
# Beta calculation (use intraday?)
# Load the cleaned daily parquet files for one stock (AAPL) and for SPY, then
# inner-join them on their indexes; column names present in both frames get
# the "_stock" / "_SPY" suffixes.
prices = pd.read_parquet(path=f"{config.data.iqfeed.daily.cleaned}/AAPL_daily.parquet")
spy = pd.read_parquet(path=f"{config.data.iqfeed.daily.cleaned}/SPY_daily.parquet")
X = prices.merge(
    spy,
    left_index=True,
    right_index=True,
    suffixes=("_stock", "_SPY"),
)

In [None]:
# Simple one-row returns: each adjusted close divided by the previous row's
# adjusted close, minus 1.
stock_close = X["adj_close_stock"]
spy_close = X["adj_close_SPY"]
X["r_stock"] = stock_close.div(stock_close.shift()).sub(1)
X["r_SPY"] = spy_close.div(spy_close.shift()).sub(1)
# Keep only the rows where both return series are available.
returns = X.loc[:, ["r_stock", "r_SPY"]].dropna()

In [None]:
from arch import arch_model

# Empty containers that will collect, per fitted model: the parameters, the
# conditional volatility, the standardized residuals and the fit result itself.
coeffs, cond_vol, std_resids, models = [], [], [], []

In [None]:
# Fit a constant-mean GARCH(1, 1) model to every return column and record
# its parameters, conditional volatility and standardized residuals.
for asset in returns.columns:
    garch_spec = arch_model(returns[asset], mean='Constant', vol='GARCH', p=1, o=0, q=1)
    model = garch_spec.fit(update_freq=0, disp='off')
    fitted_vol = model.conditional_volatility

    coeffs.append(model.params)
    cond_vol.append(fitted_vol)
    # Standardized residual = residual scaled by the conditional volatility.
    std_resids.append(model.resid / fitted_vol)
    models.append(model)

In [None]:
# Gather the collected results into DataFrames labelled by return column:
# one row per asset for the coefficients, one column per asset for the
# conditional volatilities and the standardized residuals.
asset_names = returns.columns
coeffs_df = pd.DataFrame(coeffs, index=asset_names)
cond_vol_df = pd.DataFrame(cond_vol).T.set_axis(asset_names, axis=1)
std_resids_df = pd.DataFrame(std_resids).T.set_axis(asset_names, axis=1)

In [None]:
# Constant conditional correlation (CCC) matrix R: the cross-product of the
# standardized residuals divided by the number of rows.
n_obs = len(std_resids_df)
R = (std_resids_df.T @ std_resids_df) / n_obs

In [None]:
# Calculate the one-step-ahead forecast of the conditional covariance matrix,
# H = D R D, where D is a diagonal matrix of forecast volatilities.
import numpy as np

# Square root of each model's 1-step-ahead variance forecast
# (last row, first horizon column of the forecast table).
diag = np.sqrt(
    np.array([fitted.forecast(horizon=1).variance.values[-1][0] for fitted in models])
)

# Build D from the forecasts themselves rather than a hard-coded 2x2 array,
# so its size always follows the number of fitted models.
D = np.diag(diag)

H = D @ R.values @ D

In [None]:
# Display the forecast conditional covariance matrix as the cell output.
H

------------

In [None]:
# Load the daily factor CSV, give its five columns fixed names, parse the
# first column as YYYYMMDD dates and use it as the index.
factors = pd.read_csv("D:/data/F-F_Research_Data_Factors_daily.CSV")
factors.columns = ["date", "Mkt-RF", "SMB", "HML", "RF"]
factors["date"] = pd.to_datetime(factors["date"], format="%Y%m%d")
factors = factors.set_index("date")

In [None]:
# Inner-join the stock returns (scaled by 100, presumably to match factor
# values quoted in percent -- confirm) with the factor table on their indexes,
# dropping rows that contain missing values.
A = pd.merge(
    X["r_stock"].mul(100),
    factors,
    how="inner",
    left_index=True,
    right_index=True,
).dropna()
# Zero out every column in the first row.
A.iloc[0] = 0

In [None]:
# Turn each column of A into growth factors (1 + A/100, i.e. A is treated as
# percent), take the column-wise cumulative product, and plot the result on a
# logarithmic y-axis.
(1 + A/100).apply(np.cumprod, axis=0).plot(logy=True)

In [None]:
# 252-row rolling correlation between each factor column and the stock's return series.
factors.rolling(252).corr(X.r_stock)

In [None]:
def add_indicators(ticker):
    """Add indicator columns to one ticker's cleaned daily parquet file.

    Reads ``{ticker}_daily.parquet`` from the cleaned daily directory, adds
    the columns listed below and writes the frame back to the same path,
    overwriting the file.

    Columns added:
        std_252: Rolling 252-row standard deviation of the ``adj_close``
            percentage change, multiplied by ``252 ** 0.5``. Missing until
            the window holds 252 valid observations.
        dollar_volume: ``adj_volume`` times the mean of ``adj_close`` and
            ``adj_open``.
        r_intra_(t-1): Open-to-close return (``adj_close / adj_open - 1``)
            of the previous row.
        unadj_open: ``adj_open`` divided by ``cum_split_ratio``.

    Args:
        ticker: Symbol used to build the parquet file name.

    Returns:
        None. The result is persisted to disk.
    """
    parquet_path = f"{config.data.iqfeed.daily.cleaned}/{ticker}_daily.parquet"
    prices = pd.read_parquet(path=parquet_path)

    adj_open = prices["adj_open"]
    adj_close = prices["adj_close"]

    close_to_close = adj_close.pct_change()
    rolling_std = close_to_close.rolling(252, min_periods=252).std()
    prices["std_252"] = rolling_std * (252 ** 0.5)

    prices["dollar_volume"] = prices["adj_volume"] * (adj_close + adj_open) / 2

    # Shift by one row so each row carries the previous row's intraday return.
    open_to_close = adj_close / adj_open - 1
    prices["r_intra_(t-1)"] = open_to_close.shift(periods=1)

    prices["unadj_open"] = adj_open / prices["cum_split_ratio"]

    prices.to_parquet(path=parquet_path)

In [None]:
# Run add_indicators for every ticker on a thread pool.
# Leave one CPU free, but never request fewer than one worker:
# os.cpu_count() may return None, and on a single-core machine
# `cpu_count() - 1` would be 0, which ThreadPoolExecutor rejects.
max_workers = max(1, (os.cpu_count() or 1) - 1)

# The context manager shuts the pool down once all tasks are done.
with ThreadPoolExecutor(max_workers=max_workers) as pool_obj:
    ans = pool_obj.map(add_indicators, tickers)
    # Consuming the iterator waits for every task and re-raises the first
    # exception raised by a worker, so failures are not silently lost.
    result = list(ans)