In [44]:
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.graph_objects as go
import plotly.express as px
from denoise_example import *

In [3]:
# Columns read from the fund-info CSV; the two flag columns are only needed for filtering.
cols = ['symbol', 'name', 'FUND_TOTAL_ASSETS', 'FUND_LEVERAGE', 'FUND_ASSET_CLASS_FOCUS', 'FUND_MGR_STATED_FEE', 'INVERSE_FUND_INDICATOR']

# Build the filtered table in a single non-mutating chain so the cell can be
# re-run safely: keep rows flagged 'N' for both FUND_LEVERAGE and
# INVERSE_FUND_INDICATOR, drop the two flag columns, and index by symbol.
frac_ticks_info_df = (
    pd.read_csv('data/frac_ticks_infos.csv')[cols]
    .loc[lambda info: info.FUND_LEVERAGE == 'N']
    .loc[lambda info: info.INVERSE_FUND_INDICATOR == 'N']
    .drop(columns=['FUND_LEVERAGE', 'INVERSE_FUND_INDICATOR'])
    .set_index('symbol')
)

In [6]:
# Symbols that survived the fund-info filter (the index of frac_ticks_info_df).
frac_ticks = frac_ticks_info_df.index.to_list()

# auto_adjust=False is passed explicitly: newer yfinance releases default to
# auto_adjust=True, in which case the result has no 'Adj Close' column and the
# selection below raises KeyError. Older releases already defaulted to False,
# so this keeps the original behaviour across versions.
ohlc = yf.download(frac_ticks, period='30y', auto_adjust=False)

# Keep the adjusted closes and drop dates on which no ticker has a value.
prices = ohlc['Adj Close'].dropna(how='all')

[*********************100%***********************]  818 of 818 completed


In [24]:
# Tickers whose earliest non-missing price is dated before 2009-01-01.
old_ticks = [
    tick
    for tick in prices.columns
    if prices[tick].dropna().index.min() < pd.Timestamp('2009-01-01')
]

# Restrict to dates where every retained ticker has a price, then take
# period-over-period percentage changes (the first row is NaN and is dropped).
rets = prices.loc[:, old_ticks].dropna().pct_change().dropna()

# q = number of return rows divided by number of ticker columns.
q = len(rets.index) / len(rets.columns)
print(q)

10.93134328358209


In [28]:
# Dimensions of the returns frame as (n_rows, n_columns); columns are the retained tickers.
rets.shape

(3662, 335)

In [33]:
# Rows-to-columns ratio of the returns frame, and the bandwidth handed to find_max_eval.
q = len(rets.index) / len(rets.columns)
b_width = 0.1

# Covariance with each column treated as one variable, then converted to a
# correlation matrix by the cov2corr helper from denoise_example.
cov = np.cov(rets, rowvar=False)
corr = cov2corr(cov)

# getPCA returns a pair; only the diagonal of the first element is used below
# (presumably the eigenvalues of corr -- confirm in denoise_example).
e_val, e_vec = getPCA(corr)
eig_diag = np.diag(e_val)

# find_max_eval yields a cut-off value and a fitted `var`; the factor count is
# the number of diagonal entries strictly above that cut-off.
e_max, var = find_max_eval(eig_diag, q, bWidth=b_width)
n_pred_facts = np.sum(eig_diag > e_max)

In [35]:
# Re-display the returns frame dimensions (n_rows, n_columns) that q is computed from.
rets.shape

(3662, 335)

In [38]:
# Density curve built from the fitted `var` and the ratio `q`; it is plotted under the
# name 'Marcenko-Pastur' in the figure cell. (mpPDF comes from denoise_example;
# presumably `pts` is the number of evaluation points -- confirm there.)
mp_pdf = mpPDF(var, q, pts=1000)
# Empirical counterpart fitted on the diag(e_val) values with the same bandwidth,
# evaluated at mp_pdf's index values (fitKDE is presumably a kernel density fit -- confirm).
# NOTE(review): emp_pdf is only referenced by a commented-out trace in the figure cell.
emp_pdf = fitKDE(np.diag(e_val), b_width, x=mp_pdf.index.values)

In [46]:
# Cut-off returned by find_max_eval; diag(e_val) entries above it are counted in n_pred_facts.
e_max

1.696376535204692

In [48]:
# Show the diagonal of e_val, i.e. the values that are compared against e_max.
# NOTE(review): this prints the whole array; a slice would keep the output short.
np.diag(e_val)

array([2.10033215e+02, 1.98552102e+01, 1.25281723e+01, 8.68016850e+00,
       7.23639257e+00, 4.99105259e+00, 3.89419685e+00, 3.42232208e+00,
       3.12804504e+00, 2.91813144e+00, 2.75041163e+00, 2.05200236e+00,
       1.85978204e+00, 1.77101779e+00, 1.52017277e+00, 1.40910015e+00,
       1.34924017e+00, 1.24218955e+00, 1.18368879e+00, 1.14996562e+00,
       1.06393696e+00, 1.03040556e+00, 1.02258712e+00, 9.95839011e-01,
       9.65367554e-01, 9.19864316e-01, 8.82681782e-01, 8.51032638e-01,
       8.30389978e-01, 8.06481304e-01, 7.58641813e-01, 7.38139972e-01,
       7.22413656e-01, 7.00404696e-01, 6.80005717e-01, 6.64145533e-01,
       6.32614925e-01, 6.10416917e-01, 5.88756070e-01, 5.82200116e-01,
       5.54600063e-01, 5.51760929e-01, 5.39370629e-01, 5.17426849e-01,
       5.04774884e-01, 4.86887646e-01, 4.76474865e-01, 4.59584176e-01,
       4.53507131e-01, 4.46349143e-01, 4.26634965e-01, 4.10428783e-01,
       4.04529541e-01, 3.84760839e-01, 3.81980234e-01, 3.64301150e-01,
      

In [47]:
# Summary statistics (count, mean, std, quartiles, min/max) of the diag(e_val) values.
pd.Series(np.diag(e_val)).describe()

count    335.000000
mean       1.000000
std       11.552618
min        0.000819
25%        0.011480
50%        0.045886
75%        0.200714
max      210.033215
dtype: float64

In [45]:
# Quick-look histogram of the diag(e_val) values (default binning, no normalisation).
px.histogram(np.diag(e_val))

In [40]:
# Number of bins requested for the empirical histogram.
nbinsx = 200

# Overlay the Marcenko-Pastur density curve on the empirical distribution of the
# diag(e_val) values, so the two can be compared on the same axes.
fig = go.Figure()
fig.add_trace(go.Scatter(x=mp_pdf.index, y=mp_pdf, mode='lines', name='Marcenko-Pastur'))
# Alternative empirical trace (KDE evaluated on the same grid), kept disabled:
# fig.add_trace(go.Scatter(x=emp_pdf.index, y=emp_pdf, mode='markers', name='Empirical Dist'))
# The histogram is normalised to a probability density so it shares the curve's scale.
fig.add_trace(go.Histogram(x=np.diag(e_val), histnorm='probability density', nbinsx=nbinsx, name='Empirical Dist'))
# Title and axis labels let the figure stand alone when the notebook is skimmed.
fig.update_layout(
    title='Marcenko-Pastur PDF vs. empirical eigenvalue distribution',
    xaxis_title='Eigenvalue',
    yaxis_title='Probability density',
)
fig.show()

In [34]:
# Count of diag(e_val) entries above e_max, and the `var` value returned by find_max_eval.
# NOTE(review): the recorded var of 0.99999 may mean the fit inside find_max_eval stopped
# at a bound rather than converging -- confirm in denoise_example.
n_pred_facts, var

(14, 0.99999)