In [1]:
import numpy as np
import pandas as pd
import gc
from sklearn.linear_model import LinearRegression as LR

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
%matplotlib notebook
pd.set_option('display.max_rows', 500)

from valkyrie.avalon.feature_analyzer import FeatureMgr
from valkyrie.securities import stocks_good_dvd, parent
from valkyrie.quants import WinsorizedLM

  from IPython.core.display import display, HTML


# Load Data

In [3]:
#feature_mgr = FeatureMgr('20210101', '20210630', ['TWO PRC'] , 'latest')#
feature_mgr = FeatureMgr('20210101', '20210630', stocks_good_dvd() , 'eve_quoter_comet_weighted_fit_demean_1w')
rets = [1 , 2 , 5, 10, 12, 15]
ret_ns = [int(n * 2 * 60 * 6.5) for n in rets]

ret_col, s2_col = 'cywa_mkt_xh', 'cywa_mkt_s2h'
df = feature_mgr.calc_rets(ret_col, ret_ns, s2_col)
gc.collect()
df['parent'] = df['ticker'].apply(parent)

# LM Fitting

In [32]:
def lm_fit(df, xcols, ycols, wcols, quantile = 0.03, fit_intercept = False):
    hz2coeffs = {}
    #print(xcols)
    for ycol, wcol in zip(ycols, wcols):
        X = df[xcols].values.copy()        
        y = df[ycol].values.copy()
        w = df[wcol].values.copy()
        
        ym = np.sum(y*w) / np.sum(w)        
        y = y - ym
        
        wlm = WinsorizedLM(quantile = quantile, linear_model = LR, fit_intercept = fit_intercept)
        wlm.fit(X, y, w)
        r2 = wlm.score(X, y, w)
        #print(f'ycol:{ycol} r2:{r2}')#, corr:{np.sqrt(r2)}')        
        coeffs = {c.replace('feature_', '') : wlm.lm.coef_[0][i] for i, c in enumerate(df[xcols]) }# | {'intercept' : wlm.lm.intercept_[0]}
        if fit_intercept:
            coeffs = coeffs | {'intercept' : wlm.lm.intercept_[0]}
        hz2coeffs[ycol] = coeffs
        print(hz2coeffs)
                
    return r2, hz2coeffs

feature2fml = {}
#ycols = [f'risk_tv_ret_{hz}_n' for hz in ret_ns]
#wcols = [f'wgt_risk_tv_ret_{hz}_n' for hz in ret_ns]

ycols = [f'cywa_mkt_xh_ret_{hz}_n' for hz in ret_ns]
wcols = [f'wgt_cywa_mkt_xh_ret_{hz}_n' for hz in ret_ns]

# AUTOMR LM fit

In [38]:
df_automr = df[['ticker','parent'] +[c for c in df if 'cywa' in c]].copy() # + ['intercept']

#df_automr['feature_1'] = df_automr.eval(f'cywa_vlong_xh')
#df_automr['feature_2'] = df_automr.eval(f'cywa_mid_xh ')
#df_automr['feature_3'] = df_automr.eval(f'cywa_mkt_xh')
#df_automr['feature_4'] = df_automr.eval(f'pff_cywa_mkt_xh')
#df_automr['feature_5'] = df_automr.eval(f'pff_cywa_vlong_xh')

df_automr['feature_1'] = df_automr.eval(f'pff_cywa_vlong_xh - cywa_vlong_xh')
df_automr['feature_2'] = df_automr.eval(f'pff_cywa_mkt_xh - cywa_mkt_xh ')

#df_automr['feature_0'] = df_automr.eval(f'-(pff_cywa_vlong_xh - cywa_vlong_xh) + (pff_cywa_mkt_xh - cywa_mkt_xh)')

            
xcols = [c for c in df_automr if 'feature_' in c]
#print(df_automr.groupby('ticker').apply(lambda df : lm_fit(df, xcols, [ycols[0]], wcols)))

print(lm_fit(df_automr, xcols, [ycols[2]], wcols))

{'cywa_mkt_xh_ret_3900_n': {'1': -0.434072326660206, '2': 0.44181007090067037}}
(0.03036400569084652, {'cywa_mkt_xh_ret_3900_n': {'1': -0.434072326660206, '2': 0.44181007090067037}})


In [23]:
df_automr = df[['ticker','parent'] +[c for c in df if 'cywa' in c]].copy() # + ['intercept']

df_automr['feature_1'] = df_automr.eval(f'cywa_vlong_xh')
df_automr['feature_2'] = df_automr.eval(f'cywa_mid_xh ')
df_automr['feature_3'] = df_automr.eval(f'cywa_mkt_xh')
#df_automr['feature_4'] = df_automr.eval(f'pff_cywa_mkt_xh')
#df_automr['feature_5'] = df_automr.eval(f'pff_cywa_vlong_xh')

            
xcols = [c for c in df_automr if 'feature_' in c]
pff_coeffs = lm_fit(df_automr, xcols, ycols, wcols)
df_automr.groupby('parent').apply(lambda df : lm_fit(df, xcols, [ycols[0]], wcols))

parent
AGNC    0.006833
BAC     0.000586
C       0.000433
CIM     0.003369
JPM     0.000638
NLY     0.005423
NRZ     0.005891
PMT     0.004240
STAR    0.029147
TWO     0.007206
WFC     0.001860
dtype: float64