# Deep learning technical model for ETFs

Based on the kibot data

* Based on etf_deep_learning_2020.04.17
* Attempt to get minibatch training going --- this did not work in the previous version

In [None]:
# preamble
%alias_magic  nbagg matplotlib -p nbagg
%alias_magic  inline matplotlib -p inline
%load_ext memory_profiler

from io import BytesIO
from IPython.display import display, Image, HTML
from pylab import *
plt.style.use( 'seaborn-whitegrid' )

from madmax.api import *
mx = mx.reload()
mxtr = mxtr.reload()

from research import yahoo; reload( yahoo)

###################################################################
# Interactive configuration 
# When running a config grid this will throw a FrozenException 
# and the grid Config will be used
###################################################################
try:
    # Interactive defaults: these assignments are what raise FrozenException
    # when a Config has already been frozen (see the grid note above).
    mx.Config.name = 'etf_technical/deep_learning/2020.04.27'
    mx.Config.mode = 'interactive'
    mx.Config.hps = mx.HPSet()
    mx.Config.code = 'etf_deep_learning_2020.04.27.ipynb'
    disp( 'Running template configuration ', h=2 )        
except mx.Config.FrozenException:
    # A frozen Config is only expected in grid mode; anything else is an error.
    if mx.Config.mode != 'grid':
        raise RuntimeError( 'An unexpected configruation encountered' )
    disp( 'Running a grid configuration ', h=2 )

# Hyperparameters come from whichever Config ended up active (interactive or grid).
hps = mx.Config.hps
mx.Config.start()


# Device and dtype used when the model and tensors are created in later cells.
# NOTE(review): the device is hard-coded to 'cuda'; there is no CPU fallback here.
device = 'cuda'
dtype = tr.float32

In [None]:
# the various features in the data
####################################################################
from research.etf_technical import load_data as eld; reload( eld )


# OHLC prices per horizon; the day and week horizons also carry the previous close
price_features = [
    'open_30min', 'high_30min', 'low_30min', 'close_30min',
    'open_day', 'high_day', 'low_day', 'close_day', 'close_prev_day',
    'open_week', 'high_week', 'low_week', 'close_week', 'close_prev_week',
    'open_month', 'high_month', 'low_month', 'close_month',
]

# share volume followed by dollar volume, each over the same set of horizons
_VOLUME_HORIZONS = ('30min', 'intraday', 'intraweek', '1hr', '1day')
volume_features = [
    f'{kind}_{horizon}'
    for kind in ('volume', 'dollar_volume')
    for horizon in _VOLUME_HORIZONS
]

# raw calendar fields followed by a cos/sin pair for each cyclic field
_CALENDAR_CYCLES = ('timeofday', 'dayofweek', 'dayofmonth', 'weekofyear')
calendar_features = (
    ['week', 'month']
    + list( _CALENDAR_CYCLES )
    + [f'{fn}_{cycle}' for cycle in _CALENDAR_CYCLES for fn in ('cos', 'sin')]
)

# lagging log returns
returns_features = [
    'logrtn_lag_intraday',
    'logrtn_lag_overnight',
    'logrtn_lag_intraweek',
    'logrtn_lag_weekend',
    'logrtn_lag_intramonth',
    'logrtn_lag_30min',
    'logrtn_lag_1hr',
    'logrtn_lag_1day',
]

# leading log returns used as responses ('logrtn_lead_intraday' is currently left out)
responses = [
    'logrtn_lead_30min',
    'logrtn_lead_1hr',
    'logrtn_lead_1day',
]


################################################################################################
# build features for the deep learning (same as the code for the 30-min online model)
################################################################################################
@mx.operatorize( memoize='md', consumes_features=False, produces_meta=True )
def FeatureBuilder( data, responses=responses,  emv=True, ema=True, equalize_lagging_returns=False,
                    haar=False, dilation=2, levels=5, winsorize=0.05, smz=False,
                    volume_features=volume_features, returns_features=returns_features,
                    verbose=False ):
    '''
    Build the 30-minute panel used for learning.

    The saved 30-minute data is loaded for every symbol in eld.kibot_day_files
    (keys starting with 'INCOMPLETE' are skipped) and concatenated along the
    'symbol' dimension. Derived features are then added step by step.

    data:
        not read by this function body; the panel is loaded through eld
    responses:
        leading-return features; each one gets a '<name>_equalized' copy
        divided by the annual volatility
    emv, ema:
        when truthy, add exponentially weighted sd / mean features
        (halflifes of 2, 10 and 30 days of 30-minute intervals)
    equalize_lagging_returns:
        divide out annual vol from all lagging returns features (in place).
        Leading returns features are equalized anyway
    haar, dilation, levels:
        when haar is truthy, add haar-transform features built with the
        given levels and dilation
    winsorize:
        quantile passed to the batch winsorizer; a falsy value skips the step
    smz:
        window passed to the rolling zscore; a falsy value skips the step
    volume_features, returns_features:
        the feature names that seed the returned feature list
    verbose:
        passed to the logger

    Returns ( da, features, meta ) where meta is a dict holding
    'equalized_responses' and 'annual_vol'.
    '''
    # number of 30-minute intervals per day used to scale the windows below
    INTS_PER_DAY = 13
    logger = mx.Logger( 'build_features', verbose=verbose )
    das = []
    with logger.timer( 'loading the data'):
        for symbol in logger.pbar( eld.kibot_day_files.keys() ):
            if symbol.startswith( 'INCOMPLETE' ):
                logger.warn( f'No data for {symbol} - SKIPPING' )
                continue
            logger.info( 'loading data for ', symbol )
            da, meta = eld.get_saved_30min_data( symbol=symbol )
            # add a 'valid' mask feature marking where open_30min is finite
            da = da.assign_features( valid=da.loc[:,:,'open_30min'].isfinite() )
            das.append( da )
        da = xa.concat( das, dim='symbol' )
    # concatenation builds a new list, so the default argument lists are not mutated below
    features = volume_features + returns_features
    with logger.timer( ' compute log features' ):
        for ft in ['dollar_volume_30min', 'open_30min', 'dollar_volume_intraday',  'dollar_volume_intraweek',] :
            da = da.assign_features( **{ f'log_{ft}': da.loc[:,:, ft].log() } )
            features.append( f'log_{ft}' )
    with logger.timer(' compute an annual returns volatility'):
        # rolling 252-day sd of the 30-minute lagging return, needing 60 days of intervals
        avc = mx.transforms.rolling( windows='252D', sid='symbol', min_periods=INTS_PER_DAY*60 ).sd()
        da = avc( da, features='logrtn_lag_30min' )
        annual_vol = avc.output_features[0]
        # equalize the response variable (the small constant guards against division by zero)
        equalized_responses = []
        for response in responses:
            equalized_response = f'{response}_equalized'
            da = da.assign_features( **{equalized_response: da.loc[:,:,response] / ( da.loc[:,:,annual_vol] + 1e-12 ) } )
            equalized_responses.append( equalized_response )
    if equalize_lagging_returns:
        # equalize all the lagging returns columns in place
        with logger.timer( 'compute "normalized" leading and lagging returns' ):
            da.loc[:,:,returns_features] =  da.loc[:,:,returns_features] / ( da.loc[:,:,annual_vol] + 1e-12 )
    # volume, price and returns volatility
    # (from here on the emv / ema / haar argument names are rebound to the transform objects)
    if emv:
        with logger.timer( 'compute volatility' ):
            emv = mx.transforms.exponential( halflifes=np.array( [2, 10, 30] ) * INTS_PER_DAY, sid='symbol' ).sd()
            da = emv( da, features=['log_open_30min', 'log_dollar_volume_30min', 'logrtn_lag_30min', 'logrtn_lag_1day'] )
            features += emv.output_features
    # volume, price and returns exponentially weighted mean
    if ema:
        with logger.timer( 'compute ema' ):
            ema = mx.transforms.exponential( halflifes=np.array( [2, 10, 30] ) * INTS_PER_DAY, sid='symbol' ).mean()
            da = ema( da, features=['log_open_30min', 'log_dollar_volume_30min', 'logrtn_lag_30min', 'logrtn_lag_1day'] )
            features += ema.output_features
    # compute haar transform
    if haar:
        with logger.timer( 'compute haar' ):
            haar = mx.transforms.haar( levels=levels, dilation=dilation, sid='symbol' )
            haar.features = ['log_open_30min', 'log_dollar_volume_30min', 'logrtn_lag_30min', 'logrtn_lag_1day']
            da = haar( da )
            features += haar.output_features
    # winsorize the constructed features, the annual vol and the equalized responses
    if winsorize:
        with logger.timer( 'winsorizing' ):
            wins = mx.transforms.batch( sid='symbol' ).winsorize( quantiles=winsorize )
            wins.features = sorted( set(features + [annual_vol] + equalized_responses  ) )
            da = wins( da )
            # drop the original features and rename the winsorized ones back to the original names
            # NOTE(review): the zip assumes output_features is aligned with wins.features - confirm
            da = (da.drop_coords( features=wins.features )
                  .rename_coords( features=dict( zip( wins.output_features, wins.features ) ) ))
    # zscore the features
    if smz:
        with logger.timer( 'zscoring' ):
            zscore = mx.transforms.rolling( windows=smz, sid='symbol' ).zscore()
            da = zscore( da, features=features )
            # drop the original features and rename the zscored ones back to the original names
            da = da.drop_coords( features=zscore.features ).rename_coords( features=dict( zip( zscore.output_features, zscore.features ) ) )
    # add a dummy weight column
    da = da.assign_features( one=1 )
    return da, features, dict( equalized_responses=equalized_responses, annual_vol=annual_vol )


In [None]:
# --- build data and features ---
######################################################################################################
builder = FeatureBuilder(
    responses=responses,
    volume_features=volume_features,
    returns_features=returns_features,
    smz=False,
)
builder.data = None

da = builder()
disp( 'expected FeatureBuilder_8f67a6e37 got', builder.hash() )

# collect the metadata row of each symbol from the first ETF table that lists it
etf_tables = ( eld.bond_etfs, eld.equity_etfs, eld.vol_etfs )
metas = []
for symbol in da.symbol.values:
    for etf_table in etf_tables:
        rows = etf_table[etf_table.Symbol == symbol]
        if len( rows ) > 0:
            metas.append( rows.iloc[0].to_dict() )
            break

# keep only the history from 2010 onwards
da = da.loc['20100101':]
meta_df = pd.DataFrame( metas )
# start from a fresh hyperparameter set for the cells below
hps = mx.HPSet()

## Setup learning hyperparameters

## Setup the dataset for learning


In [None]:
#setup dataset
#-----
%nbagg
mxtr = mxtr.reload()

# lagging returns and their exponentially weighted sd / mean
lagging_return_inputs = [
    'logrtn_lag_overnight', 'logrtn_lag_weekend', 'logrtn_lag_30min', 'logrtn_lag_1hr', 'logrtn_lag_1day',
    'logrtn_lag_intraday', 'logrtn_lag_intraweek', 'logrtn_lag_intramonth',
    'logrtn_lag_30min_26.ew_sd', 'logrtn_lag_1day_26.ew_sd',
    'logrtn_lag_30min_130.ew_sd', 'logrtn_lag_1day_130.ew_sd',
    'logrtn_lag_30min_390.ew_sd', 'logrtn_lag_1day_390.ew_sd',
    'logrtn_lag_30min_26.ew_mean', 'logrtn_lag_1day_26.ew_mean',
    'logrtn_lag_30min_130.ew_mean', 'logrtn_lag_1day_130.ew_mean',
    'logrtn_lag_30min_390.ew_mean', 'logrtn_lag_1day_390.ew_mean',
]
# log dollar volume and its exponentially weighted sd / mean
dollar_volume_inputs = [
    'log_dollar_volume_30min', 'log_dollar_volume_intraday', 'log_dollar_volume_intraweek',
    'log_dollar_volume_30min_26.ew_sd', 'log_dollar_volume_30min_130.ew_sd', 'log_dollar_volume_30min_390.ew_sd',
    'log_dollar_volume_30min_26.ew_mean', 'log_dollar_volume_30min_130.ew_mean', 'log_dollar_volume_30min_390.ew_mean',
]
# log open price and its exponentially weighted sd / mean
open_price_inputs = [
    'log_open_30min',
    'log_open_30min_26.ew_sd', 'log_open_30min_130.ew_sd', 'log_open_30min_390.ew_sd',
    'log_open_30min_26.ew_mean', 'log_open_30min_130.ew_mean', 'log_open_30min_390.ew_mean',
]
hps.features = lagging_return_inputs + dollar_volume_inputs + open_price_inputs

# calendar inputs are switched off; candidates:
# ['cos_timeofday', 'sin_timeofday', 'cos_dayofweek', 'sin_dayofweek', 'cos_dayofmonth','sin_dayofmonth', 'cos_weekofyear', 'sin_weekofyear']
hps.calendar_features = []
# single response; alternative:
# ['logrtn_lead_30min_equalized', 'logrtn_lead_1hr_equalized', 'logrtn_lead_1day_equalized']
hps.responses = ['logrtn_lead_1day_equalized']
# priming used when computing the loss is related to the dilation and number of resnet layers
hps.priming = 13 * 3 * (2 ** 4)
hps.data_standardize = False

T_da = da.time.astype( np.int64 ) / mx.TIME_TO_INT
X_da = da.loc[:, :, hps.features + hps.calendar_features]
#C_da = da.loc[:,:,hps.calendar_features]
Y_da = da.loc[:, :, hps.responses]

# compute the valid-data mask, accounting for priming, one step at a time
# 1. fill small gaps in the validity flag
gap_filled_da = da.loc[:, :, 'valid'].ffill( dim='time', limit=13*10 )
# 2. extend out leading nulls by the priming factor
primed_da = gap_filled_da.rolling( time=hps.priming, min_periods=hps.priming ).mean()
# 3. keep only the valid data for each symbol
V_da = primed_da.notnull()
V_da = ( V_da * da.loc[:, :, 'valid'].fillna( 0 ) ).broadcast_like( Y_da )
# the weight matrix --- to do.
W_da = V_da

if hps.data_standardize:
    pooled_dims = ['time', 'symbol']
    X_da = X_da / X_da.std( dim=pooled_dims )
    Y_da = Y_da / Y_da.std( dim=pooled_dims )
X_da = X_da.fillna( 0 )
Y_da = Y_da.fillna( 0 )

# Template masker that splits the panel into training, validation and test samples.
# The priming should be enough to account for any lookahead issues.
masker = mxtr.PanelMasker(
    data=da,
    priming=hps.priming,
    num_its=None,
    trg_mask_size=None,
    tst_split=mx.SplitData.time_cutoff,
    tst_frac=0.20,
    tst_mask_size=None,
    val_split=mx.SplitData.time_cutoff,
    val_frac=0.20,
    val_mask_size=None,
)
masker.restrict( da )
trg_mask = masker.full_mask( 'trg' )
val_mask = masker.full_mask( 'val' )
tst_mask = masker.full_mask( 'tst' )

# together the three masks must cover every row except the priming rows
covered_rows = set( trg_mask.values ) | set( tst_mask.values ) | set( val_mask.values )
assert len( covered_rows ) == len( da ) - hps.priming

# report the time span and the share of rows of each sample
num_rows = len( da )
trg_span = da[trg_mask].time
val_span = da[val_mask].time
tst_span = da[tst_mask].time
disp(
    'trg start, end', trg_span.min().values, trg_span.max().values,
    'trg_frac=', len( trg_mask ) / num_rows,
    '\nval start, end', val_span.min().values, val_span.max().values,
    'val_frac=', len( val_mask ) / num_rows,
    '\ntst start, end', tst_span.min().values, tst_span.max().values,
    'tst_frac=', len( tst_mask ) / num_rows,
)

def _panel_dataset( mask=None ):
    ''' bundle the (times, X, Y, W) panels into a TensorDataset, optionally restricted to mask '''
    panels = ( T_da, X_da, Y_da, W_da )
    if mask is not None:
        panels = tuple( panel[mask] for panel in panels )
    times, *values = panels
    return mxtr.TensorDataset(
        times.to_tensor(),  # do not change this dtype - otherwise you lose precision
        *( panel.to_tensor( dtype=dtype ) for panel in values ),
    )

# the full panel, followed by the training, validation and test samples
full_dataset = _panel_dataset()
trg_dataset = _panel_dataset( trg_mask )
val_dataset = _panel_dataset( val_mask )
tst_dataset = _panel_dataset( tst_mask )

# Disabled diagnostic: histograms of the per-symbol standard deviation (taken over
# time) of the responses and of every input feature. Flip the condition to run it.
if False:
    Y_da.std( dim=['time']).to_dataframe_mx().hist(bins=100)
    figure(figsize=(20,20))
    # Size the grid from the feature count: a fixed 5x5 grid holds at most 25
    # panels and subplot() raises once the index runs past rows * cols.
    n_cols = 5
    n_rows = max( 1, -( -len( X_da.features.values ) // n_cols ) )  # ceiling division
    for i, c in enumerate( X_da.features.values ):
        ax = subplot( n_rows, n_cols, i+1 )
        X_da.loc[:,:,c].std( dim=['time']).to_series().hist(bins=100, ax=ax )
        ax.set_title( c )


## The model 

In [None]:
# -- setup the model --
mxtr = mxtr.reload()
from research.etf_technical import model_v1 as mdl; reload(mdl)
mx.Config.start()

# start tensorboard logging with clear=True and launch its server
disp( mx.tensorboard.start( clear=True ) )
disp( mx.tensorboard.start_server() )

#learner = mxtr.Learner( optimizers=[],)    

# Regularization penalties that trg_loss passes to the model's regularizers.
# Only ridge and tv are set here; hps.lipschitz being falsy makes trg_loss skip
# the Lipschitz term. NOTE(review): None presumably disables the other terms - confirm.
hps.chi2 = None
hps.curvature = None
hps.dirichlet = None
hps.lasso = None
hps.lipschitz = None
hps.logdet = None
hps.neumann = None
hps.ridge = 1e-6
hps.smooth_lasso = None
hps.tv = 1e-6
hps.error =  'mse'
hps.activation = 'leaky_relu'
# NOTE(review): the attribute is spelled 'lookhead' and is not read by any visible cell
hps.lookhead = 14 # 1 day lookahead
# error is the training / validation criterion; util and pfo_sr are only logged to tensorboard
error = mxtr.Error( metric=hps.error, kappa=1e-3 )
util = mxtr.Error( metric='util', kappa=1 )
pfo_sr = mxtr.Error( metric='pfo-sr', kappa=1e-6 )
    
# seed before building the model so the 'normal' initialization is repeatable
mx.seed( 17 )
model = mdl.FactorResnet( 
    num_features = len(hps.features+hps.calendar_features),
    num_responses=len( hps.responses ),
    L=13 * 3, 
    dropout=False,
    batch_norm=False,
    resnet_layers=5,
    resnet_gating=False,
    resnet_channels=15,
    resnet_activation=hps.activation,
    dilation=2,
    monitor=True
).initialize( 'normal' ).to( dtype ).to( device )

disp( model.describe(), size=5 )

def trg_loss( times, X, Y, W, mask=None, patience=1  ):
    '''
    The training loss: the weighted error plus the regularization terms.

    The model is put in training mode, every call is recorded in trg_loss.losses,
    and every `patience`-th call is also logged to tensorboard. A tensorboard
    failure only produces a warning. Returns err + reg.
    '''
    model.train( True )
    # regularization terms; the activation regularizer also returns the predictions
    act_reg, Yhat = model.activation_regularization(
        X, chi2=hps.chi2, tv=hps.tv, curvature=hps.curvature, dirichlet=hps.dirichlet, neumann=hps.neumann,
    )
    weight_reg = model.weight_regularization(
        ridge=hps.ridge, lasso=hps.lasso, smooth_lasso=hps.smooth_lasso, logdet=hps.logdet,
    )
    if hps.lipschitz:
        lip_reg = model.lipchitz_regularization(
            X=X, penalty=hps.lipschitz, lr=0.5, radius=1e-2, num_its=5, tensorboard=True,
        )[0]
    else:
        lip_reg = tr.tensor(0)
    # the error and the total regularization
    err = error( Yhat=Yhat, Y=Y, W=W, mask=mask )
    reg = act_reg + weight_reg + lip_reg
    Wsum = W.sum() if mask is None else W[mask].sum()

    # the call counter and the loss history live on the function object itself
    if not hasattr( trg_loss, 'it_counter' ):
        trg_loss.it_counter = 0
        trg_loss.losses = []
    step = trg_loss.it_counter
    trg_loss.losses.append( dict(
        err=err.item(),
        Wsum=Wsum.item(),
        batch_start=min( times.values ),
        reg=reg.item(),
        lip_reg=lip_reg.item(),
        weight_reg=weight_reg.item(),
    ) )

    if step % patience == 0:
        try:
            tb = mx.tensorboard.logger
            tb.add_scalar( 'TrgLoss/error', err, step )
            tb.add_scalar( 'TrgLoss/util', util( Yhat, Y, W, mask=mask ), step )
            tb.add_scalar( 'TrgLoss/pfo_sr', pfo_sr( Yhat, Y, W, mask=mask ) * np.sqrt( 252 ), step )
            tb.add_scalars( 'TrgLoss/regularizations', dict( total=reg, weight=weight_reg, act=act_reg, lip=lip_reg ), step )
            tb.add_scalars( 'TrgLoss/minibatch', dict( batch_start=times.values.min(), mask_start=times[mask].values.min() ), step )
            tb.add_scalars( 'TrgLoss/sizes', dict( Wsum=Wsum, mb_size=W[mask].sum() ), step )
            # human-readable start times of the batch and of the masked part of it
            batch_start = str( pd.to_datetime( min( times.values ) * mx.TIME_TO_INT ) )
            mask_start = str( pd.to_datetime( min( times[mask].values ) * mx.TIME_TO_INT ) )
            tb.add_text( 'TrgLoss/minibatch', f'batch_start={batch_start} mask_start={mask_start}', step )
        except Exception as e:
            warnings.warn( f'tensorboard failure {e} \n SKIPPING' )
    trg_loss.it_counter += 1
    return err + reg

def val_loss( times, X, Y, W, mask=None, patience=10 ):
    '''
    The validation loss: the weighted error only, without regularization.

    The model is put in evaluation mode and every `patience`-th call is logged
    to tensorboard. Returns the error.
    '''
    model.train( False )
    Yhat = model( X )
    err = error( Yhat=Yhat, Y=Y, W=W, mask=mask )

    # the call counter lives on the function object itself
    if not hasattr( val_loss, 'it_counter' ):
        val_loss.it_counter = 0
    step = val_loss.it_counter

    if step % patience == 0:
        tb = mx.tensorboard.logger
        tb.add_scalar( 'ValLoss/error', err, step )
        tb.add_scalar( 'ValLoss/util', util( Yhat, Y, W, mask=mask ), step )
        tb.add_scalar( 'ValLoss/pfo_sr', pfo_sr( Yhat, Y, W, mask=mask ) * np.sqrt( 252 ), step )
        tb.add_scalars( 'ValLoss/minibatch', dict( batch_start=times.values.min(), mask_start=times[mask].values.min() ), step )
        tb.add_scalars( 'ValLoss/sizes', dict( Wsum=W.sum(), mb_size=W[mask].sum() ), step )
        # optional Lipschitz diagnostic, currently switched off:
        # with tr.enable_grad():
        #     lip_reg, _ = model.lipchitz_regularization( X, penalty=1, tensorboard=True, num_its=50 )
        # mx.tensorboard.logger.add_scalar( f'ValLoss/lip_reg', lip_reg, val_loss.it_counter  )
    val_loss.it_counter += 1
    return err


In [None]:
# --- setup the learner ---
############################
mxtr = mxtr.reload()
from research.etf_technical import model_v1 as mdl
reload( mdl )
mx.Config.start()

disp( mx.tensorboard.start( clear=True ) )
disp( mx.tensorboard.start_server() )

# the optimizer; alternative betas that were tried:
# mx.Resolve.halflife( 10)['decay'], mx.Resolve.halflife( 20 )['decay'] ) )
opt = mxtr.FAdam(
    params=model.parameters(),
    lr=1e-3,
    betas=( 0.9, 0.99 ),
    tensorboard=False,
)
# learning-rate schedules that are currently switched off:
# lr_scheduler = mxtr.CycleAdamLR( optimizer=opt, min_lr=1e-5, max_lr=1e-4, min_betas=(0.6,0.8), max_betas=(0.9, 0.9), T_max=200 )
# lr_scheduler = mxtr.WarmupAdamLR( optimizer=opt, min_lr=1e-5, max_lr=1e-3, min_betas=(0.9,0.9), max_betas=(0.9, 0.99), T_max=500 )

# the active callbacks
learner_callbacks = [
    mxtr.callbacks.LossLogger( patience=1 ),
    mxtr.callbacks.ParameterLogger( named_params=model.named_parameters(), patience=10, grad_autocorr=True ),
    mxtr.callbacks.ParameterEma( halflife=10, patience=5 ),
    # callbacks that are currently switched off:
    # mxtr.callbacks.ParameterMacd( hl_fast=10, hl_slow=100, patience=5, rewind_factor=None  ),
    # mxtr.callbacks.ModelSnapshots( model=model, period=100, on_minimum=True, val_halflife=10 )
    # mxtr.callbacks.ClipGradients( signize=False, clip=None, quantile=None, winlen=100 ),
    # mxtr.callbacks.LRSchedule( lr_scheduler ),
]

learner = mxtr.Learner(
    optimizers=opt,
    trg_loss=trg_loss,
    val_loss=val_loss,
    max_its=5000,
    device=device,
    callbacks=learner_callbacks,
)
disp( 'learner hash ', learner.hash(), h=4 )
disp( hps )

In [None]:
# Re-seed and re-initialize the model parameters.
# NOTE(review): initialize() is called without the 'normal' argument used when the
# model was built - confirm the default scheme is the intended one.
mx.seed( 8 )
model.initialize()

In [None]:
# --- evaluate the untrained model ----
%inline
# Predict over the full panel on the CPU, then move the model back to the training
# device. The regression below reads Yh_da, so it has to be computed in this cell:
# otherwise a fresh kernel fails with a NameError and a re-run silently scores
# whatever predictions were left over from an earlier cell.
with mx.timer( 'predicting '):
    Yh = model.to('cpu')( X_da.to_tensor( dtype=dtype, device='cpu' ) )
    model.to( device )
Yh_da = Yh.to_dataarray( Y_da )

# Time ranges of the samples: the first and last timestamps of the training and
# validation datasets are converted back to datetimes and used to slice the panel.
trg_start, trg_end = pd.to_datetime( (trg_dataset[0:1][0].item() * mx.TIME_TO_INT, trg_dataset[-1:][0].item() * mx.TIME_TO_INT)  )
val_start, val_end = pd.to_datetime( (val_dataset[0:1][0].item() * mx.TIME_TO_INT, val_dataset[-1:][0].item() * mx.TIME_TO_INT)  )
trg_times = da.loc[trg_start:trg_end].time
val_times = da.loc[val_start:val_end].time
tst_times = da[tst_mask].time

fig = figure( figsize=(10, 10 ) )

with mx.timer( 'regressing' ):    
    reg = mx.Regression( autocorr=[1, 13, 26] )
    rms = []
    # one regression and one cumulative-pnl panel per sample
    for i, (smp, times) in pbar( enumerate( [('trg', trg_times), ('val', val_times), ('tst',tst_times), ('full', Y_da.time ) ] ) ):
        rm = reg.regress( Yh_da.loc[times], Y_da.loc[times], W_da.loc[times] )    
        ax = subplot( 2,2, i+1 )
        ax.plot( rm.pop('pnl').cumsum() )
        ax.set_title( smp, fontsize=15 )
        # 'pnl' and 'hitRate' are popped so they do not end up in the summary table
        hr = rm.pop('hitRate')
        rm['sample'] = smp
        rms.append( rm )
disp( 'Untrained model ', h= 1)
disp( pd.DataFrame( rms ).set_index('sample') )
disp( fig )

In [None]:
# --- do the fitting ---
#build up the trianing and validation dataloaders
hps.batch_size = 2 * hps.priming  # yrs worth of batches
tds = trg_dataset #mxtr.TensorDataset( *trg_dataset[-20000:] )
vds = val_dataset #val_dataset # mxtr.TensorDataset( *val_dataset[5000:] )

trg_dataloader = mxtr.DataLoader( tds, batch_sampler=mxtr.PanelSampler( tds, batch_size=None ), pin_memory=True )
val_dataloader = mxtr.DataLoader( vds, batch_sampler=mxtr.PanelSampler( vds, batch_size=None ), pin_memory=True )

# build up the training and valdiation maskers
trg_masker = masker.clone( trg_mask_size=None, num_its=1 )
val_masker = masker.clone( val_mask_size=None, num_its=1 ) 

for it in pbar( learner.fit_iterator( data=trg_dataloader, masker=trg_masker, val_data=val_dataloader, val_masker=val_masker ) ) :
    continue
#     if it % 10 == 0:
#         tloss = val_loss( *[m.to(device) for m in tst_dataset[:]]  )
#         print( 'testing  loss', tloss )
#     if it % 500 == 0:
#         print( model.hash() )
#         disp( learner.profiles.sort_values( 'duration_mean', ascending=False ) )

# # run a explicit traning loop
# opt = mxtr.FAdam( params=model.parameters(), lr=1e-3, betas=( 0.9, 0.99 ), tensorboard=False ) # mx.Resolve.halflife( 10)['decay'], mx.Resolve.halflife( 20 )['decay'] ) )
# it  = 0
# for tmb, vmb in pbar( zip( trg_dataloader, val_dataloader ) ):
#     it += 1
#     tmb = [m.to(device) for m in tmb]
#     vmb = [m.to(device) for m in vmb]
#     trg_masker.restrict( tmb[0] )
#     val_masker.restrict( vmb[0] )
#     trg_mask_iterator = trg_masker.mask_iterator('trg')
#     val_mask_iterator = val_masker.mask_iterator('val')
#     opt.zero_grad()
#     trg_mask = next( trg_mask_iterator )
#     val_mask = next( val_mask_iterator )
#     tloss = trg_loss( *tmb, mask=trg_mask )
#     tloss.backward()
#     opt.step()
#     vloss = val_loss( *vmb, mask=val_mask )
#     if it % 10 == 0:
#         tloss = val_loss( *[m.to(device) for m in tst_dataset[:]]  )
#         print( 'testing  loss', tloss )


In [None]:
# --- evaluate the trained model ----
%inline
# predict over the full panel on the CPU, then move the model back to the training device
with mx.timer( 'predicting '):
    cpu_model = model.to( 'cpu' )
    cpu_inputs = X_da.to_tensor( dtype=dtype, device='cpu' )
    Yh = cpu_model( cpu_inputs )
    model.to( device )
Yh_da = Yh.to_dataarray( Y_da )

# first and last timestamps of the training and validation datasets, as datetimes
trg_first = trg_dataset[0:1][0].item() * mx.TIME_TO_INT
trg_last = trg_dataset[-1:][0].item() * mx.TIME_TO_INT
val_first = val_dataset[0:1][0].item() * mx.TIME_TO_INT
val_last = val_dataset[-1:][0].item() * mx.TIME_TO_INT
trg_start, trg_end = pd.to_datetime( (trg_first, trg_last) )
val_start, val_end = pd.to_datetime( (val_first, val_last) )

# the time index of each sample
trg_times = da.loc[trg_start:trg_end].time
val_times = da.loc[val_start:val_end].time
tst_times = da[tst_mask].time

fig = figure( figsize=(10, 10 ) )

with mx.timer( 'regressing' ):
    reg = mx.Regression( autocorr=[1, 13, 26] )
    rms = []
    sample_times = [
        ('trg', trg_times),
        ('val', val_times),
        ('tst', tst_times),
        ('full', Y_da.time),
    ]
    # one regression and one cumulative-pnl panel per sample
    for i, (smp, times) in pbar( enumerate( sample_times ) ):
        rm = reg.regress( Yh_da.loc[times], Y_da.loc[times], W_da.loc[times] )
        ax = subplot( 2, 2, i + 1 )
        cumulative_pnl = rm.pop( 'pnl' ).cumsum()
        ax.plot( cumulative_pnl )
        ax.set_title( smp, fontsize=15 )
        # the hit rate is taken out of the summary row as well
        hr = rm.pop( 'hitRate' )
        rm['sample'] = smp
        rms.append( rm )
disp( 'Trained model ', h= 1)
summary = pd.DataFrame( rms ).set_index( 'sample' )
disp( summary )
disp( fig )

In [None]:
# show the dimensions of the input panel (the last axis is indexed as the feature axis elsewhere)
X_da.shape

In [None]:
# plot partial dependency
##############################################
%inline
# One scatter panel per input feature: feature value against the model prediction.
# The grid is sized from the feature count, because a fixed 4x4 grid makes
# subplot() raise as soon as there are more than 16 features.
n_features = X_da.shape[-1]
n_cols = 4
n_rows = max( 1, -( -n_features // n_cols ) )  # ceiling division
# 5 inches per row keeps the original 20x20 figure when there are four rows
fig = figure( figsize=(20, 5 * n_rows ) )
# the predictions are the same for every panel, so flatten them once
Yh_series = Yh_da.to_series()
for i in range( n_features ):
    subplot( n_rows, n_cols, i+1 )
    plot( X_da[:,:,i].to_series(), Yh_series, '.', alpha=0.3 )

In [None]:
# run a grid of hyperparameters ...
####################################
# NOTE(review): `fr` is not imported in any visible cell while the rest of the
# notebook uses `mx` - confirm that `from madmax.api import *` provides it.
# NOTE(review): obs_weight and cbsa_factors are not read by any visible cell, and
# the commented-out output names refer to a cadre_price_index notebook; this cell
# looks carried over from another project - confirm before running.

# hyperparameters
mx.Config.hps = fr.HPSet( 
  
)
mx.Config.code = ''

# configs = []
# for obs_weight in ['no_weight', 'dollar_weight', 'log_weight']:
#     cfg = fr.Config.clone()
#     cfg.hps = cfg.hps.clone( obs_weight=obs_weight )
#     cfg.freeze()
#     display_html( cfg )
#     configs.append( cfg )
# notebook = fr.Config.run_grid( configs, outfile=fr.Config.uri+'/../cadre_price_index.20200302 - weight_grid_results.ipynb', n_jobs=None )

# build one frozen config per grid value, then run them all
configs = []
fr.Config.hps.obs_weight = 'log_weight'
for cbsa_factors in [5, 8, 10]:
    # clone the current Config, override a single hyperparameter, and freeze the clone
    cfg = fr.Config.clone()
    cfg.hps = cfg.hps.clone( cbsa_factors=cbsa_factors )
    cfg.freeze()
    #display_html( cfg )
    configs.append( cfg )
# the results notebook is written one level above the Config uri
notebook = fr.Config.run_grid( configs, outfile=fr.Config.uri+'/../grid_results.ipynb', n_jobs=None )


# fr.Config.hps.obs_weight = 'log_weight'
# fr.Config.hps.cbsa_factors = 8
# configs = []
# for tv in [1, 1e-1, 1e-2, 1e-3]:
#     cfg = fr.Config.clone()
#     cfg.hps = cfg.hps.clone( obs_weight=obs_weight )
#     cfg.freeze()
#     #display_html( cfg )
#     configs.append( cfg )
# notebook = fr.Config.run_grid( configs, outfile=fr.Config.uri+'/../cadre_price_index.20200302 - tv_grid_results.ipynb', n_jobs=None )


