In [1]:
# Make the local quant_tools package importable by adding its directory to sys.path
import sys
sys.path.insert(0, '/home/adam/Dropbox/2-creations/2-crafts/7-buidl/0-utils/quant_tools/code')

# IMPORT PACKAGES
from tensorflow.keras import layers, regularizers
from tensorflow.keras.layers import Layer
from keras.callbacks import EarlyStopping, ModelCheckpoint
from typing import List, Optional, Tuple, Dict
from tools import QuantTools
from tensorflow import keras
import tensorflow as tf
import pandas as pd
import numpy as np
import itertools
import pickle
import time
import gc

# Set the global Keras dtype policy to plain float32 (no mixed float16/float32 precision).
keras.mixed_precision.set_global_policy("float32")


2023-10-18 09:15:36.278957: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-18 09:15:36.334189: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def subsetRowsAndColumns(
    df: pd.DataFrame, lhs_col: str,
    start_date: str = '2019-01-01', macro_asset: str = 'btc'
    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    ''' Subset relevant rows and columns to form the dataframes needed downstream.

    The panel is first restricted to dates on or after `start_date` and then
    balanced so that every (date, asset) pair has a row; pairs that were not
    observed carry missing values in every non-key column.

    Parameters:
        df          (pd.DataFrame): The original DataFrame containing the data. Must have
                                    `date` and `asset` columns (unique per pair), the lhs
                                    column, `char_mcap_t`, and every characteristic and
                                    macro column listed below.
        lhs_col     (str):          The name of lhs column.
        start_date  (str):          Earliest date (inclusive) to keep. Defaults to
                                    '2019-01-01'.
        macro_asset (str):          Asset whose rows are used to read the macro columns
                                    (one row per date). Defaults to 'btc'.

    Notes:
        - Kept characteristics based on what are the most raw characteristics and
            what work well as a univariate factor to generate a large spread in
            returns in pre 2h 2022 data.
        - Kept a matching number of macro cols, up to an integer scalar, which have
            high corr to avg LHS after taking out the variability of previously
            chosen columns.
        - Keep also previous returns as a separate dataframe for the factor
            side of the network.

    Returns:
        tuple: A tuple containing four DataFrames:
            y_df: A DataFrame with date, asset, and the lhs column.
            char_df: A DataFrame with date, asset, and the raw characteristics.
            ts_df: A DataFrame (one row per date) with the macro columns plus one
                previous-return column per asset, named `asset_r_<asset>_t1h`.
            weight_df: A DataFrame with date, asset, `mcap`, and `volume` columns
                for weighting asset-dates in the obj func.

    Raises:
        AssertionError: If the balanced panel, the date sets of the outputs, or the
            macro/characteristic column counts are inconsistent.
    '''
    # Set column lists that I manually constructed
    char_cols_to_keep = ['char_addr_active_tm1h',
        'char_addr_new_log_delta_tm2_tm1',
        'char_alpha_tm7',
        'char_delta_flow_dist_tm1h',
        'char_delta_holders_dist_tm7',
        'char_mvrv_t',
        'char_prct_supply_in_profit_t',
        'char_r_ath_t',
        'char_r_atl_t',
        'char_r_industry_tm6h',
        'char_r_tm1',
        'char_r_tm14',
        'char_r_tm1h',
        'char_r_tm2h',
        'char_shortfall5_tm1',
        'char_size_t',
        'char_trades_t',
        'char_var5_tm90',
        'char_vol_tm6h',
        'char_volume_t']

    macro_cols_to_keep = ['macro_avg_vel_cur_1yr_t',
        'macro_btc_momr_t',
        'macro_btc_ser_t',
        'macro_btc_sopr_t',
        'macro_btc_sply_act_30d_t',
        'macro_btc_tx_tfr_cnt_t',
        'macro_btc_utxo_prof_unreal_usd_t',
        'macro_btc_vel_act_1yr_t',
        'macro_cpiaucsl_t',
        'macro_dgs1mo_t',
        'macro_eth_roi_t',
        'macro_eth_rvt_adj_t',
        'macro_ex_open_interest_future_usd_t',
        'macro_expinf1yr_t',
        'macro_m2sl_t',
        'macro_mvrv_med_t',
        'macro_t10yie_t',
        'macro_us_ex_volume_future_usd_t',
        'macro_us_ex_volume_spot_usd_t',
        'macro_vixclsx_t']

    # Dropping some data if we want to up the ratio of real to fake data
    df = df[df.date >= start_date].copy()

    # Balance the panel: outer-merge onto every (date, asset) combination so that
    # each asset has a row on each date (unobserved pairs become all-missing rows).
    cross_df = pd.MultiIndex.from_product(
        [df.date.unique(), df.asset.unique()], names=['date', 'asset']
        ).to_frame(index=False)
    df = df.merge(
        cross_df, on=['date', 'asset'], how='outer', validate='one_to_one')
    df = df.sort_values(by=['date', 'asset'], ignore_index=True)
    assert len(df) == (len(np.unique(df.date))*len(np.unique(df.asset)))

    # Form lhs dataframe
    y_df = df[['date', 'asset', lhs_col]].copy()
    y_df = y_df.sort_values(by=['date', 'asset'], ignore_index=True)

    # Form characteristic dataframe
    char_df = df[['date', 'asset']+char_cols_to_keep].copy()
    char_df = char_df.sort_values(by=['date', 'asset'], ignore_index=True)

    # Form previous return dataframe by extracting from char_df and reshaping wide
    # (one row per date, one column per asset)
    r_df         = char_df[['date', 'asset', 'char_r_tm1h']].copy()
    r_df         = r_df.pivot(index='date', columns='asset', values='char_r_tm1h').reset_index()
    r_df.columns = (['date']
        + ['asset_r_' + str(col) + '_t1h' for col in r_df.columns if col != 'date'])

    # Form ts_df as the macro columns plus the previous return columns; the macro
    # columns are read off the rows of a single reference asset (one row per date)
    macro_df = df[df.asset == macro_asset][['date']+macro_cols_to_keep].copy()
    assert set(macro_df.date) == set(r_df.date)
    ts_df    = macro_df.merge(r_df, on='date', how='inner', validate='one_to_one')
    ts_df    = ts_df.sort_values(by='date', ignore_index=True)

    # Form weight dataframe containing columns for weighting asset dates in obj func
    weight_df = df[['date', 'asset', 'char_mcap_t', 'char_volume_t']].copy()
    weight_df = weight_df.rename(columns={'char_mcap_t': 'mcap',
                                        'char_volume_t': 'volume'})

    # Run final checks
    assert set(y_df.date) == set(char_df.date)
    assert set(y_df.date) == set(ts_df.date)
    assert set(y_df.date) == set(weight_df.date)
    assert 0 == ((len(macro_cols_to_keep)
                / len(char_cols_to_keep)) % 1) # macro cols are some multiple of # char cols

    return y_df, char_df, ts_df, weight_df


In [3]:
def normalizeAndFillMissing(df: pd.DataFrame, 
        lhs_col: Optional[str] = None, lhs_pad: float = -2, rhs_pad: float = -2,
        ignore_cols: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Rank-normalize all non-ignored numeric columns of a DataFrame into the int8
    range [-128, 127] and set missing values to the given lhs and rhs pad values.

    The input DataFrame is not modified; a new DataFrame is returned.

    Parameters:
    df          (pd.DataFrame):        Input DataFrame to normalize. Must contain a
                                       `date` column (used for the final sort).
    lhs_col     (str, optional):       Column name to fill missing values with lhs_pad.
                                       It is cast to float32. It is only left
                                       un-normalized if it is also in ignore_cols.
    lhs_pad     (float, optional):     Value to replace missing values in lhs_col.
    rhs_pad     (float, optional):     Value to replace missing values in other columns.
                                       NOTE(review): float64 columns are cast to int8
                                       afterwards, so the pad should be int8-representable,
                                       and a pad inside [-128, 127] (such as the default -2)
                                       coincides with a valid normalized value — confirm
                                       callers pass a suitable pad.
    ignore_cols (List[str], optional): List of column names to ignore during normalization.

    Returns:
    pd.DataFrame: Normalized DataFrame sorted by date (and asset, if present) with a
                  fresh integer index.

    Raises:
    ValueError: If any column to normalize is not numeric.
    """
    # If ignore_cols is not provided, use an empty list
    if ignore_cols is None:
        ignore_cols = []

    # Work on a private copy so that the caller's DataFrame is never mutated
    df = df.copy()

    # Get the columns to normalize
    cols_to_normalize = [col for col in df.columns if col not in ignore_cols]

    # Check if all columns to normalize are numeric
    if not all(dtype.kind in 'biufc' for dtype in df[cols_to_normalize].dtypes):
        raise ValueError("All columns to normalize must be numeric")

    # Check for constant columns and raise a warning
    constant_columns = df[cols_to_normalize].columns[df[cols_to_normalize].nunique() <= 1]
    if len(constant_columns) > 0:
        print(f"Warning: Columns {constant_columns} have constant values and will result in NaN after normalization.")

    # Rank the values in each column 
    #    -where we first rank normalize
    #    -but then add noise and 
    #         re rank normalize to ensure unique values with 
    #         even coverage of RHS latent space
    if len(cols_to_normalize) > 0:
        # Number of non-missing observations per column (missing values get no rank)
        non_missing_per_column = len(df) - df[cols_to_normalize].isnull().sum()

        # Rank normalization into (0, 1]
        ranked = df[cols_to_normalize].rank() / non_missing_per_column

        # Add small random noise to break ties
        ranked += np.random.uniform(-1e-6, 1e-6, (len(df), len(cols_to_normalize)))

        # Re-rank after adding noise
        ranked = ranked.rank() / non_missing_per_column

        # Scale to the range [-128, 127]
        ranked = (ranked * 255) - 128

        # Confirm ranges
        assert(-128 <= ranked.min().min())
        assert(127 >= ranked.max().max())

        df[cols_to_normalize] = ranked

    # Replace missing LHS and RHS with given buffer values.
    # Assign the filled column back instead of calling fillna(inplace=True) on the
    # column selection: the chained in-place form does not update the frame under
    # pandas Copy-on-Write.
    if lhs_col is not None and lhs_col in df.columns:
        df[lhs_col] = df[lhs_col].fillna(lhs_pad).astype(np.float32)
        assert np.allclose(0, df[lhs_col].isnull().sum())
    df = df.fillna(rhs_pad)
    assert 0 == df.isnull().sum().sum()

    # Reduce data memory usage: every remaining float64 column becomes int8 (the
    # fractional part is truncated). The float32 lhs column is left untouched.
    for col in df.select_dtypes(include=[np.float64]).columns:
        df[col] = df[col].astype(np.int8)

    # Resort and reset index
    if 'asset' in df.columns:
        df = df.sort_values(by=['date', 'asset'], ignore_index=True)
    else:
        df = df.sort_values(by=['date'], ignore_index=True)

    return df


In [4]:
def formLhsAndRhsTensors(rel_y_df: pd.DataFrame, rel_char_df: pd.DataFrame, rel_ts_df: pd.DataFrame,
                            datetimes_window: List[pd.Timestamp], prev_asset_ret_cols: List[str],
                            macro_cols: List[str], lhs_col: str, num_lags: int, num_assets: int,
                            num_chars: int) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Form tensors for loading input, factor input, and output for machine learning model.

    For every datetime in `datetimes_window` one observation is built from the
    `num_lags` hourly rows ending at (and including) that datetime.

    Parameters:
        rel_y_df            (pd.DataFrame):       DataFrame containing target variable (response) data.
        rel_char_df         (pd.DataFrame):       DataFrame containing characteristics data, with `date`
                                                  and `asset` columns plus `num_chars` characteristic columns.
        rel_ts_df           (pd.DataFrame):       DataFrame containing time series data (factors).
        datetimes_window    (List[pd.Timestamp]): List of datetime values for which to form tensors.
        prev_asset_ret_cols (List[str]):          List of column names for previous asset returns in `rel_ts_df`.
        macro_cols          (List[str]):          List of column names for macroeconomic data in `rel_ts_df`.
        lhs_col             (str):                The column name in `rel_y_df` representing the target variable.
        num_lags            (int):                Number of lagged time steps to consider.
        num_assets          (int):                Number of assets (entities).
        num_chars           (int):                Number of characteristics.

    Returns:
        Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: A tuple containing three tensors:
            - `loading_input`: int8 tensor containing the concatenated characteristics and macro data.
            - `factor_input`: int8 tensor containing previous asset return data.
            - `output`: float32 tensor containing the target returns.

    Raises:
        ValueError: If a window has fewer than `num_lags` time-series rows, or if the
            windowed data cannot be reshaped to the expected dimensions.
    """
    # Initialize lists to return
    loading_input_list = []
    factor_input_list  = []
    output_list        = []

    # The window spans num_lags hourly steps including the target datetime itself
    lag_span = pd.Timedelta(hours=num_lags - 1)

    # Loop over all datetimes to form LHS and RHS for
    for window_end in datetimes_window:
        # Form the beginning datetime for this observation given the number of lags to step back
        window_start = window_end - lag_span

        # Validate the window length first, so a short window reports the intended
        # message rather than an opaque reshape error further down
        rel_ts_filtered = rel_ts_df[
            (rel_ts_df.date >= window_start) & (rel_ts_df.date <= window_end)]
        if len(rel_ts_filtered) < num_lags:
            raise ValueError("Not enough data points for the given number of lags.")

        # Obtain output data: the lhs values on the target datetime
        output_list.append(rel_y_df.loc[rel_y_df.date == window_end, lhs_col].values)

        # Obtain characteristic data for the window
        rel_char_filtered = rel_char_df[
            (rel_char_df.date >= window_start) & (rel_char_df.date <= window_end)
            ].drop(columns=['date', 'asset'])

        # Rows are reshaped to (lag, asset, characteristic) and then transposed to
        # (lag, characteristic, asset).
        # NOTE(review): this relies on rel_char_df being sorted by date then asset — confirm.
        char_data = rel_char_filtered.values.reshape(
                        (num_lags, num_assets, num_chars)
                        ).transpose((0, 2, 1))

        # Macro columns are folded into (lag, characteristic, macro vector) so they can
        # be appended to the characteristics along the last axis
        macro_data = rel_ts_filtered[macro_cols].values.reshape((num_lags, num_chars, -1))

        # Append to lists of input data
        loading_input_list.append(np.concatenate((char_data, macro_data), axis=2))
        factor_input_list.append(rel_ts_filtered[prev_asset_ret_cols].values)

    # Convert data lists to tensors
    loading_input = tf.convert_to_tensor(np.array(loading_input_list), dtype=tf.int8)
    factor_input  = tf.convert_to_tensor(np.array(factor_input_list), dtype=tf.int8)
    output        = tf.convert_to_tensor(np.array(output_list), dtype=tf.float32)

    return loading_input, factor_input, output


In [5]:
class TransformerEncoder(layers.Layer):
    """
    Single Transformer encoder block: self-attention followed by a two-layer
    feed-forward projection, each wrapped in a residual connection and layer
    normalization.
    """

    def __init__(self, num_heads: int, hidden_dim: int, dropout_pct: float,
                 dense_dim: int, l2_penalty: float, **kwargs):
        """
        Build the sub-layers of the encoder block.

        Parameters:
            num_heads   (int):   Number of attention heads in the multi-head attention layer.
            hidden_dim  (int):   Key dimension of the attention layer and output width of
                                 the feed-forward projection.
            dropout_pct (float): Dropout rate used in the attention and feed-forward layers.
            dense_dim   (int):   Width of the inner feed-forward dense layer.
            l2_penalty  (float): L2 regularization penalty on the kernels.
            **kwargs: Additional arguments for the base Layer class.
        """
        super().__init__(**kwargs)

        # Hyperparameters are stored so that get_config can serialize the layer
        self.num_heads   = num_heads
        self.hidden_dim  = hidden_dim
        self.dropout_pct = dropout_pct
        self.dense_dim   = dense_dim
        self.l2_penalty  = l2_penalty

        def regularized_dense(units: int, activation: str):
            # Dense layer with the initializers and L2 penalty shared across the block
            return layers.Dense(units, activation=activation,
                                kernel_initializer='glorot_uniform',
                                bias_initializer='random_uniform',
                                kernel_regularizer=regularizers.l2(l2=l2_penalty))

        # Self-attention sub-layer
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=hidden_dim,
            kernel_initializer='glorot_uniform',
            bias_initializer='random_uniform',
            dropout=dropout_pct,
            kernel_regularizer=regularizers.l2(l2=l2_penalty))

        # Feed-forward sub-layer: expand to dense_dim with gelu, project back to hidden_dim
        feed_forward_layers = [
            regularized_dense(dense_dim, 'gelu'),
            layers.Dropout(dropout_pct),
            regularized_dense(hidden_dim, 'linear'),
            layers.Dropout(dropout_pct),
        ]
        self.dense_proj = keras.Sequential(feed_forward_layers)

        # One layer normalization per residual connection
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs):
        """Apply self-attention and the feed-forward projection, each with residual + layer norm."""
        attended = self.attention(query=inputs, key=inputs, value=inputs)
        normed_attention = self.layernorm_1(inputs + attended)
        projected = self.dense_proj(normed_attention)
        return self.layernorm_2(normed_attention + projected)

    def get_config(self):
        """Return the base layer config extended with this block's hyperparameters."""
        return {
            **super().get_config(),
            "num_heads":  self.num_heads,
            "hidden_dim": self.hidden_dim,
            "dropout_pct": self.dropout_pct,
            "dense_dim":  self.dense_dim,
            "l2_penalty": self.l2_penalty,
        }

def buildLoadingOutputs(inputs: Layer, num_assets: int, hidden_dim: int, l2_penalty: float,
                        dropout_pct: float, num_heads: int, dense_dim: int, num_factors: int) -> Layer:
    """
    Build the loading side of the Transformer model.

    A separate encoder stack (dense projection, dropout, Transformer encoder,
    global average pooling, dense head) is built for every asset on the same
    inputs; the per-asset heads are then arranged into a num_assets by
    num_factors matrix.

    Parameters:
        inputs      (Layer): Input layer for the encoders.
        num_assets  (int):   Number of assets (entities); one encoder stack per asset.
        hidden_dim  (int):   Dimension of the hidden layers.
        l2_penalty  (float): L2 regularization penalty.
        dropout_pct (float): Dropout rate.
        num_heads   (int):   Number of attention heads.
        dense_dim   (int):   Dimension of the dense layers inside the encoder.
        num_factors (int):   Number of factors in the model.

    Returns:
        Layer: Loadings reshaped to (num_assets, num_factors) per observation.
    """

    def linear_dense(units: int):
        # Linear dense layer with the initializers and L2 penalty used throughout
        return layers.Dense(units, activation='linear',
                            kernel_initializer='glorot_uniform',
                            bias_initializer='random_uniform',
                            kernel_regularizer=regularizers.l2(l2=l2_penalty))

    per_asset_loadings = []
    for _ in range(num_assets):
        hidden = linear_dense(hidden_dim)(inputs)
        hidden = layers.Dropout(dropout_pct)(hidden)
        hidden = TransformerEncoder(num_heads, hidden_dim, dropout_pct, dense_dim, l2_penalty)(hidden)
        pooled = layers.GlobalAveragePooling2D()(hidden)
        per_asset_loadings.append(linear_dense(num_factors)(pooled))

    # Join the per-asset heads side by side, then fold into num_assets x num_factors
    joined = layers.Concatenate(axis=1)(per_asset_loadings)
    return layers.Reshape((num_assets, num_factors))(joined)

def buildFactorOutputs(inputs: Layer, hidden_dim: int, l2_penalty: float, dropout_pct: float,
                        num_heads: int, dense_dim: int, num_factors: int) -> Layer:
    """
    Build the factor side of the Transformer model.

    The inputs pass through a dense projection, dropout, one Transformer encoder
    block, global average pooling, and a final linear head with one unit per
    factor. The final head carries no kernel regularizer.

    Parameters:
        inputs      (Layer): Input layer for the encoder.
        hidden_dim  (int):   Dimension of the hidden layers.
        l2_penalty  (float): L2 regularization penalty.
        dropout_pct (float): Dropout rate.
        num_heads   (int):   Number of attention heads.
        dense_dim   (int):   Dimension of the dense layers inside the encoder.
        num_factors (int):   Number of factors in the model.

    Returns:
        Layer: Output layer of the factor outputs.
    """
    # Initializers shared by both dense layers in this branch
    initializers = dict(kernel_initializer='glorot_uniform',
                        bias_initializer='random_uniform')

    projection = layers.Dense(hidden_dim, activation='linear',
                              kernel_regularizer=regularizers.l2(l2=l2_penalty),
                              **initializers)
    hidden = projection(inputs)
    hidden = layers.Dropout(dropout_pct)(hidden)
    hidden = TransformerEncoder(num_heads, hidden_dim, dropout_pct, dense_dim, l2_penalty)(hidden)
    pooled = layers.GlobalAveragePooling1D()(hidden)

    factor_head = layers.Dense(num_factors, activation='linear', **initializers)
    return factor_head(pooled)

def buildTransformer(num_chars: int, num_macro_vectors: int, 
    num_assets: int, num_training_obs: int, 
    hps_dict: Dict, rel_weight_df: pd.DataFrame) -> keras.Model:
    """
    Build and compile a Transformer model.

    The model has two inputs (loading input and factor input). The loading branch
    and the factor branch are combined with a dot product over the factor
    dimension to give one prediction per asset.

    Parameters:
        num_chars (int): Number of characteristics.
        num_macro_vectors (int): Number of macroeconomic vectors.
        num_assets (int): Number of assets (entities).
        num_training_obs (int): Number of observations in the training data for this model.
        hps_dict (Dict): hyperparameter values. Keys read here: 'hidden_dim', 'num_lags',
            'l2_penalty', 'dropout_pct', 'num_heads', 'number_factors', 'batch_size',
            'initial_learning_rate', 'learning_decay_rate', 'adam_beta_1', 'adam_beta_2',
            'adam_clipnorm'.
        rel_weight_df (pd.DataFrame): DataFrame containing `date`, `mcap`, and `volume`
            columns for weighting the obj. func.

    Returns:
        keras.Model: Compiled Transformer model.
    """
    # Set dense dim to be two times that of hidden dimension
    dense_dim = int(hps_dict['hidden_dim']*2)

    # Build inputs
    input_loadings = keras.Input(shape=(hps_dict['num_lags'], num_chars, num_assets + num_macro_vectors))
    input_factors = keras.Input(shape=(hps_dict['num_lags'], num_assets))

    # Build mappings to outputs
    output_loadings = buildLoadingOutputs(
        input_loadings, num_assets, hps_dict['hidden_dim'],
        hps_dict['l2_penalty'], hps_dict['dropout_pct'], 
        hps_dict['num_heads'], dense_dim, hps_dict['number_factors'])
    output_factors = buildFactorOutputs(
        input_factors, hps_dict['hidden_dim'],
        hps_dict['l2_penalty'], hps_dict['dropout_pct'], 
        hps_dict['num_heads'], dense_dim, hps_dict['number_factors'])
    # Contract the factor axis of the loadings (axis 2) with that of the factors (axis 1)
    output = layers.Dot(axes=[2, 1])([output_loadings, output_factors])

    # Build optimizer. decay_steps is floored at 1: with fewer training observations
    # than one batch the integer division would give 0 and the decay schedule would
    # divide by zero.
    decay_steps = max(1, int(num_training_obs / hps_dict['batch_size']))
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        hps_dict['initial_learning_rate'],
        decay_steps=decay_steps,
        decay_rate=hps_dict['learning_decay_rate'],
        staircase=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule,
        beta_1=hps_dict['adam_beta_1'], beta_2=hps_dict['adam_beta_2'], 
        clipnorm=hps_dict['adam_clipnorm'])
    
    # Create weights for the loss function for each asset in the output
    # remove data from weights that is before we have suff num of lags before
    # NOTE(review): the positional slice assumes rel_weight_df has num_assets rows per
    # date and is sorted by date — confirm against the caller.
    w_df = rel_weight_df[int(num_assets*(hps_dict['num_lags']-1)):].reset_index(drop=True).copy()

    # fill missings in weight matrix of mcap and volume with the per-date min value so
    # they get downweighted. Assign the result back rather than using
    # fillna(inplace=True) on the column selection, which does not update the frame
    # under pandas Copy-on-Write.
    for col in ['mcap', 'volume']:
        min_by_date = w_df.groupby('date')[col].transform('min')
        w_df[col] = w_df[col].fillna(min_by_date)

    # calc mcap weights
    total_mcap = w_df.groupby('date')['mcap'].transform('sum')
    w_df['mcap_weight'] = w_df['mcap'] / total_mcap

    # calc volume weights
    total_volume = w_df.groupby('date')['volume'].transform('sum')
    w_df['volume_weight'] = w_df['volume'] / total_volume

    # calc final weight column as the simple average of the two weights
    w_df['weight'] = (w_df.mcap_weight + w_df.volume_weight)/2

    # form the weighting matrix from mcap and volume (one row per date, one column per asset)
    training_weight_matrix = w_df.weight.values.reshape(-1,num_assets)
    training_weight_matrix = tf.cast(training_weight_matrix, tf.float32)

    # form weighing array for each asset as its average weight over all dates
    loss_weights = tf.cast(tf.reduce_mean(training_weight_matrix, axis=0), tf.float32)
    loss_weights = list(loss_weights.numpy())
    
    # Build and compile model
    # NOTE(review): loss_weights is computed above but is not passed to compile (the
    # argument was commented out in the original) — confirm whether per-asset
    # weighting should be enabled.
    model = keras.Model(inputs=[input_loadings, input_factors], outputs=output)
    model.compile(optimizer=optimizer, 
        loss='mean_squared_error',
        metrics=['mse'],
        weighted_metrics=['mse'])
    #     loss_weights=loss_weights)

    return model

In [6]:
def fitTransformer(model: keras.Model,
        train_loading_input: tf.Tensor, train_factor_input: tf.Tensor, train_output: tf.Tensor, 
        rel_y_df: pd.DataFrame, lhs_col: str, lhs_pad: int, tc_per_hour: float, num_prtfl_qntls: int,
        train_datetimes: List[np.datetime64], num_assets: int, 
        hps_dict: Dict,
        val_loading_input: Optional[tf.Tensor] = None, val_factor_input: Optional[tf.Tensor] = None,
        val_output: Optional[tf.Tensor] = None, 
    ) -> Tuple[keras.Model, int, float, float, float]:
    """
    Fit the Transformer model and summarize its in-sample performance.

    Parameters: 
        model (keras.Model): The Transformer model to be trained.
        train_loading_input (tf.Tensor): Training loading input tensor.
        train_factor_input (tf.Tensor): Training factor input tensor.
        train_output (tf.Tensor): Training output tensor.
        rel_y_df (pd.DataFrame): lhs data.
        lhs_col (str): Name of LHS column.
        lhs_pad (int): The padded value for the lhs to signify missing.
        tc_per_hour (float): Transaction costs per hour in simple return.
        num_prtfl_qntls (int): Number of quantiles for long-short portfolio construction.
        train_datetimes (List[np.datetime64]): Datetimes of the training observations; used
            to select the matching rows of rel_y_df.
        num_assets (int): Number of assets (entities).
        hps_dict (Dict): hyperparameter values. Keys read here: 'early_stopping',
            'patience', 'batch_size', 'num_epochs'.
        val_loading_input (tf.Tensor, optional): Validation loading input tensor.
        val_factor_input (tf.Tensor, optional): Validation factor input tensor.
        val_output (tf.Tensor, optional): Validation output tensor.
        
    Returns:
        Tuple: A tuple containing the trained model, number of epochs trained for the best epoch,
                training mse, training r^2 predictive, and training geometric average return.

    Notes:
        - Early stopping and checkpointing both monitor `val_mse`, so the restored
            weights come from the same epoch that early stopping tracks.
        - If early stopping is requested without a full set of validation tensors it is
            skipped with a printed warning, because there is no validation metric to
            monitor and no checkpoint would be written.
        - When early stopping is active the best weights are written to, and reloaded
            from, 'best_model.h5' in the working directory.
    """
    # Build additional obj func weights: each training observation is weighted by its
    # share of non-missing assets, normalized so that the weights sum to one
    missing_asset_matrix = tf.cast(train_output != lhs_pad, dtype=tf.int16)
    missing_asset_weight_array = np.sum(missing_asset_matrix, axis=1) / num_assets
    missing_asset_weight_array = missing_asset_weight_array / missing_asset_weight_array.sum()
    sample_weight_array = tf.cast(missing_asset_weight_array, dtype=tf.float32)

    # Build val data, if given.
    if val_loading_input is not None and val_factor_input is not None and val_output is not None:
        validation_data = ([val_loading_input, val_factor_input], val_output)
    else:
        validation_data = None

    # Early stopping needs a validation metric; without validation data the checkpoint
    # would never be written and a stale 'best_model.h5' could be loaded afterwards.
    use_early_stopping = bool(hps_dict['early_stopping']) and validation_data is not None
    if hps_dict['early_stopping'] and validation_data is None:
        print("Warning: early stopping requested without validation data; training for all epochs.")

    # Build early stopping and model checkpoint callbacks, both keyed on the same metric.
    callbacks = []
    if use_early_stopping:
        es = EarlyStopping(monitor='val_mse', mode='min', verbose=1, patience=hps_dict['patience'])
        model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_mse', mode='min')
        callbacks += [es, model_checkpoint]

    # Fit the model.    
    model.fit(x=[train_loading_input, train_factor_input],
                y=train_output,
                batch_size=hps_dict['batch_size'], epochs=hps_dict['num_epochs'], verbose=1, callbacks=callbacks,
                validation_data=validation_data,
                sample_weight=sample_weight_array)
    
    # If we did early stopping, load the best model and extract the best epoch.
    # NOTE(review): stopped_epoch is presumably a 0-based epoch index, so this value may
    # be one less than the number of epochs trained up to the best epoch — confirm
    # before using it as an epoch count.
    if use_early_stopping:
        model.load_weights('best_model.h5')
        if (es.stopped_epoch > 0):
            best_epoch_train = es.stopped_epoch - hps_dict['patience']
        else:
            best_epoch_train = hps_dict['num_epochs']
    else:
        best_epoch_train = hps_dict['num_epochs']

    # Predict on the training data to return the training mse and r^2_pred for obs with nonmissing return
    train_yhats = model.predict([train_loading_input, train_factor_input])
    train_mask = train_output != lhs_pad
    train_mse     = QuantTools.calcMSE(train_output[train_mask], train_yhats[train_mask])
    train_r2_pred = QuantTools.calcR2Pred(train_output[train_mask], train_yhats[train_mask])

    # Form array of training yhats (flattened to one value per asset-date)
    train_yhats_array = tf.reshape(train_yhats, [-1]).numpy()

    # Form DataFrame of y and yhat values for nonmissing returns in training window
    # NOTE(review): assumes the rows of rel_y_df for train_datetimes are in the same
    # date/asset order as the flattened predictions — confirm.
    train_pos_df = rel_y_df[rel_y_df.date.isin(train_datetimes)].copy()
    train_pos_df['yhats'] = train_yhats_array
    train_pos_df = train_pos_df[train_pos_df[lhs_col] != lhs_pad].reset_index(drop=True)

    # Calculate the geom avg return of given quantile long short portfolios, net of
    # the per-hour transaction cost
    train_pos_df = QuantTools.formPortfolioWeightsByQuantile(
            train_pos_df, num_prtfl_qntls)
    train_pos_df['returns'] = train_pos_df.prtfl_wght_hml*train_pos_df[lhs_col]
    train_returns = train_pos_df.groupby('date')['returns'].sum().values - tc_per_hour
    train_geom_mean_rtrn = QuantTools.calcGeomAvg(train_returns)
    
    return model, best_epoch_train, train_mse, train_r2_pred, train_geom_mean_rtrn


In [7]:
def runCV(y_df: pd.DataFrame, char_df: pd.DataFrame, ts_df: pd.DataFrame, weight_df: pd.DataFrame,
    aux_df: pd.DataFrame, asset_universe_dict: Dict[str, List],
    val_start_date: str, val_end_date: str, test_start_date: str, lhs_col: str,
    lhs_pad: int, rhs_pad: int, num_prtfl_qntls: int, tc_per_hour: float,
    hp_grid: Dict[str, list], periods_in_year: int, 
    cv_out_fp: str, arch_name: str,
    restrict_shortable_uni: bool=False, shortable_asset_uni: List[str]=[], 
    restrict_tradable_volume: bool=False, prct_volume_threshold: float=0.05, total_trade_volume_per_hour: int=1e6
    ) -> List[dict]:
    """
    Run custom step forward cross-validation.

    This function evaluates the performance of the Transformer-based factor model. 
    For every combination in the hyperparameter grid it walks forward through the
    validation period one week at a time (weeks start at Sunday midnight; a leading
    partial week is added when the validation period does not start on a Sunday),
    refits the model on all data before the week, predicts the week, and finally
    aggregates the predictions into validation statistics. After every grid point
    the accumulated results are written to a new timestamped csv.

    Parameters:
        y_df (pd.DataFrame): DataFrame containing the target variable data.
        char_df (pd.DataFrame): DataFrame containing the characteristic data.
        ts_df (pd.DataFrame): DataFrame containing the previous return data.
        weight_df (pd.DataFrame): DataFrame containing columns for weighting obj. func.
        aux_df (pd.DataFrame): DataFrame containing date asset mcap and volume_tp1 columns.
        asset_universe_dict (Dict[str, List]): Dictionary containing the asset universe 
                                                for each month in the study period, keyed by
                                                the 'YYYY-MM-01' string of the month.
        val_start_date (str): Start date of the validation period in 'YYYY-MM-DD' format.
        val_end_date (str): End date of the validation period in 'YYYY-MM-DD' format.
            Only recorded in the results; the validation window itself runs from
            val_start_date up to (excluding) test_start_date.
        test_start_date (str): Start date of the test period in 'YYYY-MM-DD' format.
            All inputs except aux_df are cut to dates strictly before it.
        lhs_col (str): The name of the target variable (lhs) column in y_df.
        lhs_pad (int): The value to pad missing lhs values with.
        rhs_pad (int): The value to pad missing rhs values with.
        num_prtfl_qntls (int): Number of quantiles for long-short portfolio construction.
        tc_per_hour (float): Transaction cost per hour for calculating returns.
        hp_grid (Dict[str, list]): Hyperparameter grid to search for the best model.
            Must contain 'num_lags'; every combination is passed on to the model
            build and fit functions.
        periods_in_year (int): Number of periods in a year for annualization.
        cv_out_fp (str): Filepath prefix to save the cross-validation results in CSV format.
        arch_name (str): Name of the architecture/model being tested.
        restrict_shortable_uni (bool): If True, restrict shortable assets to those listed.
        shortable_asset_uni (List): assets that are shortable.
        restrict_tradable_volume (bool): If True, restrict volume to specified percentage.
        prct_volume_threshold (float): Fraction of datetime-asset volume that is tradable.
        total_trade_volume_per_hour (int): total dollar trade volume to place in long and short
            positions used to calculate what fraction of datetime-asset volume is traded.

    Returns:
        List[dict]: A list of dictionaries containing evaluation results for each model 
            in the hyperparameter grid. Each dictionary includes the model's hyperparameters, 
            evaluation metrics, and other relevant information.

    Raises:
        ValueError: If y_df has no observation dated exactly val_start_date.

    Notes:
        - A hyperparameter point is abandoned at the first validation week whose
            predictive r^2 is negative; its statistics then cover only the weeks
            predicted so far (see 'val_datetime_end' in the results).
    """
    # Subset the LHS and RHS to remove test period
    y_df = y_df[y_df.date < test_start_date].copy()
    char_df = char_df[char_df.date < test_start_date].copy()
    ts_df = ts_df[ts_df.date < test_start_date].copy()
    weight_df = weight_df[weight_df.date < test_start_date].copy()

    # Initialize cv result objects
    results_list = []

    # Determine RHS column names
    char_cols = list(char_df.columns.values)
    char_cols.remove('date')
    char_cols.remove('asset')
    macro_cols = [col for col in ts_df.columns if 'macro' in col]

    # Determine number of RHS values
    num_chars  = len(char_cols)
    num_macro_vectors = int(len(macro_cols)/len(char_cols))

    # Determine validation datetimes to loop over and datetimes to refit at
    val_dts_dict = {}
    val_sun_midnights = np.unique(y_df[(y_df.date>=val_start_date) 
        & (y_df.date.dt.hour==0) & (y_df.date.dt.day_of_week==6)].date.values)

    # Fail with a clear message instead of numpy's empty-reduction error
    first_val_dates = y_df[(y_df.date==val_start_date)].date.values
    if len(first_val_dates) == 0:
        raise ValueError(
            f"No observations in y_df dated {val_start_date} (before {test_start_date}).")
    first_val_date = np.min(first_val_dates)

    # Check if first val date is sunday midnight, if not then add the dates.
    # Days since 1970-01-01 (a Thursday) shifted so Monday=0 ... Sunday=6.
    day_of_week_of_first_val_datetime = (first_val_date.astype('datetime64[D]').view('int64') - 4) % 7
    if day_of_week_of_first_val_datetime != 6:
        partial_week_mask = (y_df.date>=first_val_date)
        # Without any Sunday midnight in the window the partial week is the whole window
        if len(val_sun_midnights) > 0:
            partial_week_mask = partial_week_mask & (y_df.date<val_sun_midnights[0])
        val_dts_dict[first_val_date] = np.unique(y_df[partial_week_mask].date.values)

    # Complete the dictionary with all the sundays as keys as the dates until the next sunday as the values
    for val_sun_midnight in val_sun_midnights:
        next_sun_midnight = val_sun_midnight + np.timedelta64(7, 'D')
        val_dts_dict[val_sun_midnight] = np.unique(y_df[(y_df.date>=val_sun_midnight) 
                                            & (y_df.date<next_sun_midnight)
                                            & (y_df.date<test_start_date)].date.values)

    # Loop over hp combinations
    keys = hp_grid.keys()
    values = hp_grid.values()
    hp_combos = list(itertools.product(*values))
    for hps in hp_combos:
        # Start the timer
        tic = time.perf_counter()

        # Create hp dictionary and other objects for this iteration
        hps_dict = dict(zip(keys, hps))
        hps_results_dict = hps_dict.copy()
        # Weekly prediction frames are collected here and concatenated once at the end
        val_week_frames = []

        # Report on progress
        print(hps_dict)

        # Initiate lists for results and start the loop over the val dates to fit and predict
        num_epochs_trained_list   = []
        train_mse_list            = []
        train_r2_pred_list        = []
        train_geom_mean_rtrn_list = []
        train_num_assets_list     = []
        num_model_params_list     = []
        yhats_spread_list         = []
        for val_datetime_start in list(val_dts_dict.keys()): 
            print(val_datetime_start)
            # form training and validation datetime objects
            train_datetimes = list(ts_df[ts_df.date < val_datetime_start].date.values)[hps_dict['num_lags']-1:]
            val_datetimes_window = val_dts_dict[val_datetime_start]
            val_datetime_end = np.max(val_datetimes_window)

            # form appropriate asset universe
            first_day_of_month_for_current_val_dt = np.datetime_as_string(val_datetime_start, unit='M')+'-01'
            asset_universe = asset_universe_dict[first_day_of_month_for_current_val_dt]

            # figure out what assets are not included in this asset universe to drop from previous return df
            prev_ret_cols_to_drop = [col for col in ts_df.columns 
                                        if (col != 'date') & ('asset_r_' in col) 
                                        if col.split('_')[2] not in asset_universe]

            # for all dfs, cut down assets and form relevant dataframes of up to end of current val week
            rel_y_df    = y_df[(y_df.asset.isin(asset_universe))
                                & ((y_df.date <= val_datetime_end))].copy()
            rel_char_df = char_df[(char_df.asset.isin(asset_universe))
                                & (char_df.date <= val_datetime_end)].copy()
            rel_ts_df   = ts_df[(ts_df.date <= val_datetime_end)].drop(columns=prev_ret_cols_to_drop, axis=1)
            rel_weight_df = weight_df[(weight_df.asset.isin(asset_universe))
                                    & (weight_df.date < val_datetime_start)].copy()

            # form rel prev asset return col names
            prev_asset_ret_cols = [col for col in rel_ts_df.columns if 'asset_r_' in col]

            # Update asset universe with intersection of what we have and what we should have!
            lhs_asset_uni = set(rel_y_df[rel_y_df.date>=val_datetime_start].asset.unique())
            char_asset_uni = set(rel_char_df[rel_char_df.date>=val_datetime_start].asset.unique())
            assert(len(lhs_asset_uni) == len(char_asset_uni)),"LHS and RHS dont have same assets!"
            assert(len(lhs_asset_uni) == len(prev_asset_ret_cols)), "LHS and prev ret dont have same assets!"
            asset_universe = set(asset_universe).intersection(lhs_asset_uni)
            assert(len(asset_universe) == len(char_asset_uni))

            # Set number of assets to consider
            num_assets = len(asset_universe)

            # normalize rhs data (note: this takes 2-15 min given big df's)
            rel_char_df = normalizeAndFillMissing(rel_char_df, lhs_col,
                            lhs_pad, rhs_pad, ignore_cols=['date', 'asset'])
            rel_ts_df = normalizeAndFillMissing(rel_ts_df, lhs_col, 
                            lhs_pad, rhs_pad, ignore_cols='date')

            # form training and validation data
            train_loading_input, train_factor_input, train_output = formLhsAndRhsTensors(rel_y_df, rel_char_df, rel_ts_df,
                                                                        train_datetimes, prev_asset_ret_cols, macro_cols,
                                                                        lhs_col, hps_dict['num_lags'], num_assets, num_chars)
            val_loading_input, val_factor_input, val_output = formLhsAndRhsTensors(rel_y_df, rel_char_df, rel_ts_df,
                                                                        val_datetimes_window, prev_asset_ret_cols, macro_cols,
                                                                        lhs_col, hps_dict['num_lags'], num_assets, num_chars)

            # Fit and predict
            num_training_obs = train_output.shape[0]
            model = buildTransformer(
                num_chars, num_macro_vectors, num_assets, num_training_obs, hps_dict, rel_weight_df)
            model, num_epochs_trained, train_mse, train_r2_pred, train_geom_mean_rtrn = fitTransformer(
                model, train_loading_input, train_factor_input, train_output,
                rel_y_df, lhs_col, lhs_pad, tc_per_hour, num_prtfl_qntls, 
                train_datetimes, num_assets, hps_dict,
                val_loading_input, val_factor_input, val_output)
            val_yhats = model.predict([val_loading_input, val_factor_input])
            val_yhats_array = tf.reshape(val_yhats, [-1]).numpy()

            # Save this val week returns
            num_epochs_trained_list.append(num_epochs_trained)
            train_mse_list.append(train_mse)
            train_r2_pred_list.append(train_r2_pred)
            train_geom_mean_rtrn_list.append(train_geom_mean_rtrn)
            train_num_assets_list.append(num_assets)
            num_model_params_list.append(model.count_params())
            temp_yhats_df = rel_y_df[rel_y_df.date >= val_datetime_start].reset_index(drop=True).copy()
            temp_yhats_df['yhats'] = val_yhats_array
            val_week_frames.append(temp_yhats_df)

            # Save this week's yhat stat: avg diff across assets of the within asset 95th and 5th quantiles of yhats
            quantiles = temp_yhats_df.groupby('asset')['yhats'].quantile([0.05, 0.95]).unstack()
            quantiles['diff'] = quantiles[0.95] - quantiles[0.05]
            yhats_spread = quantiles['diff'].mean()
            yhats_spread_list.append(yhats_spread)

            # Output this week's results. temp_yhats_df holds exactly this week's rows,
            # so it is used directly rather than re-filtering the accumulated frame.
            val_week_df = temp_yhats_df.sort_values(by=['date', 'asset'], ignore_index=True)
            val_week_y = val_week_df[lhs_col].values
            val_week_yhats = val_week_df['yhats'].values
            val_week_r_2_pred = QuantTools.calcR2Pred(val_week_y, val_week_yhats)
            print(f'\n this week r 2 pred: {val_week_r_2_pred}')

            # Equal weighted, unrestricted
            val_week_eq_df = QuantTools.formPortfolioWeightsByQuantile(val_week_df.copy(), num_prtfl_qntls, False, 'yhats')
            val_week_eq_df['returns'] = val_week_eq_df.prtfl_wght_hml*val_week_eq_df[lhs_col]
            val_week_returns = (val_week_eq_df.groupby('date')['returns'].sum().values - tc_per_hour)
            print(f'this week eq wght unrestricted geom avg ret {QuantTools.calcGeomAvg(val_week_returns)} \n')

            # Attach mcap/volume columns once; each weighting scheme below gets its own copy
            val_week_aux_df = temp_yhats_df.merge(aux_df, on=['date', 'asset'], how='left', validate='one_to_one')
            val_week_aux_df = val_week_aux_df.sort_values(by=['date', 'asset'], ignore_index=True)

            # Mcap weighted, unrestricted
            val_week_mcap_df = QuantTools.formPortfolioWeightsByQuantile(val_week_aux_df.copy(), num_prtfl_qntls, mcap_weighted=True)
            val_week_mcap_df['returns'] = val_week_mcap_df.prtfl_wght_hml * val_week_mcap_df[lhs_col]
            val_week_returns_mcap = (val_week_mcap_df.groupby('date')['returns'].sum().values - tc_per_hour)
            print(f'this week mcap wght geom avg ret {QuantTools.calcGeomAvg(val_week_returns_mcap)} \n')

            # Mcap weighted with shortable and volume restrictions
            val_week_mcap_df = QuantTools.formPortfolioWeightsByQuantile(val_week_aux_df.copy(), num_prtfl_qntls, True, 'yhats',
                restrict_shortable_uni, shortable_asset_uni, 
                restrict_tradable_volume, prct_volume_threshold, total_trade_volume_per_hour)
            val_week_mcap_df['returns'] = val_week_mcap_df.prtfl_wght_hml * val_week_mcap_df[lhs_col]
            val_week_returns_mcap = (val_week_mcap_df.groupby('date')['returns'].sum().values - tc_per_hour)
            print(f'this week mcap wght shortable and volume restricted geom avg ret {QuantTools.calcGeomAvg(val_week_returns_mcap)} \n')

            # Release this refit's model and tensors before the next one is built,
            # mirroring the cleanup done in fitAndPredictOOS.
            del model
            del train_loading_input, train_factor_input, train_output
            del val_loading_input, val_factor_input, val_output
            gc.collect()

            # Skip to next hp point if this week val r2 is negative
            if val_week_r_2_pred < 0:
                break

        # Stop the timer after this hp grid point is completed
        toc = time.perf_counter()

        # For this hp point, add metadata to the results dict
        hps_results_dict['arch_name'] = arch_name
        hps_results_dict['val_start_date'] = val_start_date
        hps_results_dict['val_end_date'] = val_end_date
        hps_results_dict['runtime'] = round((toc - tic)/60, 0) 

        # Add training period statistics
        hps_results_dict['avg_epochs_trained'] = np.mean(num_epochs_trained_list)
        hps_results_dict['avg_num_assets'] = np.mean(train_num_assets_list)
        hps_results_dict['avg_num_model_params'] = np.mean(num_model_params_list)
        hps_results_dict['train_mse_min'] = np.min(train_mse_list)
        hps_results_dict['train_mse_mean'] = np.mean(train_mse_list)
        hps_results_dict['train_mse_max'] = np.max(train_mse_list)
        hps_results_dict['train_r2_pred_min'] = np.min(train_r2_pred_list)
        hps_results_dict['train_r2_pred_mean'] = np.mean(train_r2_pred_list)
        hps_results_dict['train_r2_pred_max'] = np.max(train_r2_pred_list)
        hps_results_dict['train_geom_mean_rtrn_min'] = np.min(train_geom_mean_rtrn_list)
        hps_results_dict['train_geom_mean_rtrn_mean'] = np.mean(train_geom_mean_rtrn_list)
        hps_results_dict['train_geom_mean_rtrn_max'] = np.max(train_geom_mean_rtrn_list)

        # Obtain validation period results
        val_y_yhats_df = pd.concat(val_week_frames)
        assert(0 == val_y_yhats_df.isnull().sum().sum()), "Missing observations in the validation period."
        val_y_yhats_df = val_y_yhats_df.sort_values(by=['date', 'asset'], ignore_index=True)
        val_yhats      = val_y_yhats_df.yhats.values
        val_ys         = val_y_yhats_df[lhs_col].values
        assert len(val_yhats) == len(val_ys)
        val_y_yhats_pos_df = QuantTools.formPortfolioWeightsByQuantile(val_y_yhats_df.copy(), num_prtfl_qntls)
        val_y_yhats_pos_df['returns'] = val_y_yhats_pos_df.prtfl_wght_hml*val_y_yhats_pos_df[lhs_col]
        returns = (val_y_yhats_pos_df.groupby('date')['returns'].sum().values - tc_per_hour)

        # Attach mcap/volume columns once for both mcap weighted variants
        nrows_before = val_y_yhats_df.shape[0]
        val_y_yhats_aux_df = val_y_yhats_df.merge(aux_df, on=['date', 'asset'], how='left', validate='one_to_one')
        assert(nrows_before == val_y_yhats_aux_df.shape[0])

        # Obtain validation period results, by MCAP!
        val_y_yhats_pos_mcap_df = QuantTools.formPortfolioWeightsByQuantile(val_y_yhats_aux_df.copy(), num_prtfl_qntls, mcap_weighted=True)
        val_y_yhats_pos_mcap_df['returns'] = val_y_yhats_pos_mcap_df.prtfl_wght_hml*val_y_yhats_pos_mcap_df[lhs_col]
        returns_mcap = (val_y_yhats_pos_mcap_df.groupby('date')['returns'].sum().values - tc_per_hour)

        # Obtain validation period results, by mcap with volume and shortable restrictions
        val_y_yhats_pos_mcap_restrict_df = QuantTools.formPortfolioWeightsByQuantile(val_y_yhats_aux_df.copy(), num_prtfl_qntls, True, 'yhats',
            restrict_shortable_uni, shortable_asset_uni, 
            restrict_tradable_volume, prct_volume_threshold, total_trade_volume_per_hour)
        val_y_yhats_pos_mcap_restrict_df['returns'] = (val_y_yhats_pos_mcap_restrict_df.prtfl_wght_hml
            * val_y_yhats_pos_mcap_restrict_df[lhs_col])
        returns_mcap_restricted = (val_y_yhats_pos_mcap_restrict_df.groupby('date')['returns'].sum().values - tc_per_hour)

        # Form validation period statistics
        hps_results_dict['val_mse']        = QuantTools.calcMSE(val_ys, val_yhats)
        hps_results_dict['val_r2_pred']    = QuantTools.calcR2Pred(val_ys, val_yhats)
        hps_results_dict['val_yhat_min']   = np.min(val_yhats)
        hps_results_dict['val_yhat_q1']    = np.quantile(val_yhats, q=0.25)
        hps_results_dict['val_yhat_q2']    = np.quantile(val_yhats, q=0.5)
        hps_results_dict['val_yhat_mean']  = np.mean(val_yhats)
        hps_results_dict['val_yhat_q3']    = np.quantile(val_yhats, q=0.75)
        hps_results_dict['val_yhat_max']   = np.max(val_yhats)
        hps_results_dict['geom_mean_1h']   = QuantTools.calcGeomAvg(returns)
        hps_results_dict['sharpe_annual']  = QuantTools.calcSharpe(returns, periods_in_year=periods_in_year)
        hps_results_dict['sortino_annual'] = QuantTools.calcSortino(returns, periods_in_year=periods_in_year)
        hps_results_dict['sd_annual']      = QuantTools.calcSD(returns, periods_in_year=periods_in_year)
        hps_results_dict['max_dd']         = QuantTools.calcMaxDrawdown(returns)
        hps_results_dict['avg_turnover']   = QuantTools.calcTSAvgTurnover(val_y_yhats_pos_df, 'prtfl_wght_hml')
        hps_results_dict['mcap_geom_mean_1h']   = QuantTools.calcGeomAvg(returns_mcap)
        hps_results_dict['mcap_sharpe_annual']  = QuantTools.calcSharpe(returns_mcap, periods_in_year=periods_in_year)
        hps_results_dict['mcap_sd_annual']      = QuantTools.calcSD(returns_mcap, periods_in_year=periods_in_year)
        hps_results_dict['mcap_max_dd']         = QuantTools.calcMaxDrawdown(returns_mcap)
        hps_results_dict['mcap_avg_turnover']   = QuantTools.calcTSAvgTurnover(val_y_yhats_pos_mcap_df, 'prtfl_wght_hml')
        hps_results_dict['mcap_restrict_geom_mean_1h']   = QuantTools.calcGeomAvg(returns_mcap_restricted)
        hps_results_dict['mcap_restrict_sharpe_annual']  = QuantTools.calcSharpe(returns_mcap_restricted, periods_in_year=periods_in_year)
        hps_results_dict['mcap_restrict_sd_annual']      = QuantTools.calcSD(returns_mcap_restricted, periods_in_year=periods_in_year)
        hps_results_dict['mcap_restrict_max_dd']         = QuantTools.calcMaxDrawdown(returns_mcap_restricted)
        hps_results_dict['mcap_restrict_avg_turnover']   = QuantTools.calcTSAvgTurnover(val_y_yhats_pos_mcap_restrict_df, 'prtfl_wght_hml')
        hps_results_dict['avg_over_val_wks_ast_95_m_5_quntl_yhats'] = np.mean(yhats_spread_list)

        # Add other information to the results
        meta_data_dict = {'num_rhs': num_chars,
            'lhs_pad': lhs_pad,
            'rhs_pad': rhs_pad,
            'num_qntls_prtls': num_prtfl_qntls,
            'tc_per_hour': tc_per_hour
        }
        hps_results_dict = {**meta_data_dict, **hps_results_dict}

        # Add the final datetime for the val period actually predicted in
        hps_results_dict['val_datetime_end'] = val_datetime_end

        # Save results to return
        results_list.append(hps_results_dict)

        # For this hp, save all results so far to a new timestamped csv
        cv_df = pd.DataFrame(results_list)
        timestr = time.strftime("%Y%m%d_%H%M%S")
        fp = cv_out_fp + '-' + arch_name + '-' + timestr + '.csv'
        cv_df.to_csv(fp, index=False)
    
    # Return cv results
    return results_list


In [8]:
def fitAndPredictOOS(y_df: pd.DataFrame, char_df: pd.DataFrame, ts_df: pd.DataFrame, weight_df: pd.DataFrame,
    aux_df: pd.DataFrame, asset_universe_dict: Dict[str, List], hps_dict: Dict,
    oos_start_date: str, oos_end_date: str, 
    lhs_col: str, lhs_pad: int, rhs_pad: int, num_prtfl_qntls: int, tc_per_hour: float,
    oos_out_fp: str,
    restrict_shortable_uni: bool=False, shortable_asset_uni: List[str]=[], 
    restrict_tradable_volume: bool=False, prct_volume_threshold: float=0.05, total_trade_volume_per_hour: int=1e6
    ) -> Tuple[pd.DataFrame, list, list, list, list]:
    """
    Predicts out-of-sample (OOS) returns for the test period cross section by 
        recursively refitting in each subsequent week of data.

    Weeks start at Sunday midnight; a leading partial week is added when the OOS
    period does not start on a Sunday. After every week the accumulated
    predictions are pickled to `<oos_out_fp>_<start>_<end>.pkl`.

    :param y_df: Dataframe containing the future returns, indexed by date and asset.
    :param char_df: Dataframe containing characteristic data, indexed by date and asset.
    :param ts_df: Dataframe containing time-series data, indexed by date.
    :param weight_df: DataFrame containing panel of mcap and volume data to use to weight obj func.
    :param aux_df: DataFrame containing date asset mcap and volume_tp1 columns.
    :param asset_universe_dict: Dictionary containing lists of asset identifiers for specific date string keys.
    :param hps_dict: Hyperparameters used in model building and fitting; must contain 'num_lags'.
    :param oos_start_date: The start date for the OOS period.
    :param oos_end_date: The end date for the OOS period. NOTE(review): only used to build the
        output filename; the refit loop covers every date in y_df on or after oos_start_date,
        so the caller presumably trims y_df to the intended end - confirm.
    :param lhs_col: The left-hand-side column name to be used in the model.
    :param lhs_pad: Padding value for missing values in the left-hand-side column.
    :param rhs_pad: Padding value for missing values in the right-hand-side columns.
    :param num_prtfl_qntls: Number of portfolio quantiles to be used.
    :param tc_per_hour: Transaction cost per hour.
    :param oos_out_fp: Filepath prefix for saving the out-of-sample predictions.
    :param restrict_shortable_uni (bool): If True, restrict shortable assets to those listed.
    :param shortable_asset_uni (List): assets that are shortable.
    :param restrict_tradable_volume (bool): If True, restrict volume to specified percentage.
    :param prct_volume_threshold (float): Fraction of datetime-asset volume that is tradable.
    :param total_trade_volume_per_hour (int): total dollar trade volume to place in long and short
        positions used to calculate what fraction of datetime-asset volume is traded.
    
    :return: Tuple of, in order: dataframe containing OOS predictions (column 'yhats'),
                list of training R^2 prediction values, list of training geometric mean returns, 
                list of the number of model parameters, and list of training MSE values
                (one list entry per refit week).
    :raises ValueError: If y_df has no observation dated exactly oos_start_date.
    """
    # Form the fp to save yhats
    yyyymmdd1 = oos_start_date.replace('-', '')
    yyyymmdd2 = oos_end_date.replace('-', '')
    out_fp    = oos_out_fp+'_'+yyyymmdd1+'_'+yyyymmdd2+'.pkl'

    # Determine RHS columns
    char_cols = list(char_df.columns.values)
    char_cols.remove('date')
    char_cols.remove('asset')
    macro_cols = [col for col in ts_df.columns if 'macro' in col]

    # Determine number of RHS values
    num_chars  = len(char_cols)
    num_macro_vectors = int(len(macro_cols)/len(char_cols))

    # Determine oos period datetimes to loop over and datetimes to refit at
    oos_dts_dict = {}
    oos_sun_midnights = np.unique(y_df[(y_df.date>=oos_start_date) 
        & (y_df.date.dt.hour==0) & (y_df.date.dt.day_of_week==6)].date.values)

    # Fail with a clear message instead of numpy's empty-reduction error
    first_oos_dates = y_df[(y_df.date==oos_start_date)].date.values
    if len(first_oos_dates) == 0:
        raise ValueError(f"No observations in y_df dated {oos_start_date}.")
    first_oos_datetime = np.min(first_oos_dates)

    # Check if first oos date is sunday midnight, if not then add the dates.
    # Days since 1970-01-01 (a Thursday) shifted so Monday=0 ... Sunday=6.
    day_of_week_of_first_oos_datetime = (first_oos_datetime.astype('datetime64[D]').view('int64') - 4) % 7
    if day_of_week_of_first_oos_datetime != 6:
        partial_week_mask = (y_df.date>=first_oos_datetime)
        # Without any Sunday midnight in the period the partial week is the whole period
        if len(oos_sun_midnights) > 0:
            partial_week_mask = partial_week_mask & (y_df.date<oos_sun_midnights[0])
        oos_dts_dict[first_oos_datetime] = np.unique(y_df[partial_week_mask].date.values)

    # Complete the dictionary with all the sundays as keys as the dates until the next sunday as the values
    for oos_sun_midnight in oos_sun_midnights:
        next_sun_midnight = oos_sun_midnight + np.timedelta64(7, 'D')
        oos_dts_dict[oos_sun_midnight] = np.unique(y_df[(y_df.date>=oos_sun_midnight) 
                                            & (y_df.date<next_sun_midnight)].date.values)
        
    # Create result objects to return
    oos_week_frames           = []
    oos_y_yhats_df            = pd.DataFrame()
    train_r2_pred_list        = []
    train_geom_mean_rtrn_list = []
    train_mse_list            = []
    num_model_params_list     = []

    # Loop over all the datetimes in the oos period where we want to refit the model
    for oos_datetime_start in list(oos_dts_dict.keys()):
        # Monitor progress
        print('Currently fitting and predicting for the week starting: ')
        print(oos_datetime_start)

        # form training and oos datetime objects
        train_datetimes = list(ts_df[ts_df.date < oos_datetime_start].date.values)[hps_dict['num_lags']-1:]
        oos_datetimes_window = oos_dts_dict[oos_datetime_start]
        oos_datetime_end = np.max(oos_datetimes_window)

        # form appropriate asset universe and update num asset parameter
        first_day_of_month_for_current_oos_dt = np.datetime_as_string(oos_datetime_start, unit='M')+'-01'
        asset_universe = asset_universe_dict[first_day_of_month_for_current_oos_dt]

        # figure out what assets are not included in this asset universe to drop from previous return df
        prev_ret_cols_to_drop = [col for col in ts_df.columns 
                                if (col != 'date') & ('asset_r_' in col) 
                                if col.split('_')[2] not in asset_universe]

        # for all dfs, cut down assets and form relevant dataframes of up to end of current oos week
        rel_y_df    = y_df[(y_df.asset.isin(asset_universe))
                            & ((y_df.date <= oos_datetime_end))].copy()
        rel_char_df = char_df[(char_df.asset.isin(asset_universe))
                            & (char_df.date <= oos_datetime_end)].copy()
        rel_ts_df   = ts_df[(ts_df.date <= oos_datetime_end)].drop(columns=prev_ret_cols_to_drop, axis=1)
        rel_weight_df = weight_df[(weight_df.asset.isin(asset_universe))
                            & (weight_df.date < oos_datetime_start)].copy()

        # form rel prev asset return col names
        prev_asset_ret_cols = [col for col in rel_ts_df.columns if 'asset_r_' in col]

        # Update asset universe with intersection of what we have and what we should have!
        lhs_asset_uni = set(rel_y_df[rel_y_df.date>=oos_datetime_start].asset.unique())
        char_asset_uni = set(rel_char_df[rel_char_df.date>=oos_datetime_start].asset.unique())
        assert(len(lhs_asset_uni) == len(char_asset_uni)),"LHS and RHS dont have same assets!"
        assert(len(lhs_asset_uni) == len(prev_asset_ret_cols)), "LHS and prev ret dont have same assets!"
        asset_universe = set(asset_universe).intersection(lhs_asset_uni)
        assert(len(asset_universe) == len(char_asset_uni))

        # Set number of assets to consider
        num_assets = len(asset_universe)

        # normalize rhs data (note: this takes 2-15 min given big df's)
        rel_char_df = normalizeAndFillMissing(rel_char_df, lhs_col,
                        lhs_pad, rhs_pad, ignore_cols=['date', 'asset'])
        rel_ts_df = normalizeAndFillMissing(rel_ts_df, lhs_col, 
                        lhs_pad, rhs_pad, ignore_cols='date')

        # form training and oos data
        train_loading_input, train_factor_input, train_output = formLhsAndRhsTensors(rel_y_df, rel_char_df, rel_ts_df,
            train_datetimes, prev_asset_ret_cols, macro_cols, lhs_col, hps_dict['num_lags'], num_assets, num_chars)
        oos_loading_input, oos_factor_input, oos_output = formLhsAndRhsTensors(rel_y_df, rel_char_df, rel_ts_df,
            oos_datetimes_window, prev_asset_ret_cols, macro_cols, lhs_col, hps_dict['num_lags'], num_assets, num_chars)
        
        # Fit and predict (the number of epochs trained is not reported for OOS fits)
        num_training_obs = train_output.shape[0]
        model = buildTransformer(
            num_chars, num_macro_vectors, num_assets, num_training_obs, hps_dict, rel_weight_df)
        model, _, train_mse, train_r2_pred, train_geom_mean_rtrn = fitTransformer(
            model, train_loading_input, train_factor_input, train_output,
            rel_y_df, lhs_col, lhs_pad, tc_per_hour, num_prtfl_qntls, 
            train_datetimes, num_assets, hps_dict)
        oos_yhats = model.predict([oos_loading_input, oos_factor_input])
        oos_yhats_array = tf.reshape(oos_yhats, [-1]).numpy()
        
        # Save this OOS week results
        train_r2_pred_list.append(train_r2_pred)
        train_geom_mean_rtrn_list.append(train_geom_mean_rtrn)
        train_mse_list.append(train_mse)
        num_model_params_list.append(model.count_params())
        temp_y_yhats_df = rel_y_df[rel_y_df.date >= oos_datetime_start].reset_index(drop=True).copy()
        temp_y_yhats_df['yhats'] = oos_yhats_array
        # Rebuild the cumulative frame from the weekly frames only, so no empty
        # placeholder frame takes part in the concatenation.
        oos_week_frames.append(temp_y_yhats_df)
        oos_y_yhats_df = pd.concat(oos_week_frames)

        # Display this week's results. temp_y_yhats_df holds exactly this week's rows,
        # so it is used directly rather than re-filtering the accumulated frame.
        oos_week_df = temp_y_yhats_df.sort_values(by=['date', 'asset'], ignore_index=True)
        oos_week_y = oos_week_df[lhs_col].values
        oos_week_yhats = oos_week_df['yhats'].values
        oos_week_r_2_pred = QuantTools.calcR2Pred(oos_week_y, oos_week_yhats)
        print(f'\n this week r 2 pred: {oos_week_r_2_pred}')

        # Equal weighted, unrestricted
        oos_week_eq_df = QuantTools.formPortfolioWeightsByQuantile(oos_week_df.copy(), num_prtfl_qntls)
        oos_week_eq_df['returns'] = oos_week_eq_df.prtfl_wght_hml*oos_week_eq_df[lhs_col]
        oos_week_returns_eq = (oos_week_eq_df.groupby('date')['returns'].sum().values - tc_per_hour)
        print(f'this week eq wght unrestricted geom avg ret {QuantTools.calcGeomAvg(oos_week_returns_eq)} \n')

        # Attach mcap/volume columns once; each weighting scheme below gets its own copy
        oos_week_aux_df = oos_week_df.merge(aux_df, on=['date', 'asset'], how='left', validate='one_to_one')

        # Mcap weighted, unrestricted
        oos_week_mcap_df = QuantTools.formPortfolioWeightsByQuantile(oos_week_aux_df.copy(), num_prtfl_qntls, mcap_weighted=True)
        oos_week_mcap_df['returns'] = oos_week_mcap_df.prtfl_wght_hml*oos_week_mcap_df[lhs_col]
        oos_week_returns_mcap = (oos_week_mcap_df.groupby('date')['returns'].sum().values - tc_per_hour)
        print(f'this week mcap wght geom avg ret {QuantTools.calcGeomAvg(oos_week_returns_mcap)} \n')

        # Mcap weighted with shortable and volume restrictions
        oos_week_mcap_restrict_df = QuantTools.formPortfolioWeightsByQuantile(oos_week_aux_df.copy(), num_prtfl_qntls, True, 'yhats',
            restrict_shortable_uni, shortable_asset_uni, 
            restrict_tradable_volume, prct_volume_threshold, total_trade_volume_per_hour)
        oos_week_mcap_restrict_df['returns'] = oos_week_mcap_restrict_df.prtfl_wght_hml*oos_week_mcap_restrict_df[lhs_col]
        oos_week_returns_mcap_restrict = (oos_week_mcap_restrict_df.groupby('date')['returns'].sum().values - tc_per_hour)
        print(f'this week mcap wght shortable and volume restricted geom avg ret {QuantTools.calcGeomAvg(oos_week_returns_mcap_restrict)} \n')

        # Drop references to this week's model and tensors so they can be garbage
        # collected before the next refit allocates new ones.
        del model
        del train_loading_input, train_factor_input, train_output
        del oos_loading_input, oos_factor_input, oos_output
        gc.collect()

        # Save the file in case this breaks part way through
        oos_y_yhats_df.to_pickle(out_fp)

    return oos_y_yhats_df, train_r2_pred_list, train_geom_mean_rtrn_list, num_model_params_list, train_mse_list


In [9]:
if __name__ == "__main__":
    # Driver for the transformer factor model test run:
    #   1. read the train/test panels and the asset-universe dict,
    #   2. build the LHS / characteristic / time-series / weight frames,
    #   3. build an auxiliary frame of market cap and next-hour volume,
    #   4. refit and predict week by week over the test window,
    #   5. pickle the per-week training MSEs.

    # set args
    IN_TRAIN_FP     = '../data/clean/panel_train.pkl'
    IN_TEST_FP      = '../data/clean/panel_test.pkl'
    ASSET_IN_FP     = '../data/clean/strict_asset_universe_hourly_dict.pickle'
    CV_OUT_FP       = '../output/high_dim_fm/cv_results'
    TEST_OUT_FP     = '../data/clean/test_yhats_transformer'
    LHS_COL         = 'r_ex_tp1'
    VAL_START_DATE  = '2022-01-01' # NOTE: A Sunday.
    VAL_END_DATE    = '2022-07-02' # NOTE: A Saturday.
    TEST_START_DATE = '2022-07-03' # NOTE: A Sunday.
    TEST_END_DATE   = '2023-01-01' # NOTE: A Sunday; used below as an exclusive upper bound.
    PERIODS_IN_YEAR = int(365.25*24) # hourly observations per year
    ARCH_NAME       = 'transformer'
    LHS_PAD         = 0
    RHS_PAD         = 0
    NUM_PRTFL_QNTLS = 5
    TC_PER_HOUR     = 0

    # Hyperparameter grid; only consumed by the (currently disabled) runCV call below.
    HP_GRID         = {'number_factors': [1], # NOTE: based on cv, seems opt for sure.
        'num_lags': [1], # NOTE: based on cv, 1 seems opt for sure w/o doing feat selection across entire matrix.
        'hidden_dim': [32], # NOTE: 64 or 16 may be more optimal
        'num_heads': [1], # NOTE: 2 heads may be better
        'l2_penalty': [1e-3],
        'dropout_pct': [0.5], # NOTE: this seems good as other hps can accord for improved setting here; i.e. it's redundant.
        'initial_learning_rate': [4e-4],
        'learning_decay_rate': [0.99],
        'adam_beta_1': [0.9],
        'adam_beta_2': [0.99],
        'adam_clipnorm': [100],
        'batch_size': [128], 
        'num_epochs': [100], 
        'early_stopping': [True],
        'patience': [5]}

    # Trading restrictions applied to the restricted mcap-weighted portfolio.
    RESTRICT_SHORTABLE_UNI = True
    SHORTABLE_UNI = ['aave', 'algo', 'rep', 'btc', 'bch', 'ada', 'link', 'comp', 'atom', 'dash', 'doge', 'dot', 
        'eos', 'eth', 'etc', 'fil', 'flow', 'kava', 'keep', 'ksm', 'ltc', 'omg', 'matic',  
        'sc', 'sol', 'trx', 'uni', 'xlm', 'xmr', 'xrp', 'xtz', 'zec']
    # NOTE: Assets added in post Sept 1 2022. 'sc' is already in the base list
    #       above, so it is not repeated here (it used to appear twice).
    # NOTE(review): this extended list is passed for the whole test window,
    #       which starts 2022-07-03 -- confirm the pre-September weeks are
    #       meant to treat these assets as shortable.
    SHORTABLE_UNI += ['ape', 'avax', 'axs', 'bat', 'crv', 
        'lrc', 'mana', 'nano', 'sand', 'grt', 'waves']
    RESTRICT_TRADABLE_VOLUME = True
    PRCT_VOLUME_THRESHOLD = 0.05
    TOTAL_TRADE_VOLUME_PER_HOUR = 1e6

    # Value assigned to next-hour volume when no observation exists one hour ahead.
    MISSING_VOLUME_FILL = 1e6
    
    # read in data
    with open(ASSET_IN_FP, "rb") as f:
        asset_universe_dict = pickle.load(f)
    train_df = pd.read_pickle(IN_TRAIN_FP)
    test_df  = pd.read_pickle(IN_TEST_FP)
    all_df = pd.concat([train_df, test_df])

    # drop data beyond the test period
    all_df = all_df[all_df.date < TEST_END_DATE]

    # subset rows and columns and separate input and output data
    # NOTE: Jan 2019 seems like optimal start date
    y_df, char_df, ts_df, weight_df = subsetRowsAndColumns(all_df, LHS_COL)
    gc.collect()

    # prep aux data: market cap at t plus the volume observed one hour later.
    mcap_df = all_df[['date', 'asset', 'char_mcap_t']].copy()
    mcap_df = mcap_df.rename(columns={'char_mcap_t': 'mcap'})
    volume_df = all_df[['date', 'asset', 'char_volume_t']].copy()
    # Shift timestamps back one hour so the row dated t carries the volume
    # recorded at t+1 (hence the 'volume_tp1' name).
    volume_df['date'] -= pd.Timedelta(hours=1)
    volume_df = volume_df.rename(columns={'char_volume_t': 'volume_tp1'})
    aux_df = mcap_df.merge(volume_df, on=['date', 'asset'], how='left', validate='one_to_one')
    # Rows with no match one hour ahead get the fill value instead of NaN.
    aux_df.loc[aux_df.volume_tp1.isnull(), 'volume_tp1'] = MISSING_VOLUME_FILL
    
    # pad the lhs data
    y_df = normalizeAndFillMissing(y_df, 
            lhs_col=LHS_COL, lhs_pad=LHS_PAD, rhs_pad=RHS_PAD, 
            ignore_cols=['date', 'asset', LHS_COL])

    # Custom step forward cross validation; disabled for this run. Uncomment to
    # search HP_GRID over the validation window.
    # cv_results_list = runCV(y_df, char_df, ts_df, weight_df, aux_df, asset_universe_dict, 
    #     VAL_START_DATE, VAL_END_DATE, TEST_START_DATE,
    #     LHS_COL, LHS_PAD, RHS_PAD, NUM_PRTFL_QNTLS, TC_PER_HOUR,
    #     HP_GRID, PERIODS_IN_YEAR, CV_OUT_FP, ARCH_NAME,
    #     RESTRICT_SHORTABLE_UNI, SHORTABLE_UNI,
    #     RESTRICT_TRADABLE_VOLUME, PRCT_VOLUME_THRESHOLD, TOTAL_TRADE_VOLUME_PER_HOUR)

    # Opt HPS: fixed hyperparameters for the test-period fit. Differs from
    # HP_GRID in using 20 epochs with early stopping turned off.
    opt_hps_dict = {'number_factors': 1,
        'num_lags': 1,
        'hidden_dim': 32,
        'num_heads': 1,
        'l2_penalty': 1e-3,
        'dropout_pct': 0.5,
        'initial_learning_rate': 4e-4,
        'learning_decay_rate': 0.99,
        'adam_beta_1': 0.9,
        'adam_beta_2': 0.99,
        'adam_clipnorm': 100,
        'batch_size': 128,
        'num_epochs': 20,
        'early_stopping': False,
        'patience': 5}
    
    # Fit and predict into test period. Returns the stacked OOS y / yhat frame
    # plus one training diagnostic per refit week (r2 pred, geom mean return,
    # model parameter count, mse).
    oos_y_yhats_df, train_r2_pred_list, train_geom_mean_rtrn_list, num_model_params_list, train_mse_list = fitAndPredictOOS(
        y_df, char_df, ts_df, weight_df, aux_df, asset_universe_dict, opt_hps_dict, 
        TEST_START_DATE, TEST_END_DATE, LHS_COL, LHS_PAD, RHS_PAD, NUM_PRTFL_QNTLS, TC_PER_HOUR, TEST_OUT_FP,
        RESTRICT_SHORTABLE_UNI, SHORTABLE_UNI,
        RESTRICT_TRADABLE_VOLUME, PRCT_VOLUME_THRESHOLD, TOTAL_TRADE_VOLUME_PER_HOUR)
    
    # Save the oos training mse's after forming the fp
    # (file name is suffixed with the test start/end dates as yyyymmdd).
    yyyymmdd1 = TEST_START_DATE.replace('-', '')
    yyyymmdd2 = TEST_END_DATE.replace('-', '')
    oos_train_mse_out_fp = '../data/clean/test_train_mse_transformer'
    out_fp    = oos_train_mse_out_fp+'_'+yyyymmdd1+'_'+yyyymmdd2+'.pkl'
    with open(out_fp, "wb") as f:
        pickle.dump(train_mse_list, f)


Currently fitting and predicting for the week starting: 
2022-07-03T00:00:00.000000000


2023-10-18 09:25:45.011311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5234 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3070 Ti, pci bus id: 0000:65:00.0, compute capability: 8.6


Epoch 1/20


2023-10-18 09:27:28.028698: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel/dropout/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer
2023-10-18 09:27:46.861782: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-10-18 09:27:47.088271: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600
2023-10-18 09:27:47.215701: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x45627e60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-10-18 09:27:47.215725: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA GeForce RTX 3070 Ti, Compute Capability 8.6
2023-10-18 09:27:47.221432: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:2

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.14003771543502808
this week eq wght unrestricted geom avg ret -0.00036713564708712454 

this week mcap wght geom avg ret 1.816705394697138e-05 

this week mcap wght shortable and volume restricted geom avg ret 0.00024038483670030253 

Currently fitting and predicting for the week starting: 
2022-07-10T00:00:00.000000000
Epoch 1/20


2023-10-18 09:55:35.077415: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_1/dropout_84/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.13377785682678223
this week eq wght unrestricted geom avg ret 0.0003189894332280563 

this week mcap wght geom avg ret 0.0003811316870241388 

this week mcap wght shortable and volume restricted geom avg ret 0.0003614925877146735 

Currently fitting and predicting for the week starting: 
2022-07-17T00:00:00.000000000
Epoch 1/20


2023-10-18 10:23:32.013234: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_2/dropout_168/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.04690772294998169
this week eq wght unrestricted geom avg ret 0.0006226367381232389 

this week mcap wght geom avg ret 0.00013163420409201265 

this week mcap wght shortable and volume restricted geom avg ret 0.00030990573319655823 

Currently fitting and predicting for the week starting: 
2022-07-24T00:00:00.000000000
Epoch 1/20


2023-10-18 10:51:37.496705: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_3/dropout_252/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.044119298458099365
this week eq wght unrestricted geom avg ret -5.4853810686150695e-05 

this week mcap wght geom avg ret 0.0005449998141653367 

this week mcap wght shortable and volume restricted geom avg ret 0.0006660788176029442 

Currently fitting and predicting for the week starting: 
2022-07-31T00:00:00.000000000
Epoch 1/20


2023-10-18 11:19:51.862494: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_4/dropout_336/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.10799604654312134
this week eq wght unrestricted geom avg ret 0.0006733242487895197 

this week mcap wght geom avg ret 0.0009393700630013768 

this week mcap wght shortable and volume restricted geom avg ret 0.0012315879402724494 

Currently fitting and predicting for the week starting: 
2022-08-07T00:00:00.000000000
Epoch 1/20


2023-10-18 11:46:50.534188: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_5/dropout_420/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.06244319677352905
this week eq wght unrestricted geom avg ret 2.0623062380931145e-05 

this week mcap wght geom avg ret -1.170864808552885e-05 

Of a total 168 datetimes, 12.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 12.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.0002974873018188884 

Currently fitting and predicting for the week starting: 
2022-08-14T00:00:00.000000000
Epoch 1/20


2023-10-18 12:09:41.100722: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_6/dropout_483/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.07195639610290527
this week eq wght unrestricted geom avg ret -0.00015553179222038693 

this week mcap wght geom avg ret -0.0006022945005412783 

Of a total 168 datetimes, 4.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 4.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.0002237447071240517 

Currently fitting and predicting for the week starting: 
2022-08-21T00:00:00.000000000
Epoch 1/20


2023-10-18 12:30:46.060913: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_7/dropout_546/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.06923556327819824
this week eq wght unrestricted geom avg ret 0.00021151425602461416 

this week mcap wght geom avg ret -0.0003298614256425081 

Of a total 168 datetimes, 10.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 10.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.0002658245776305801 

Currently fitting and predicting for the week starting: 
2022-08-28T00:00:00.000000000
Epoch 1/20


2023-10-18 12:46:26.381605: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_8/dropout_609/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.06792277097702026
this week eq wght unrestricted geom avg ret 0.00020280375511427806 

this week mcap wght geom avg ret 0.0008236731561461763 

this week mcap wght shortable and volume restricted geom avg ret 0.001110688255844039 

Currently fitting and predicting for the week starting: 
2022-09-04T00:00:00.000000000
Epoch 1/20


2023-10-18 13:05:30.749343: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_9/dropout_672/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.09121567010879517
this week eq wght unrestricted geom avg ret 0.0002027078785660219 

this week mcap wght geom avg ret 0.0001766293844411848 

this week mcap wght shortable and volume restricted geom avg ret 0.000501469929780507 

Currently fitting and predicting for the week starting: 
2022-09-11T00:00:00.000000000
Epoch 1/20


2023-10-18 13:25:44.030025: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_10/dropout_741/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.048821985721588135
this week eq wght unrestricted geom avg ret 0.0005730488665676159 

this week mcap wght geom avg ret 0.00048487367345240173 

this week mcap wght shortable and volume restricted geom avg ret 0.0005899218282714447 

Currently fitting and predicting for the week starting: 
2022-09-18T00:00:00.000000000
Epoch 1/20


2023-10-18 13:45:52.421685: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_11/dropout_810/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.08785879611968994
this week eq wght unrestricted geom avg ret -0.00023776592333502045 

this week mcap wght geom avg ret -0.0003583174564151248 

Of a total 168 datetimes, 2.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 2.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret -0.0003042637165940887 

Currently fitting and predicting for the week starting: 
2022-09-25T00:00:00.000000000
Epoch 1/20


2023-10-18 14:05:56.488354: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_12/dropout_879/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.04704552888870239
this week eq wght unrestricted geom avg ret 0.0002371767616469178 

this week mcap wght geom avg ret -0.00015378314071456956 

this week mcap wght shortable and volume restricted geom avg ret -4.091691064855851e-06 

Currently fitting and predicting for the week starting: 
2022-10-02T00:00:00.000000000
Epoch 1/20


2023-10-18 14:26:00.070773: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_13/dropout_948/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.03438788652420044
this week eq wght unrestricted geom avg ret -5.4096575447304396e-05 

this week mcap wght geom avg ret 1.5819468193400965e-05 

Of a total 168 datetimes, 1.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 1.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.00023137214156920827 

Currently fitting and predicting for the week starting: 
2022-10-09T00:00:00.000000000
Epoch 1/20


2023-10-18 14:44:31.629749: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_14/dropout_1008/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.05856281518936157
this week eq wght unrestricted geom avg ret 0.00021809003634731816 

this week mcap wght geom avg ret 0.0002713941456573554 

this week mcap wght shortable and volume restricted geom avg ret 0.00032425116414969146 

Currently fitting and predicting for the week starting: 
2022-10-16T00:00:00.000000000
Epoch 1/20


2023-10-18 15:03:13.663002: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_15/dropout_1068/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.033099353313446045
this week eq wght unrestricted geom avg ret 0.0006202409043547519 

this week mcap wght geom avg ret 0.00042369508457684724 

Of a total 168 datetimes, 1.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 1.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.0003923777226872982 

Currently fitting and predicting for the week starting: 
2022-10-23T00:00:00.000000000
Epoch 1/20


2023-10-18 15:21:29.622002: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_16/dropout_1128/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.08055883646011353
this week eq wght unrestricted geom avg ret -0.0004199763282427549 

this week mcap wght geom avg ret 0.00035039179260465403 

Of a total 168 datetimes, 1.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 1.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.0004232193946132057 

Currently fitting and predicting for the week starting: 
2022-10-30T00:00:00.000000000
Epoch 1/20


2023-10-18 15:40:25.757339: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_17/dropout_1188/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.011583387851715088
this week eq wght unrestricted geom avg ret 7.613726491717543e-06 

this week mcap wght geom avg ret 0.0005897451877714399 

this week mcap wght shortable and volume restricted geom avg ret 0.000867074943283308 

Currently fitting and predicting for the week starting: 
2022-11-06T00:00:00.000000000
Epoch 1/20


2023-10-18 15:59:07.056002: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_18/dropout_1248/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.1071922779083252
this week eq wght unrestricted geom avg ret 0.0004381831907547262 

this week mcap wght geom avg ret 0.000607477325550887 

this week mcap wght shortable and volume restricted geom avg ret 0.0008046338766172223 

Currently fitting and predicting for the week starting: 
2022-11-13T00:00:00.000000000
Epoch 1/20


2023-10-18 16:16:43.282762: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_19/dropout_1302/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.09894543886184692
this week eq wght unrestricted geom avg ret 0.0003836521644557944 

this week mcap wght geom avg ret 0.0009670300140383059 

this week mcap wght shortable and volume restricted geom avg ret 0.0011636226399371896 

Currently fitting and predicting for the week starting: 
2022-11-20T00:00:00.000000000
Epoch 1/20


2023-10-18 16:35:24.628183: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_20/dropout_1356/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: -0.48987627029418945
this week eq wght unrestricted geom avg ret -0.0013941219775478464 

this week mcap wght geom avg ret -0.001649355185548651 

this week mcap wght shortable and volume restricted geom avg ret -0.0018335461980441004 

Currently fitting and predicting for the week starting: 
2022-11-27T00:00:00.000000000
Epoch 1/20


2023-10-18 16:54:19.826423: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_21/dropout_1410/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.06907325983047485
this week eq wght unrestricted geom avg ret 8.472690216909662e-05 

this week mcap wght geom avg ret 0.00022012416524175293 

this week mcap wght shortable and volume restricted geom avg ret 0.00019382332767925448 

Currently fitting and predicting for the week starting: 
2022-12-04T00:00:00.000000000
Epoch 1/20


2023-10-18 17:12:04.593443: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_22/dropout_1464/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.08774739503860474
this week eq wght unrestricted geom avg ret -7.410754272441089e-05 

this week mcap wght geom avg ret 0.0003118786879270363 

Of a total 168 datetimes, 1.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 2.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.00019235275883189296 

Currently fitting and predicting for the week starting: 
2022-12-11T00:00:00.000000000
Epoch 1/20


2023-10-18 17:30:06.176769: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_23/dropout_1518/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.041448235511779785
this week eq wght unrestricted geom avg ret -9.681147252293965e-05 

this week mcap wght geom avg ret 0.0005051446572765794 

Of a total 168 datetimes, 6.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.0006131146917904839 

Currently fitting and predicting for the week starting: 
2022-12-18T00:00:00.000000000
Epoch 1/20


2023-10-18 17:45:25.134280: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_24/dropout_1572/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.07934528589248657
this week eq wght unrestricted geom avg ret -0.00012431510983279548 

this week mcap wght geom avg ret 0.0003843903947859939 

Of a total 168 datetimes, 1.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 6.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.0004483560870358616 

Currently fitting and predicting for the week starting: 
2022-12-25T00:00:00.000000000
Epoch 1/20


2023-10-18 18:03:41.980712: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_25/dropout_1626/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

 this week r 2 pred: 0.03308725357055664
this week eq wght unrestricted geom avg ret 4.313770902508729e-05 

this week mcap wght geom avg ret 0.00030349762214232356 

Of a total 168 datetimes, 5.0% have insufficient volume to trade, or not shortable.
Of a total 168 datetimes, 10.0% have insufficient volume to trade.
this week mcap wght shortable and volume restricted geom avg ret 0.00022951117901937046 



In [10]:
# NOTE: Q3+Q4 2022 HPS
# leverage: 3x
# opt_hps_dict = {'number_factors': 1,
#     'num_lags': 2,
#     'hidden_dim': 32,
#     'num_heads': 1,
#     'l2_penalty': 1e-3,
#     'dropout_pct': 0.5,
#     'initial_learning_rate': 4e-4,
#     'learning_decay_rate': 0.99,
#     'adam_beta_1': 0.9,
#     'adam_beta_2': 0.99,
#     'adam_clipnorm': 100,
#     'batch_size': 128,
#     'num_epochs': 20,
#     'early_stopping': False,
#     'patience': 5}
