In [2]:
%run -i writefile2.py

In [3]:
%%writefile2 --name honest_forward.py --source train_honest_forward.ipynb

"""
implements an honest forward-prediction scheme
where the sliding window eventually operates on its own predictions after a
burn-in phase.
"""

import require
import numpy as np
# number of outcome columns: the length of whatever require.single( "owid_outcomes" ) returns.
# predict_replace treats the first n_outcomes columns of a dataframe as the outcome columns.
n_outcomes = len( require.single( "owid_outcomes" ))

class honest_forward:
    """
    rolls a trained sliding-window learner forward over a dataframe, feeding the
    learner's own predictions back into its input window.

    the instance must carry a `learner` attribute (it is not set inside this class)
    that provides `length_l`, `lag`, `length_r` and `predict( window )`.
    """

    def predict_replace( self, df, start = None, length = None, callback = None ):
        """
        returns a copy of `df` in which the outcome columns (the first `n_outcomes`
        columns) are replaced by iterative forward predictions.

        df:       dataframe to predict on; it is copied, never modified.
        start:    first row to predict. defaults to, and is silently raised to,
                  `learner.length_l + learner.lag`, the earliest row for which a
                  full input window exists.
        length:   number of rows to predict, defaults to all rows from `start` on.
        callback: optional progress hook, called after every step with the
                  fraction of completed steps in (0, 1].

        the outcome columns of all rows from `start` on are blanked (nan) before
        predicting, so a window that reaches those rows sees earlier predictions
        instead of the original outcomes. the remaining columns keep their original
        values. outcome rows at or after `start + length` stay nan.

        raises AssertionError if `length` is shorter than `learner.length_r`, if
        `start + length` exceeds the number of rows, or if the learner returns a
        prediction whose shape is not `( learner.length_r, n_outcomes )`.
        """

        callback = ( lambda * _: None ) if callback is None else callback
        learner = self.learner

        # the window geometry does not change while predicting, so read it once
        length_l, length_r, lag = learner.length_l, learner.length_r, learner.lag

        min_start = length_l + lag
        start = min_start if start is None else max( start, min_start )
        length = df.shape[ 0 ] - start if length is None else length
        assert length >= length_r, \
            f"length ({ length }) must be at least learner.length_r ({ length_r })"
        assert start + length <= df.shape[ 0 ], \
            f"start + length ({ start + length }) exceeds the number of rows ({ df.shape[ 0 ]})"

        # each step writes length_r rows and advances by one row
        n_predictions = 1 + length - length_r

        df_pred = df.copy( )
        df_pred.iloc[ start:, :n_outcomes ] = np.nan

        for i in range( n_predictions ):

            # the input window ends `lag` rows before the first row written in this step
            window_end = start - lag + i
            window = df_pred.iloc[ window_end - length_l: window_end, : ].to_numpy( )
            y = learner.predict( window )
            assert y.shape == ( length_r, n_outcomes ), \
                f"unexpected prediction shape { y.shape }, expected {( length_r, n_outcomes )}"
            df_pred.iloc[ start + i: start + i + length_r, :n_outcomes ] = y
            callback(( i + 1 ) / n_predictions )

        return df_pred

In [4]:
%%writefile2

"""
trains a single weak learner.
cache node wrapper around honest_forward.py and train_weak_learner.py that caches the trained model
"""

import nodes
import require

@nodes.generic_node
def train_honest_forward( subset = slice( None )):
    """
    node factory for the honest forward model.

    subset is passed on unchanged to the `train_weak_learner` dependency.
    returns the inner `main` function, which attaches the trained learner and a
    report to a fresh model instance.
    """

    import numpy as np

    # model class, resolved through the project's require mechanism
    model = require.single( "honest_forward" )

    # forest hyperparameters, used both for the learner request and the report text
    n_estimators = 1
    max_depth = 20
    max_features = 1.0

    def main( weak_learner_node: nodes.find( "train_weak_learner" ).given(
            subset = subset,
            length_l = 100,
            lag = 50,
            length_r = 1,
            linear_operator = np.identity( 1 ),
            type = "forest",
            learner_kwargs = dict(
                max_depth = max_depth,
                max_features = max_features,
                n_jobs = 1,
                n_estimators = n_estimators,
            ),
        )):
        """
        wraps the result of the weak learner node into a model instance that
        carries the learner and an info dict with keys `table` and `theory`.
        """

        m = model( )
        learner = weak_learner_node.result

        # report text, assembled from its sections in reading order
        sections = [
            "\n### General\n",
            f"""This model learns a function between two lagged sliding windows via a random forest 
        of size ${ n_estimators }$ with a maximum depth of ${ max_depth }$ and a feature ratio of 
        ${ int( max_features * 100 )}\\%$. Prediction is performed in an iterative forward fashion 
        (hence the name *honest*): the target dataframe on which we predict is only used for the necessary 
        burn-in period of the left window, the rest is predicted on its own prior output. This property makes it
        easy to validate against out-of-sample time series.""",
            "\n### Training dimensions\n",
            learner.theory_info,
            f"In this case the linear operator corresponds to $M=I_{{1,1}}$, the $1 \\times 1$ identity matrix.",
        ]
        theory = "".join( sections )

        info = { "table": learner.info_dict, "theory": theory }

        # written straight into the instance dict, exactly like m.__dict__.update( ... )
        vars( m ).update( learner = learner, info = info )
        return m

    return main

# bind the node factory to the generic module-level name `node`
# (presumably the name the nodes framework looks up in this module; TODO confirm)
node = train_honest_forward