# Put content into template

In [None]:
# Fill the dashboard01.html template with the content collected for each stage
# and save the rendered page as dashboard02.html in the working directory.
p = Path.cwd() / 'dashboard01.html'
template = Template(p.read_text())
o = Path.cwd() / 'dashboard02.html'
o.write_text(
    template.render(
        dict(
            stage0=stage0,
            s1data=s1data,
            stage1=stage1,
            stage2=stage2,
            stage3=stage3,
            s4params=s4params,
            s5_before_asfreq=s5_before_asfreq,
            s5_after_asfreq=s5_after_asfreq,
            s5_periodIndex=s5_periodIndex,
            ts5md=ts5md,
            stage5=stage5,
            s5params=s5params,
            s6params=s6params,
            stage7=stage7,
        )
    )
)

## Radial bases

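of the form $$\phi_i(x) = \exp \Big[-\frac{1}{\alpha} (x-m_i)^2 \Big]~ \forall ~\text{week, month, or} \dots$$
where the centres $m_i$ are the positions that are exact multiples of the chosen period $p$ (e.g. $p=7$ for weekly humps in daily data):
$$
m_i \in \{\, x : x ~\text{mod}~ p = 0 \,\}
$$
The basis used in the regression is the sum of the individual humps, $\sum_i \phi_i(x)$.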

In [None]:
def rbf_builder(positions: np.ndarray,
                period: int = 7,
                alpha: float = 1.2,
                points_per_interval: int = 1,
               ) -> np.ndarray:
    """Build a multi-hump radial basis function (RBF) over a position array.

    A smooth peak is placed on every position whose value is an exact multiple
    of `period` (i.e. where ``np.fmod(positions, period) == 0``).  The hump
    centred on such a value ``m`` is

        exp(-(positions - m)**2 / alpha)

    which evaluates to `1` at ``m`` and decays on either side.  The individual
    humps (one per period multiple present in the data) are summed into a
    single 1-D array with multiple humps--the form we need to include in a
    linear regression.  The result is a plain NumPy array, not a DataFrame.

    Parameters
    ----------
    positions
        Position (time) values associated with a sequence of observations
        (1-D).
    period
        Number of sequence or time points (or rows) in the series that define a putative characteristic
        of the sequence. `7` implies weekly periodicity if data were recorded daily. The period by itself
        has no inherent meaning; it is always in relation to the data structure (spacing of observations).
    alpha
        Width of the humps; higher is wider.
    points_per_interval
        Always one, except when unit testing with synthetic data using `numpy.linspace`.
        Currently not used in the computation; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        Array with the same length as `positions` holding the summed humps.
        If no position is a multiple of `period`, an array of zeros is
        returned (there are no humps to add) instead of failing.
    """
    positions = np.asarray(positions)

    # Hump centres are the position *values* that are exact multiples of the
    # period.  The comparison is exact, so float positions must land precisely
    # on a multiple to be picked up.
    centres = positions[np.fmod(positions, period) == 0]

    # Nothing to stack: stacking an empty list would raise an opaque ValueError.
    if centres.size == 0:
        return np.zeros(positions.shape, dtype=float)

    # Row i holds the hump centred on centres[i]; summing over rows merges
    # them into one multi-hump curve.
    # todo: trim each hump to a window around its centre
    humps = np.exp(-(positions[np.newaxis, :] - centres[:, np.newaxis])**2 / alpha)
    return humps.sum(axis=0)


def rbf_stitcher(seq: pd.DataFrame,
                 rbf: np.ndarray,
                 m_: int = 7,
                 characteristic: str = 'dow',
                 characteristic_value: int = 4,
                ) -> pd.DataFrame:
    """Extends regression design matrix X with a radial basis function (RBF),
    ensuring the RBF peak aligns to the desired positions within the sequence.

    The RBF is shifted so that its first element lands on the first row of
    `seq` where `characteristic` equals `characteristic_value`, then
    left-merged onto `seq` by index.  Rows before that first match receive no
    RBF value from the merge and are filled by mirroring the values that
    follow the first peak.

    Parameters
    ----------
    seq
        Sequence of observations.  Must contain the `characteristic` column.
    rbf
        Radial basis, a function of recurring humps.
    m_
        Period of the RBF.  Only used to name the new column.
    characteristic
        Name of the sequence or time characteristic.
    characteristic_value
        Ordinal value of the characteristic.  For example, if the characteristic is
        the day of the week (DoW) and we want the RBF to peak on Fridays,
        we have prepared a sequence that includes an ordinal encoding via `pandas.dt.weekday`
        where DoW = 4 corresponds to Fridays.

    Returns
    -------
    pandas.DataFrame
        A new frame: `seq` plus one column named ``rbf_<m_>_<characteristic_value>``.

    Raises
    ------
    IndexError
        If `characteristic_value` never occurs in ``seq[characteristic]``.
    """
    # Convert RBF into a named Series
    rbf_name = f'rbf_{m_}_{characteristic_value}'
    rbf = pd.Series(rbf, name=rbf_name)

    # Locate first instance where specified time characteristic appears in the sequence
    matches = seq.index[seq[characteristic] == characteristic_value]
    if len(matches) == 0:
        raise IndexError(
            f'{characteristic!r} never equals {characteristic_value!r} in the sequence'
        )
    delay = matches[0]

    # Shift the RBF so its first element sits on that first matching row
    rbf.index = rbf.index + delay

    # Merge the RBF to the sequence; RBF values shifted past the end of the
    # sequence are dropped by the left join
    seq = seq.merge(rbf, left_index=True, right_index=True, how='left')

    # Backfill any nulls in the RBF resulting from the index alignment.
    # When the first row already matches (delay == 0) there is nothing to fill.
    idx_null_rbf = seq.index[seq[rbf_name].isna()]
    if len(idx_null_rbf) > 0:
        # Mirror the values just after the first peak back onto the leading
        # rows.  With nulls on rows 0..delay-1 the offset max+2 equals delay+1,
        # so after reversing, row i takes the value from row 2*delay - i: a
        # reflection about the first peak.  That makes the +2 independent of m_.
        # NOTE(review): assumes seq has a 0-based integer index and that rbf is
        # at least as long as seq, so the only nulls are the leading rows.
        mirror_idx = idx_null_rbf + idx_null_rbf.max() + 2
        seq.loc[idx_null_rbf, rbf_name] = seq.loc[mirror_idx, rbf_name].to_numpy()[::-1]

    return seq

## RBF mini test

Make a short, artificial sequence

In [None]:
# Synthetic positions: an evenly spaced grid from x1 to x2 (endpoints included)
# with `points_per_interval` steps per unit interval.
x1, x2 = 1, 56
points_per_interval = 10
steps = points_per_interval * (x2 - x1) + 1
rbf = pd.DataFrame(np.linspace(x1, x2, num=steps), columns=['x'])
print(rbf.iloc[:9])

Explore the effect of `alpha` on one hump of an RBF.

In [None]:
# One Gaussian hump centred on m_; alpha sets its width (higher alpha, broader hump).
m_ = 14
alpha = 1.2
rbf = rbf.assign(y=lambda frame: np.exp(-(frame.x - m_) ** 2 / alpha))
chrt = altair_ts_scatter(rbf, 'x', 'y', 'x')

In [None]:
# Display the single-hump chart built in the previous cell.
chrt

Create the full RBF for the range of a given sequence

In [None]:
# Build the multi-hump RBF over the synthetic x grid with rbf_builder's
# defaults (period=7, alpha=1.2).  The result is a 1-D NumPy array.
rbf_segments = rbf_builder(rbf.x.to_numpy())

In [None]:
# Inspect the raw RBF values.
rbf_segments

In [None]:
# rbf_builder returns a bare NumPy array, while the chart helper is given
# column names, so attach the RBF to the x grid as a named column first.
# NOTE(review): assumes altair_ts_line takes a DataFrame as its first argument,
# as in the other chart calls in this notebook -- confirm.
segment = 'rbf_7'
chrt2 = altair_ts_line(rbf.assign(**{segment: rbf_segments}), 'x', segment, 'x')

In [None]:
# Display the multi-hump RBF chart.
chrt2

## Deploy RBF to longer artificial sequence
From above, we have a daily sequence where every Friday is boosted.  We have a function that creates an RBF that peaks every $m^\text{th}$ row of a DataFrame. We don't know if those positions align to the desired day of the week.  (And analogously, if we wanted an RBF that peaks around the end of each month, etc.)

In [None]:
# Build a period-7 RBF (rbf_builder's default period) over the row positions
# of `ts`, with narrower humps than the default (alpha=0.8 instead of 1.2).
# NOTE(review): reset_index() yields a column called 'index' only when the
# index of `ts` is unnamed -- confirm.
rbf = rbf_builder(ts.reset_index()['index'].to_numpy(),
                  alpha=0.8)

In [None]:
# Align the RBF to the sequence using rbf_stitcher's defaults
# (characteristic='dow', characteristic_value=4, m_=7), which adds the
# column 'rbf_7_4'.
tsrbf = rbf_stitcher(ts, rbf)

In [None]:
# Peek at the first nine rows to eyeball the alignment.
tsrbf[0:9]

In [None]:
# Name of the column rbf_stitcher adds for m_=7 and characteristic_value=4.
segment = 'rbf_7_4'

In [None]:
# Chart the stitched RBF column against 't' for the first 60 rows only.
chrt4 = altair_ts_scatter(tsrbf[0:60], 't', segment, 't')

In [None]:
# chrt4

## RBF models

- Continue to use `AutoReg`, adding RBF as an `exog` and disabling `seasonal` 
- Approximate the AR model's
    - Time trend, $X_1=t$ (ok to exclude)
    - lags (`regressors.create_lags`)
    - seasonality
        - binary dow variables
        - Friday RBF

In [None]:
# Fit an autoregression with two lags and a time trend.  AutoReg's own seasonal
# terms are switched off; the Friday RBF column is supplied as an exogenous
# regressor instead.
auto_reg = AutoReg(
    endog=tsrbf['y'],
    lags=2,
    trend='t',
    seasonal=False,
    exog=tsrbf['rbf_7_4'],
    missing='raise',
    old_names=False,
)
auto_reg1 = auto_reg.fit()

# Keep the model's predictions next to the observations, then show the
# fitted coefficients.
tsrbf.loc[:, 'y_hat'] = auto_reg1.predict()
pprint(auto_reg1.params)

In [None]:
# Overlay the fitted values ('y_hat') on the observations ('y'), both charted
# against 't'.  The interval selection bound to the scales makes the layered
# chart pannable and zoomable.
pan_zoom = alt.selection_interval(bind='scales')
c0 = altair_ts_scatter(tsrbf, 't', 'y', 't')
c2 = altair_ts_line(tsrbf, 't', 'y_hat', 't')
(c0 + c2).add_selection(pan_zoom)

In [None]:
# Show every column of tsrbf except 'y_hat' and 't'.
tsrbf[tsrbf.columns[~tsrbf.columns.isin(['y_hat', 't'])]]

In [None]:
# List the column names of tsrbf, excluding 'y_hat' and 't'.
tsrbf.columns[~tsrbf.columns.isin(['y_hat', 't'])]

In [None]:
# Specify (but do not yet fit) an ordinary least squares model via the formula API.
# NOTE(review): 'x', 'a' and 'b' look like placeholder column names -- replace
# them with the actual regressors in `ts` before fitting.
lm = smf.ols(formula='y ~ x + a + b', data=ts)