# Prototype: an efficient, approximate running mean filter

The window size (units of time) is denoted as $p$.

Constraints:
* The filter uses a prescribed number of bins, $n$, to store the history.
  Typically $n$ << the number of time steps the model would take to cover the window.
  For instance, a 1-day running mean may use only $n=24$ bins (1 per hour), even if the internal model time step $\Delta t$ is 600 s.
  By definition, this means the running mean will be an approximation. But it should converge to a completely accurate
  result when there is one bin per model time step, i.e., when $p/n \rightarrow \Delta t$.
* We have no advance knowledge about the model time step $\Delta t$, which may be variable.

In [None]:
import numpy
from matplotlib import pyplot

# Settings shared by all cells below. All times are expressed in days.
p = 1.                    # window size of the running mean, in days
delta_t = 1. / 24. / 6.   # model time step in days (1/144 day = 10 minutes = 600 s)
duration = 30.            # simulation duration in days
n = 24                    # number of bins for history (covering period p); with p = 1 day this is 1 bin per hour
missing_value = -2.      # value to return while the simulation has not covered 1 window size yet

In [None]:
class Filter:
   """Approximate running mean that keeps its history in a fixed number of bins.

   The window is divided into ``nbins`` bins of equal width. Each bin stores the
   time integral of the signal over that bin (trapezoidal rule, with linear
   interpolation between consecutive calls), divided by the window size, so that
   the sum of ``nbins`` bins equals the mean over one window. ``history`` holds
   one extra entry for the bin that is currently being filled and is used as a
   circular buffer.

   Args:
      window: window size, in the same time units as the ``now`` argument of calls.
         Defaults to the module-level ``p``.
      nbins: number of bins that together cover one window.
         Defaults to the module-level ``n``.
      missing: value returned while less than one window has been covered.
         Defaults to the module-level ``missing_value``.

   Raises:
      ValueError: if ``nbins`` is smaller than 1 or ``window`` is not positive.
   """

   def __init__(self, window=None, nbins=None, missing=None):
      # Fall back to the notebook-level settings, so that a bare Filter() behaves as before
      self.window = p if window is None else window
      self.nbins = n if nbins is None else nbins
      self.missing = missing_value if missing is None else missing
      if self.nbins < 1:
         raise ValueError('Number of bins must be at least 1, but is %s' % (self.nbins,))
      if not self.window > 0:
         # A non-positive window would give a non-positive bin width, and the bin loop in __call__ would never end
         raise ValueError('Window size must be positive, but is %s' % (self.window,))
      self.binwidth = self.window / self.nbins

      self.history = numpy.zeros((self.nbins + 1,))   # one for each completed bin, plus one more for the bin currently being filled
      self.previous_time = 0.          # time of the previous call (or of the last completed bin boundary)
      self.previous_value = 0.         # signal value at previous_time
      self.ibin = -1                   # index of the bin currently being filled; -1 until the first call
      self.bin_end_time = 0.           # time at which the current bin ends
      self.mean_value = self.missing   # most recent result
      self.last_mean = 0.              # sum of the nbins most recently completed bins
      self.complete = False            # whether nbins bins have been completed at least once

   def _next(self, index: int) -> int:
      """Return the index that follows ``index`` in the circular history (nbins + 1 entries)."""
      return 0 if index == self.nbins else index + 1

   def __call__(self, now: float, value: float) -> float:
      """Register a new sample and return the current running mean.

      Args:
         now: time of the sample. Presumably must not decrease between calls; this is not checked.
         value: value of the signal at time ``now``.

      Returns:
         The approximate mean over the last window, or the missing value
         as long as no full window has been covered.
      """
      window, nbins, binwidth = self.window, self.nbins, self.binwidth
      if self.ibin == -1:
         # First call: the first bin starts now
         self.previous_time = now
         self.bin_end_time = now + binwidth
         self.ibin = 0

      # Complete every bin that ends at or before the current time
      while now >= self.bin_end_time:
         dt = self.bin_end_time - self.previous_time

         # Weight of the new value when interpolating linearly to the time at which the bin ends
         w = dt / (now - self.previous_time)

         # Increment the bin we are completing (history[ibin]) using the trapezoidal rule:
         # with bin_end_value = (1 - w) * previous_value + w * value, the increment
         # 0.5 * dt * (previous_value + bin_end_value) / window simplifies to the expression below.
         self.history[self.ibin] += (self.previous_value + 0.5 * w * (value - self.previous_value)) * dt / window
         if self.complete:
            # We already had a complete history (bins covering the full window size). Add the newly full bin, subtract the oldest bin
            self.last_mean += self.history[self.ibin] - self.history[self._next(self.ibin)]
         elif self.ibin == nbins - 1:
            # We just completed our history. Create the mean by summing all filled bins.
            self.last_mean = self.history[:nbins, ...].sum(axis=0)
            self.complete = True

         # Move on to the next bin; it reuses the entry of the oldest bin, which is no longer needed
         self.ibin = self._next(self.ibin)
         self.history[self.ibin] = 0.

         # Continue from the bin boundary, using the value interpolated to that time
         self.previous_time = self.bin_end_time
         self.previous_value += w * (value - self.previous_value)
         self.bin_end_time += binwidth

      # Increment current bin (history[ibin]) with the part between the previous time and now
      self.history[self.ibin] += 0.5 * (self.previous_value + value) / window * (now - self.previous_time)
      if self.complete:
         # We have a complete history - update the mean: completed bins, plus the partially filled
         # current bin, minus the same fraction of the oldest bin, so that one window remains covered
         filled_fraction = (now - self.bin_end_time + binwidth) / binwidth
         self.mean_value = self.last_mean + self.history[self.ibin] - self.history[self._next(self.ibin)] * filled_fraction

      self.previous_time = now
      self.previous_value = value
      return self.mean_value

In [None]:
# Signal for which the running mean will be computed:
# a sine wave with a period of 1 (same units as the window size), sampled at every model time step
times = numpy.arange(0, duration, delta_t)
phase = 2 * numpy.pi * times
values = numpy.sin(phase)

# Show the unfiltered signal
fig, ax = pyplot.subplots(figsize=(15, 4))
ax.plot(times, values)
ax.grid()

In [None]:
# Compute and plot the running mean
# (the filter object is deliberately not named "filter", as that would shadow the Python builtin)
running_mean = Filter()
filtered = numpy.empty_like(values)
for i, (time, value) in enumerate(zip(times, values)):
   filtered[i] = running_mean(time, value)

# Original signal and its running mean in one figure
fig, ax = pyplot.subplots(figsize=(15, 4))
ax.plot(times, values)
ax.plot(times, filtered)
ax.grid()

In [None]:
# Compare running mean with analytical solution
# (this requires the window size to be a multiple of the model time step)
# The check compares p with the nearest integer multiple of delta_t instead of using p % delta_t:
# a floating-point modulo can return a residual close to delta_t for what is nominally an exact
# multiple (e.g., 1.0 % 0.1 equals 0.09999999999999995), which would wrongly reject valid settings.
nstep = int(round(p / delta_t))
residual = p - nstep * delta_t
assert abs(residual) < 1e-9 * delta_t, 'Window size %s is not a multiple of the model time step %s. Residual: %s' % (p, delta_t, residual)

# Reference solution: mean over the nstep intervals preceding each time, each interval
# represented by the average of its two end points (trapezoidal rule).
# It is only defined once a full window (nstep steps) is available.
analytical = numpy.full_like(values, missing_value)
for i in range(nstep, len(values)):
   segment = values[i - nstep:i + 1]
   centers = 0.5 * (segment[:-1] + segment[1:])
   analytical[i] = centers.mean()

# Plot the error of the approximate running mean
fig, ax = pyplot.subplots(figsize=(15, 4))
ax.plot(times, filtered - analytical)
ax.grid()


In [None]:
# Now try with randomly varying time step
# (the filter object is deliberately not named "filter", as that would shadow the Python builtin)
running_mean = Filter()
time = 0.
rtimes, rvalues, rfiltered = [], [], []
while time < duration:
   value = numpy.sin(2 * numpy.pi * time)
   rtimes.append(time)
   rvalues.append(value)
   rfiltered.append(running_mean(time, value))
   # numpy.random.rand() is uniform on [0, 1), so the time step varies between 0 and 2 * delta_t
   dt = 2 * delta_t * numpy.random.rand()
   time += dt

# Irregularly sampled signal and its running mean
fig, ax = pyplot.subplots(figsize=(15, 4))
ax.plot(rtimes, rvalues)
ax.plot(rtimes, rfiltered)
ax.grid()

In [None]:
# Now try the running mean filter in FABM itself
import pyfabm

# One model instance named "mean", given the same window size, number of bins
# and missing value as the Python prototype above
mean_parameters = {'window': p, 'n': n, 'missing_value': missing_value}
config = {'instances': {'mean': {'model': 'interior_temporal_mean', 'parameters': mean_parameters}}}
m = pyfabm.Model(config)

# Input (the variable to be averaged) and output (its running mean) of the FABM model
invar = m.findDependency('mean/source')
outvar = m.findDiagnosticVariable('mean/mean')
invar.value = missing_value
m.cell_thickness = 1
m.start()

# Feed the signal to FABM one time step at a time and collect its running mean
fabm_filtered = numpy.empty_like(values)
for step, (now, sample) in enumerate(zip(times, values)):
   invar.value = sample
   m.getRates(now, surface=True)
   fabm_filtered[step] = outvar.value

# Plot the difference between the FABM result and the analytical solution
fig, ax = pyplot.subplots(figsize=(15, 4))
ax.plot(times, fabm_filtered - analytical)
ax.grid()
