In [None]:
from typing import Union

import numpy as np
import pandas as pd

In [None]:
def random_mask(
    n: int, percent_true: float = 0.5, seed: Union[int, None] = None
) -> np.ndarray:
    """Return a shuffled boolean mask of length ``n`` with a fixed share of ``True``.

    Parameters
    ----------
    n:
        Length of the mask.
    percent_true:
        Fraction in ``[0, 1]`` (not a 0-100 percentage) of entries set to
        ``True``. The count is ``int(n * percent_true)``, i.e. truncated.
    seed:
        Optional seed for a private generator, which makes the mask
        reproducible. With the default ``None`` the global ``np.random``
        state is used for shuffling.

    Returns
    -------
    np.ndarray
        1-D boolean array of length ``n``.

    Raises
    ------
    ValueError
        If ``percent_true`` lies outside ``[0, 1]``.
    """
    # A negative fraction would turn the slice below into ``mask[:-k]`` and
    # silently mark almost every entry True, so reject it up front.
    if not 0.0 <= percent_true <= 1.0:
        raise ValueError(
            "percent_true must be within [0, 1], got {!r}".format(percent_true)
        )
    mask = np.full(n, False)
    mask[: int(n * percent_true)] = True
    if seed is None:
        np.random.shuffle(mask)
    else:
        np.random.default_rng(seed).shuffle(mask)
    return mask

In [None]:
# Demo data: a float ramp 0..n-1 in which the positions selected by the random
# mask are replaced with NaN.
n = 1000000
nan_mask = random_mask(n)
arr = np.arange(n, dtype=float)
arr[nan_mask] = np.nan
# Preview only the first few entries instead of dumping the whole array.
arr[:10]

In [None]:
# class Imputer1D

In [None]:
def np_ffill(arr: np.ndarray) -> np.ndarray:
    """Forward-fill NaN entries of ``arr`` with the latest preceding non-NaN value.

    NaNs at the very start have no predecessor and therefore stay NaN. The
    result is a new array built by index lookup; ``arr`` itself is not modified.
    """
    is_valid = ~np.isnan(arr)
    positions = np.arange(is_valid.shape[0])
    # NaN slots are assigned position 0, so the running maximum below yields,
    # for every slot, the index of the latest non-NaN entry at or before it.
    source = np.where(is_valid, positions, 0)
    np.maximum.accumulate(source, axis=0, out=source)
    return arr[source]

In [None]:
def np_bfill(arr: np.ndarray) -> np.ndarray:
    """Backward-fill NaN entries of ``arr`` with the next following non-NaN value.

    NaNs at the very end have no successor and therefore stay NaN. The result
    is a new array built by index lookup; ``arr`` itself is not modified.
    """
    is_valid = ~np.isnan(arr)
    length = is_valid.shape[0]
    # NaN slots are assigned the last position, so a running minimum taken from
    # the right yields the index of the nearest non-NaN entry at or after each slot.
    source = np.where(is_valid, np.arange(length), length - 1)
    running_min_from_right = np.minimum.accumulate(source[::-1], axis=0)
    return arr[running_min_from_right[::-1]]

In [None]:
def ffill(
    data: Union[np.ndarray, pd.Series], pandas_threshold: int = 100000
) -> Union[np.ndarray, pd.Series]:
    """Forward-fill missing values in a pandas Series or a NumPy array.

    Parameters
    ----------
    data:
        A ``pd.Series`` (filled with ``Series.ffill``) or an ``np.ndarray``.
    pandas_threshold:
        Arrays whose first dimension is shorter than this are filled with
        ``np_ffill``; longer ones are wrapped in a ``pd.Series``, filled, and
        returned as an array again.

    Returns
    -------
    Union[np.ndarray, pd.Series]
        A filled object of the same kind as ``data``.

    Raises
    ------
    TypeError
        If ``data`` is neither a ``pd.Series`` nor an ``np.ndarray``.
    """
    if isinstance(data, pd.Series):
        return data.ffill()
    if isinstance(data, np.ndarray):
        if data.shape[0] < pandas_threshold:
            return np_ffill(data)
        return pd.Series(data).ffill().values
    # Falling through used to return None silently, which only failed later at
    # the call site; fail here with a clear message instead.
    raise TypeError(
        "ffill expects a pandas Series or a numpy ndarray, got {}".format(
            type(data).__name__
        )
    )

In [None]:
def bfill(
    data: Union[np.ndarray, pd.Series], pandas_threshold: int = 100000
) -> Union[np.ndarray, pd.Series]:
    """Backward-fill missing values in a pandas Series or a NumPy array.

    Parameters
    ----------
    data:
        A ``pd.Series`` (filled with ``Series.bfill``) or an ``np.ndarray``.
    pandas_threshold:
        Arrays whose first dimension is shorter than this are filled with
        ``np_bfill``; longer ones are wrapped in a ``pd.Series``, filled, and
        returned as an array again.

    Returns
    -------
    Union[np.ndarray, pd.Series]
        A filled object of the same kind as ``data``.

    Raises
    ------
    TypeError
        If ``data`` is neither a ``pd.Series`` nor an ``np.ndarray``.
    """
    if isinstance(data, pd.Series):
        return data.bfill()
    if isinstance(data, np.ndarray):
        # Strict "<" so the cut-over point is the same as in ffill.
        if data.shape[0] < pandas_threshold:
            return np_bfill(data)
        return pd.Series(data).bfill().values
    # Falling through used to return None silently, which only failed later at
    # the call site; fail here with a clear message instead.
    raise TypeError(
        "bfill expects a pandas Series or a numpy ndarray, got {}".format(
            type(data).__name__
        )
    )

In [None]:
# TODO: add these fill helpers to the normalization code, e.g. to get rid of ".values" and the
# special handling of pandas Series.
# They can also be used for imputation.

In [None]:
# Forward-fill the demo array; the bare call shows the result as the cell output.
ffill(arr)

In [None]:
# Backward-fill the demo array; the bare call shows the result as the cell output.
bfill(arr)

In [None]:
# Benchmark: NumPy-only forward fill applied directly to the ndarray.
%timeit np_ffill(arr)

In [None]:
# Benchmark: pandas forward fill, including the cost of wrapping the ndarray in a Series.
%timeit pd.Series(arr).ffill()

In [None]:
# Build the Series once up front so the timings below can exclude its construction.
series = pd.Series(arr)

In [None]:
# Benchmark: pandas forward fill on the pre-built Series.
%timeit series.ffill()

In [None]:
# Benchmark: NumPy forward fill starting from a Series (extract values, fill, re-wrap).
%timeit pd.Series(np_ffill(series.values))