# core

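> Compute time series features for a long panel of series: every feature function is applied to each `unique_id` group and the results are returned as one row per series.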

In [None]:
#|default_exp tsfeatures

In [None]:
#|export
import os
import warnings

# Replace ``warnings.warn`` with a stub that accepts any arguments and returns
# False, so warnings raised through it are silently dropped.
warnings.warn = lambda *a, **kw: False

# Pin the MKL / NumExpr / OpenMP thread-count variables to "1".
# NOTE(review): presumably done before numpy is imported so that each worker
# process stays single-threaded -- confirm.
os.environ.update(
    {
        "MKL_NUM_THREADS": "1",
        "NUMEXPR_NUM_THREADS": "1",
        "OMP_NUM_THREADS": "1",
    }
)

In [None]:
#|export

from typing import Callable, Dict, List, Optional
from collections import ChainMap
from multiprocessing import Pool
from functools import partial

import numpy as np
import pandas as pd

In [None]:
#|export
from tsfeatures.features import *
from tsfeatures.utils import *

In [None]:
#|export
def _get_feats(
    index,
    ts,
    freq,
    scale: bool = True,
    features: Optional[List[Callable]] = None,
    dict_freqs=FREQS,
):
    """Compute the features of one time series as a single-row DataFrame.

    Parameters
    ----------
    index:
        Label used as the index of the returned row (the ``unique_id`` group
        key when called from ``tsfeatures``).
    ts: pandas DataFrame, pandas Series or array
        The series. A DataFrame must contain a ``y`` column (and a ``ds``
        column when ``freq`` is None); a Series is reduced to its values.
    freq: int or None
        Passed as second argument to every feature function. When None it is
        inferred from ``ts["ds"]`` with ``pd.infer_freq`` and translated to
        an integer through ``dict_freqs``.
    scale: bool
        Whether to pass the values through ``scalets`` before computing
        the features.
    features: list of callables, optional
        Each one is called as ``func(values, freq)`` and must return a
        mapping of feature name to value. Defaults to the built-in list below.
    dict_freqs: dict
        Maps an inferred pandas frequency string to the integer frequency.

    Returns
    -------
    pandas df
        One row, indexed by ``index``, with one column per feature name.

    Raises
    ------
    Exception
        If the frequency cannot be inferred, or the inferred frequency is
        not a key of ``dict_freqs``.
    ValueError
        If ``ts`` is a DataFrame without a ``y`` column.
    """
    if features is None:
        features = [
            acf_features,
            arch_stat,
            crossing_points,
            entropy,
            flat_spots,
            heterogeneity,
            holt_parameters,
            lumpiness,
            nonlinearity,
            pacf_features,
            stl_features,
            stability,
            hw_parameters,
            unitroot_kpss,
            unitroot_pp,
            series_length,
            hurst,
        ]

    if freq is None:
        inf_freq = pd.infer_freq(ts["ds"])
        if inf_freq is None:
            raise Exception(
                "Failed to infer frequency from the `ds` column, "
                "please provide the frequency using the `freq` argument."
            )

        freq = dict_freqs.get(inf_freq)
        if freq is None:
            raise Exception(
                "Error trying to convert infered frequency from the `ds` column "
                "to integer. Please provide a dictionary with that frequency "
                "as key and the integer frequency as value. "
                f"Infered frequency: {inf_freq}"
            )

    if isinstance(ts, pd.DataFrame):
        # Explicit check instead of `assert`, which is stripped under `-O`.
        if "y" not in ts.columns:
            raise ValueError("`ts` must contain a `y` column with the series values.")
        ts = ts["y"].values
    elif isinstance(ts, pd.Series):
        ts = ts.values

    if scale:
        ts = scalets(ts)

    # ChainMap keeps the value of the earliest feature function when two
    # functions return the same key.
    c_map = ChainMap(*[func(ts, freq) for func in features])

    return pd.DataFrame(dict(c_map), index=[index])

In [None]:
# |export
def tsfeatures(
    ts: pd.DataFrame,
    freq: Optional[int] = None,
    features: Optional[List[Callable]] = None,
    dict_freqs: Dict[str, int] = FREQS,
    scale: bool = True,
    threads: Optional[int] = None,
) -> pd.DataFrame:
    """Calculates features for time series.

    Parameters
    ----------
    ts: pandas df
        Pandas DataFrame with columns ['unique_id', 'ds', 'y'].
        Long panel of time series.
    freq: int
        Frequency of the time series. If None, the frequency of each time
        series is inferred and the seasonal period is assigned according to
        dict_freqs.
    features: iterable
        Iterable of feature functions. If None, the default set listed
        below is used.
    scale: bool
        Whether to (mean-std) scale the data.
    dict_freqs: dict
        Dictionary that maps a string frequency to an int.
        Ex: {'D': 7, 'W': 1}
    threads: int
        Number of worker processes to use. With None (default) the size of
        the ``multiprocessing.Pool`` is chosen by the pool itself. With 1 the
        series are processed serially in the current process and no pool is
        created.

    Returns
    -------
    pandas df
        Pandas DataFrame where each column is a feature and each row
        a time series.
    """

    if features is None:
        features = [
            acf_features,
            arch_stat,
            crossing_points,
            entropy,
            flat_spots,
            heterogeneity,
            holt_parameters,
            lumpiness,
            nonlinearity,
            pacf_features,
            stl_features,
            stability,
            hw_parameters,
            unitroot_kpss,
            unitroot_pp,
            series_length,
            hurst,
        ]

    partial_get_feats = partial(
        _get_feats, freq=freq, scale=scale, features=features, dict_freqs=dict_freqs
    )

    # Iterating the groupby yields (unique_id, group_frame) pairs, which map
    # onto the first two positional parameters of `_get_feats`.
    groups = ts.groupby("unique_id")

    if threads == 1:
        # A single worker needs no pool. Running in-process also avoids
        # pickling `_get_feats`, which fails under the "spawn" start method
        # when the function only exists in an interactive `__main__`.
        ts_features = [partial_get_feats(uid, group) for uid, group in groups]
    else:
        with Pool(threads) as pool:
            ts_features = pool.starmap(partial_get_feats, groups)

    ts_features = pd.concat(ts_features).rename_axis("unique_id")
    ts_features = ts_features.reset_index()

    return ts_features

In [None]:
from tsfeatures.m4_data import prepare_m4_data


def calculate_features_m4(dataset_name, directory, num_obs=1000000):
    """Load an M4 dataset through ``prepare_m4_data``.

    The call must return four values; the second one is bound to a local
    name but is not used further, and the function returns nothing.
    """
    loaded = prepare_m4_data(
        dataset_name=dataset_name,
        directory=directory,
        num_obs=num_obs,
    )
    _first, y_train_df, _third, _fourth = loaded


#     freq = FREQS[dataset_name[0]]
#     py_feats = tsfeatures(y_train_df, freq=freq).set_index("unique_id")

# "Hourly"[0] evaluates to "H", so that key is looked up directly.
freq = FREQS["H"]
calculate_features_m4(dataset_name="Hourly", directory="data", num_obs=100)
# calculate_features_m4("Daily", "data", 100)





In [None]:
# Call prepare_m4_data for the "Hourly" dataset (directory "data",
# num_obs=100) and keep only the second of its four return values.
_, y_train_df, _, _ = prepare_m4_data(
    dataset_name="Hourly", directory="data", num_obs=100
)





In [None]:
# `freq` must be the integer frequency, not the dataset name, so take the
# value registered for hourly data in FREQS.
tsfeatures(y_train_df, freq=FREQS["H"], threads=1).set_index("unique_id")

Process SpawnPoolWorker-93:
Traceback (most recent call last):
  File "/Users/JdeTheije/miniconda3/envs/tsf/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/JdeTheije/miniconda3/envs/tsf/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/JdeTheije/miniconda3/envs/tsf/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/JdeTheije/miniconda3/envs/tsf/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute '_get_feats' on <module '__main__' (built-in)>
Process SpawnPoolWorker-94:
Traceback (most recent call last):
  File "/Users/JdeTheije/miniconda3/envs/tsf/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/JdeTheije/miniconda3/envs/tsf/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(

In [None]:
#|hide
from nbdev.showdoc import *

In [None]:
#|hide
import nbdev

# Run nbdev's export step.
nbdev.nbdev_export()