In [1]:
# Import the libraries needed to locate and load the dataset

import os
import aeon
from aeon.datasets import load_from_tsf_file

In [7]:
# Build the path from components so the separator matches the host OS instead
# of hard-coding Windows backslashes. The trailing "" keeps DATA_PATH ending in
# a separator, so any code that still concatenates onto it keeps working.
DATA_PATH = os.path.join(os.path.dirname(aeon.__file__), "datasets", "data", "")
data, metadata = load_from_tsf_file(
    os.path.join(DATA_PATH, "m1_yearly_dataset", "m1_yearly_dataset.tsf")
)

In [11]:
# Check which container type the loader returned for the series data.
type(data)

pandas.core.frame.DataFrame

In [12]:
# Check which container type the loader returned for the header metadata.
type(metadata)

dict

In [9]:
# Preview the first rows: one row per series (name, start timestamp, values).
data.head()

Unnamed: 0,series_name,start_timestamp,series_value
0,T1,1972-01-01,"[3600.0, 7700.0, 12300.0, 30500.0, 47390.0, 57..."
1,T2,1974-01-01,"[12654.0, 22879.0, 34164.0, 49524.0, 64761.0, ..."
2,T3,1974-01-01,"[2142.0, 12935.0, 19130.0, 30500.0, 48177.0, 5..."
3,T4,1974-01-01,"[5774.0, 7650.0, 9271.0, 21447.0, 28998.0, 409..."
4,T5,1976-01-01,"[432312.0, 569011.0, 862673.0, 1155640.0, 1439..."


In [10]:
# Show the header fields parsed from the .tsf file.
metadata

{'frequency': 'yearly',
 'forecast_horizon': 6,
 'contain_missing_values': False,
 'contain_equal_length': False}

In [13]:
import textwrap
import numpy as np
import pandas as pd

In [18]:
def _write_header_tsf(
    path,
    dataset_name,
    time_stamp="date",
    equal_length=True,
    frequency="weekly",
    horizon=8,
    missing=False,
    comment=None,
    suffix=None,
    extension=".tsf",
):
    if not os.path.exists(path):
        os.makedirs(path)
        
    load_path = f"{path}/{dataset_name}"
    if suffix is not None:
        load_path = load_path + suffix
    load_path = load_path + extension

    file = open(load_path, "w")

    if comment is not None:
        file.write("\n# ".join(textwrap.wrap("# " + comment)))
        file.write("\n")
        
    file.write(f"@relation {str(dataset_name).lower()}\n")
    file.write("@attribute series_name string\n")
    file.write(f"@attribute start_timestamp {str(time_stamp).lower()}\n")
    file.write(f"@frequency {str(frequency).lower()}\n")
    file.write(f"@horizon {str(horizon).lower()}\n")
    file.write(f"@missing {str(missing).lower()}\n")
    file.write(f"@equallength {str(equal_length).lower()}\n")
    file.write("@data\n")

    return file


In [None]:
import os
import textwrap

def _write_header_tsf(
    path,
    dataset_name,
    univariate=True,
    equal_length=True,
    frequency="weekly",
    horizon=8,
    missing=False,
    comment=None,
    suffix=None,
    extension=".tsf",
):
    if not os.path.exists(path):
        os.makedirs(path)

    load_path = f"{path}/{dataset_name}"
    if suffix is not None:
        load_path = load_path + suffix
    load_path = load_path + extension

    file = open(load_path, "w")

    if comment is not None:
        file.write("\n# ".join(textwrap.wrap("# " + comment)))
        file.write("\n")

    file.write("# Dataset Information\n")
    file.write(f"@relation {dataset_name}\n")
    file.write("@attribute series_name string\n")
    file.write("@attribute start_timestamp date\n")
    file.write(f"@frequency {frequency}\n")
    file.write(f"@horizon {horizon}\n")
    file.write(f"@missing {str(missing).lower()}\n")
    file.write(f"@equallength {str(equal_length).lower()}\n")
    file.write("@data\n")

    return file


In [None]:
def _write_dataframe_to_tsf_file(
    X, path, problem_name="sample_data", y=None, comment=None, regression=False
):
    """Write a DataFrame of series to a text file with a .tsf header.

    Parameters
    ----------
    X : pd.DataFrame
        Frame of shape (n_cases, n_channels); each cell holds one series
        (anything ``np.asarray`` can turn into a flat array of values).
    path : str
        Directory to write into; created by ``_write_header_tsf`` if missing.
    problem_name : str, default="sample_data"
        File name, with or without a trailing ".tsf". The name without the
        extension is passed to the header writer as the dataset name.
    y : sequence or None, default=None
        Optional value per case, written after the last channel of each row.
    comment : str or None, default=None
        Comment text inserted before the header.
    regression : bool, default=False
        Accepted for signature compatibility; the .tsf header written by
        ``_write_header_tsf`` has no field for it, so it is not used.

    Raises
    ------
    ValueError
        If ``X`` is not a DataFrame.
    IndexError
        If ``y`` is given and its length differs from the number of rows.
    """
    # ensure data provided is a dataframe
    if not isinstance(X, pd.DataFrame):
        raise ValueError(f"Data provided must be a DataFrame, passed a {type(X)}")
    n_cases, n_channels = X.shape
    if y is not None and len(y) != n_cases:
        raise IndexError(
            "The number of cases in X does not match the number of values in y"
        )
    # The header writer appends ".tsf" itself, so hand it the bare name.
    dataset_name = problem_name
    if dataset_name.endswith(".tsf"):
        dataset_name = dataset_name[: -len(".tsf")]
    # Materialise every cell once: positional values, independent of any index
    # the cell's Series may carry.
    rows = [
        [np.asarray(X.iloc[i, j]).ravel() for j in range(n_channels)]
        for i in range(n_cases)
    ]
    # Cells of a DataFrame need not share a length, so check rather than assume.
    equal_length = len({values.size for row in rows for values in row}) <= 1
    # Only pass keywords that _write_header_tsf actually accepts.
    file = _write_header_tsf(
        path,
        dataset_name,
        equal_length=equal_length,
        comment=comment,
    )
    # NOTE(review): rows are written as comma-separated values per channel, each
    # terminated by ":", while the header declares series_name and
    # start_timestamp attributes that these rows do not carry. Confirm the row
    # layout against the .tsf reader before relying on round-tripping.
    with file:
        for i, row in enumerate(rows):
            for values in row:
                file.write(",".join(str(value) for value in values))
                file.write(":")
            if y is not None:
                file.write(str(y[i]))
            file.write("\n")


In [None]:
def _write_data_to_tsfile(
    X,
    path,
    problem_name,
    y=None,
    missing_values="NaN",
    comment=None,
    suffix=None,
    regression=False,
):
    """Output a dataset to .ts text file format.

    Parameters
    ----------
    X: Union[list, np.ndarray]
        time series collection, either a 3d ndarray  (n_cases, n_channels,
        n_timepoints) or a list of [n_cases] 2d numpy arrays (possibly variable
        length)
    path: str
        The full path to output the ts file to.
    problem_name: str
        The problemName to print in the header of the ts file and also the name of
        the file. No extension is appended here.
    y: list, ndarray or None, default=None
        The class values for each case, optional.
    missing_values: str, default="NaN"
        Representation for missing values.
    comment: str or None, default=None
        Comment text to be inserted before the header in a block.
    suffix: str or None, default=None
        Addon at the end of the filename, i.e. _TRAIN or _TEST
    regression: bool, default=False
        If True, y is treated as a continuous target and no class labels are
        written to the header.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If X is neither an ndarray nor a list.
    IndexError
        If y is given and its length differs from the number of cases in X.

    Notes
    -----
    This version currently does not support writing timestamp data.
    """
    # ensure data provided is a ndarray or a list
    if not isinstance(X, (np.ndarray, list)):
        raise TypeError("Data provided must be a ndarray or a list")
    class_labels = None
    if y is not None:
        # ensure number of cases is same as the class value list
        if len(X) != len(y):
            raise IndexError(
                "The number of cases in X does not match the number of values in y"
            )
        # Only derive labels when there is a y to derive them from; calling
        # np.unique(None) would yield a bogus single label "None".
        if not regression:
            class_labels = np.unique(y)
    n_cases = len(X)
    n_channels = len(X[0])
    univariate = n_channels == 1
    equal_length = True
    if isinstance(X, list):
        # Lengths are compared on the first channel of each case.
        length = len(X[0][0])
        equal_length = all(len(X[i][0]) == length for i in range(1, n_cases))
    series_length = len(X[0][0]) if equal_length else -1
    file = _write_header(
        path,
        problem_name,
        univariate=univariate,
        equal_length=equal_length,
        series_length=series_length,
        class_labels=class_labels,
        comment=comment,
        regression=regression,
        suffix=suffix,
        extension=None,
    )
    # The context manager closes the handle even if a row fails to serialise.
    with file:
        for i in range(n_cases):
            for j in range(n_channels):
                series = ",".join(
                    str(num) if not np.isnan(num) else missing_values
                    for num in X[i][j]
                )
                file.write(series)
                file.write(":")
            if y is not None:
                file.write(str(y[i]))
            file.write("\n")


In [None]:
def write_to_tsfile(
    X, path, y=None, problem_name="sample_data.ts", header=None, regression=False
):
    """Write an aeon collection of time series to text file in .ts format.

    Write metadata and data stored in aeon compatible data set to file.
    A description of the ts format is in examples/load_data.ipynb.

    ndarray and list input is written by ``_write_data_to_tsfile``; DataFrame
    input is dispatched to ``_write_dataframe_to_tsfile``.

    Parameters
    ----------
    X : np.ndarray (n_cases, n_channels, series_length) or list of np.ndarray[
    n_cases] or pd.DataFrame with (n_cases,n_channels), each cell a pd.Series
        Collection of time series: univariate, multivariate, equal or unequal length.
    path : string.
        Location of the directory to write file
    y: None or ndarray, default = None
        Response variable, discrete for classification, continuous for regression
        None if clustering.
    problem_name : string, default = "sample_data.ts"
        Name of the file; ".ts" is appended if it is not already the extension.
        The file is written to <path>/<problem_name>.
    header: string, default = None
        Optional text at the top of the file that is ignored when loading.
    regression: boolean, default = False
        Indicate if this is a regression problem, so it is correctly specified in
        the header since there is no definite way of inferring this from y

    Raises
    ------
    TypeError
        If X is not an ndarray, a list or a DataFrame.
    """
    if not isinstance(X, (np.ndarray, list, pd.DataFrame)):
        raise TypeError(
            f" Wrong input data type {type(X)} convert to np.ndarray ("
            f"n_cases, n_channels,n_timepoints) if equal length or list "
            f"of [n_cases] np.ndarray shape (n_channels, n_timepoints) if unequal"
        )

    # See if passed file name contains .ts extension or not
    split = problem_name.split(".")
    if split[-1] != "ts":
        problem_name = problem_name + ".ts"

    if isinstance(X, (np.ndarray, list)):
        # Forward the header text too; it used to be dropped on this branch.
        _write_data_to_tsfile(
            X, path, problem_name, y=y, comment=header, regression=regression
        )
    else:
        # NOTE(review): `_write_dataframe_to_tsfile` is not defined in the part
        # of the notebook visible here - confirm it is in scope before relying
        # on DataFrame input.
        _write_dataframe_to_tsfile(
            X,
            path,
            problem_name=problem_name,
            y=y,
            comment=header,
            regression=regression,
        )


In [None]:
def _write_header(
    path,
    problem_name,
    univariate=True,
    equal_length=False,
    series_length=-1,
    comment=None,
    regression=False,
    class_labels=None,
    suffix=None,
    extension=None,
):
    if class_labels is not None and regression:
        raise ValueError("Cannot have class_labels true for a regression problem")
    # create path if it does not exist
    dir = f"{str(path)}/"
    try:
        os.makedirs(dir, exist_ok=True)
    except OSError:
        raise ValueError(f"Error trying to access {dir} in _write_header")
    # create ts file in the path
    load_path = f"{dir}{str(problem_name)}"
    if suffix is not None:
        load_path = load_path + suffix
    if extension is not None:
        load_path = load_path + extension
    file = open(load_path, "w")
    # write comment if any as a block at start of file
    if comment is not None:
        file.write("\n# ".join(textwrap.wrap("# " + comment)))
        file.write("\n")

    """ Writes the header info for a ts file"""
    file.write(f"@problemName {problem_name}\n")
    file.write("@timestamps false\n")
    file.write(f"@univariate {str(univariate).lower()}\n")
    file.write(f"@equalLength {str(equal_length).lower()}\n")
    if series_length > 0 and equal_length:
        file.write(f"@seriesLength {series_length}\n")
    # write class labels line
    if class_labels is not None:
        space_separated_class_label = " ".join(str(label) for label in class_labels)
        file.write(f"@classLabel true {space_separated_class_label}\n")
    else:
        file.write("@classLabel false\n")
        if regression:  # or if a regresssion problem, write target label
            file.write("@targetlabel true\n")
    file.write("@data\n")
    return file