In [1]:
import tsdb
import pickle
import logging
import numpy as np
import pandas as pd


In [2]:
# NOTE(review): `force` is not read by any of the cells visible here;
# presumably it toggles re-running work that is otherwise skipped — confirm usage.
force = False

In [3]:
def normalize(s: np.ndarray) -> list[float]:
    """Standardize a series to zero mean and unit standard deviation.

    Args:
        s: Numeric array to standardize.

    Returns:
        ``(s - s.mean()) / np.std(s)`` converted to a plain Python list.

    A diagnostic line is printed (the values are still returned unchanged)
    when the result contains NaN or when the standard deviation is below
    1e-5, because in both cases the output is not a usable normalized series.
    """
    std = np.std(s)
    n = ((s - s.mean()) / std).tolist()
    # list.count() compares by equality and NaN never equals anything (nor
    # would a float ever equal a string), so NaN must be detected with np.isnan.
    if np.isnan(n).any() or std < 0.00001:
        print('NaN or std: ', std)
    return n


def save(data: list[list[float]],
         to: str) -> None:
    """Pickle a list of series to the file ``to``.

    Args:
        data: List of series, where each series is itself a list of floats.
        to: Destination file path; it is opened in binary write mode and
            overwritten if it already exists.

    Raises:
        AssertionError: If ``data`` is not a list, if its first series is not
            a list, or if the first value of that series is not a float.
    """
    assert isinstance(data, list), \
        f"data must be a list of series, got {type(data).__name__}"
    # Only the first series and its first value are spot-checked. The guards
    # keep an empty collection or an empty first series from raising
    # IndexError inside the validation itself.
    if data:
        assert isinstance(data[0], list), \
            f"each series must be a list, got {type(data[0]).__name__}"
        if data[0]:
            assert isinstance(data[0][0], float), \
                f"series values must be float, got {type(data[0][0]).__name__}"
    with open(to, "wb") as f:
        pickle.dump(data, f)
        

def split(data: list[np.ndarray],
          train: float = .9) -> tuple[list[np.ndarray],
                                      list[np.ndarray]]:
    """Partition ``data`` into a leading train part and a trailing test part.

    The input order is kept: the first ``int(len(data) * train)`` items form
    the train part and everything after them forms the test part.
    """
    boundary = int(len(data) * train)
    head = data[:boundary]
    tail = data[boundary:]
    return head, tail

## Electricity Transformer Temperature (ETT)

In [46]:
# ETT file stems, grouped by the "h" / "m" suffix in their names.
datasets_h = ['ETTh1', 'ETTh2']
datasets_m = ['ETTm1', 'ETTm2']

# Directory that holds the raw ETT CSV files.
path = "../data/raw/ETT"

In [9]:
def load_ETT(s: str, path: str) -> pd.DataFrame:
    """Read ``<path>/<s>.csv`` and return its contents as a DataFrame."""
    return pd.read_csv(f"{path}/{s}.csv")

In [47]:
"""
"""
def _normalized_ett_series(dataset_names: list[str],
                           path: str) -> list[list[float]]:
    """Load the given ETT files and return every feature column, normalized.

    For each dataset the "date" and "OT" columns are dropped, the remaining
    columns are cast to float and each column is passed through
    ``normalize`` on its own. The resulting series are concatenated in
    dataset order, then column order.
    """
    series = []
    for name in dataset_names:
        # Non-inplace drop: the frame returned by load_ETT is left untouched.
        features = load_ETT(name, path).drop(columns=["date", "OT"]).astype(float)
        series.extend(normalize(features[col].to_numpy())
                      for col in features.columns)
    return series


# The hourly ("ETTh") and minute ("ETTm") groups go through the same pipeline;
# only the input files and the output file prefix differ.
# split() slices the list of series, so the validation file holds the trailing
# series of each group rather than a trailing time window.
for prefix, dataset_names in (("ETTh", datasets_h), ("ETTm", datasets_m)):
    ett = _normalized_ett_series(dataset_names, path)
    dataset_train, dataset_test = split(ett)
    save(dataset_train,
         f"../data/processed/{prefix}_TRAIN.pickle")
    save(dataset_test,
         f"../data/processed/{prefix}_VAL.pickle")