In [33]:
%matplotlib inline
import copy
import json
from uuid import uuid4
from pathlib import Path
from typing import List, Dict

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorly as tl
from gluonts.dataset.util import to_pandas


from tens_utils import (
    get_gluonts_dataset,
    mad,
    rmse, 
    get_param_sweep,
    trend_cycle_decompose,
    naive_seasonal_decompose,
)


In [19]:
def is_seasonal(df, period: int, threshold: float = 0.75) -> bool:
    tc, res_tc = trend_cycle_decompose(df, period * 2)
    seas, res_seas = naive_seasonal_decompose(res_tc, period * 7)

    r2 = np.square(seas).sum() / np.square(res_tc).sum()
    
    return r2 > threshold

In [65]:
train_datas, test_datas, lens, freqs = get_gluonts_dataset(dataset_name="traffic")

INFO:root:downloading and processing traffic


saving time-series into /Users/caner/.mxnet/gluon-ts/datasets/traffic/train/data.json
saving time-series into /Users/caner/.mxnet/gluon-ts/datasets/traffic/test/data.json
14036    862
dtype: int64
H    862
dtype: int64


In [66]:
len(train_datas)

862

In [62]:
# !rm datasets/electricity_seasonal_75/*

In [72]:
# !mkdir datasets/traffic_85/

In [76]:
!ls -l datasets/traffic_85/ | wc -l

     134


In [73]:
EXPECTED_PERIOD = 24 * 7
OUT_PATH = Path("datasets/traffic_85/")

In [74]:
for data in train_datas:
    data_id = str(uuid4())[:7]
    
    data_out = {}

    if is_seasonal(to_pandas(data), period=EXPECTED_PERIOD, threshold=0.85):
#         print(data_id)
        
        data_out["id"] = data_id
        data_out["start"] = str(data["start"])
        data_out["target"] = data["target"].tolist()

        with open(OUT_PATH / f"{data_id}.json", "w") as fp:
            json.dump(data_out, fp)