In [2]:
import sys
sys.path.append('/notebooks/FAN')
import torch
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

%load_ext autoreload
%autoreload 2

# 1. Select K based on the spectrum

In [4]:
from torch_timeseries.data.scaler import MaxAbsScaler, StandarScaler
from torch_timeseries.datasets import *
import numpy as np
import torch 
# Dataset under analysis; other options: ExchangeRate, ETTm1, Traffic, Electricity, Weather.
dataset = Traffic(root='/notebooks/FAN/data/')
scaler = StandarScaler()
window = 96  # window length in time steps (previously tried: 7*24*2, 4380*2)

# Keyword options for the loader; `steps` is tied to `window`
# (presumably yielding non-overlapping chunks -- confirm against the loader).
loader_options = dict(
    window=window,
    horizon=1,
    steps=window,
    scale_in_train=False,
    shuffle_train=True,
    freq="h",
    batch_size=64,
    train_ratio=0.7,
    val_ratio=0.2,
    num_worker=20,
    uniform_eval=True,
)
dataloader = ChunkSequenceTimefeatureDataLoader(dataset, scaler, **loader_options)

Using downloaded and verified file: /notebooks/FAN/data/traffic/traffic.txt.gz
Extracting /notebooks/FAN/data/traffic/traffic.txt.gz to /notebooks/FAN/data/traffic


In [5]:
# Placeholder kept from the original cell; nothing is ever appended to it here.
train_batch_y = []

# Gather the first element (scaled_x) of every training batch and stack
# all batches along the batch dimension.
train_batch_x = torch.concat(
    [
        scaled_x
        for scaled_x, scaled_y, y, x_date_enc, y_date_enc in dataloader.train_loader
    ],
    0,
)


In [7]:
# Real FFT along dim 1 of the stacked training windows, then the mean and
# standard deviation of the magnitude taken over dim 0.
normed_train_fft = torch.fft.rfft(train_batch_x, dim=1)
fft_mag = normed_train_fft.abs()
fft_mag_mean = fft_mag.mean(0)
fft_mag_std = fft_mag.std(0)

In [8]:
# K = number of frequency bins (column 0 of the mean magnitude spectrum) whose
# magnitude exceeds 10% of the largest one.
# Uses tensor reductions rather than Python's built-in sum()/max(), which
# would iterate over the tensor one element at a time.
channel0_spectrum = fft_mag_mean[:, 0]
K = (channel0_spectrum > 0.1 * channel0_spectrum.max()).sum()
K

tensor(30)

# 2. Dataset metrics

## 2.1 Trend/seasonal variations

In [25]:
from statsmodels.tsa.stattools import acf
# Column 0 of the dataset.
data = dataset.data[:, 0]
n_points = len(data)

# Trend variation: relative change between the mean of the first 70% of the
# series and the mean of the last 30%.
head_mean = data[0:int(n_points*0.7)].mean()
tail_mean = data[-int(n_points*0.3):].mean()
trend_variation = abs(head_mean - tail_mean) / abs(head_mean)

# Seasonal variation: std of the FFT magnitude for every bin except bin 0,
# each divided by the mean magnitude of bin 0, then summed.
seasonal_variation = sum(fft_mag_std[1:, 0] / fft_mag_mean[0, 0]).item()

print("Traffic trend variation:", trend_variation)
print("Traffic seasonal variation:", seasonal_variation)

Traffic trend variation: 0.06835729993310134
Traffic seasonal variation: 14.225421047159246


## 2.2  ADF test after normalization

In [9]:

# Cut the series into consecutive, non-overlapping segments of 96 steps,
# dropping the trailing remainder that does not fill a whole segment.
data = dataloader.dataset.data
seg_len = 96
N = len(data) // seg_len
batch_x = data[:N * seg_len, :].reshape(N, seg_len, -1)


### raw

In [11]:
from statsmodels.tsa.stattools import adfuller
from tqdm import tqdm
                                                                                                                                                                                                                     
# ADF test statistic (element 0 of adfuller's return value) for column 0 of
# every raw window, averaged over the windows on which the test succeeded.
raw_results = []
raw_skipped = 0  # number of windows on which adfuller raised
for i in tqdm(range(batch_x.shape[0])):
    try:
        result = adfuller(batch_x[i, :, 0])[0]
    except Exception:
        # Best-effort: skip windows the test cannot handle. `Exception`
        # (rather than a bare `except:`) lets KeyboardInterrupt / SystemExit
        # propagate, so the cell can still be interrupted.
        raw_skipped += 1
        continue
    raw_results.append(result)

# Make skipped windows visible instead of silently shrinking the sample.
if raw_skipped:
    print(f"ADF skipped {raw_skipped} of {batch_x.shape[0]} raw windows")

raw_adf = np.mean(raw_results)
raw_adf

100%|██████████| 182/182 [00:01<00:00, 133.54it/s]


-4.620084081464585

### RevIN

In [13]:
# Per-window normalisation: subtract the mean and divide by the std taken
# along dim 1 of each window (1e-14 guards against division by zero).
batch_x_t = torch.tensor(batch_x)
mean = batch_x_t.mean(1, keepdim=True)
std = batch_x_t.std(1, keepdim=True)
revin_batch_x = (batch_x_t - mean) / (std + 1e-14)

# ADF test statistic for column 0 of every normalised window, then the average.
revin_results = [
    adfuller(revin_batch_x[idx, :, 0])[0]
    for idx in tqdm(range(revin_batch_x.shape[0]))
]
revin_adf = np.mean(revin_results)
revin_adf


100%|██████████| 182/182 [00:01<00:00, 180.25it/s]


-4.620084081464584

### FAN

In [50]:
def main_freq_part(x, k):
    """Split ``x`` into its ``k`` dominant frequency components and the residual.

    A real FFT is taken along dim 1. For every position along the other
    dimensions, the ``k`` bins with the largest magnitude are kept and all
    other bins are zeroed. The kept part is transformed back to the time
    domain and subtracted from ``x``.

    Args:
        x: real-valued tensor; dim 1 is the axis that is transformed.
        k: number of frequency bins to keep. ``torch.topk`` raises if ``k``
            exceeds the number of rFFT bins (``x.shape[1] // 2 + 1``).

    Returns:
        ``(norm_input, x_filtered)`` where ``x_filtered`` is the float32
        reconstruction from the kept bins and ``norm_input = x - x_filtered``.
    """
    seq_len = x.shape[1]
    xf = torch.fft.rfft(x, dim=1)
    indices = torch.topk(xf.abs(), k, dim=1).indices

    # 0/1 mask with the same shape and dtype as the spectrum; 1 marks a kept bin.
    mask = torch.zeros_like(xf)
    mask.scatter_(1, indices, 1)
    xf_filtered = xf * mask

    # Pass n explicitly: without it irfft returns 2 * (bins - 1) samples, which
    # is one short for odd-length inputs and makes the subtraction below fail.
    x_filtered = torch.fft.irfft(xf_filtered, n=seq_len, dim=1).float()
    norm_input = x - x_filtered
    return norm_input, x_filtered

In [51]:
# Remove the 24 dominant frequency components from every window and keep the residual.
# NOTE(review): 24 is hard-coded here, while the spectrum-based selection
# earlier in this notebook produced K = 30 -- confirm which value is intended.
fan_batch_x, _ = main_freq_part(torch.tensor(batch_x), 24)

# ADF test statistic for column 0 of every residual window, then the average.
fan_results = [
    adfuller(fan_batch_x[idx, :, 0])[0]
    for idx in tqdm(range(fan_batch_x.shape[0]))
]
fan_adf = np.mean(fan_results)
print(fan_adf)

100%|██████████| 182/182 [00:00<00:00, 190.35it/s]

-11.787563374669954



