# 傅立葉變換

時間：2023

在時間序列預測的問題中，我們經常需要對時間資料進行特徵工程。時間資料中往往帶有季節性的訊息；為了擷取這些季節性特徵，可以使用傅立葉變換來進行特徵工程。

In [6]:
import pandas
import os
# import matplotlib.pyplot as plt
import matplotlib.pyplot
# import pandas as pd
import plotly.express
import pandas
# import seaborn as sns
import seaborn
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess

範例資料相當簡單，只有時間資訊跟對應的數值欄位，不過在處理時間資訊有很多種手法，可以經由週期或是指標等方式來轉換，在處理上要謹慎。

In [24]:
# Load the tunnel traffic table from the local CSV file.
data = pandas.read_csv(filepath_or_buffer="./source/tunnel.csv")
# Bare expression so the notebook renders the frame as the cell output.
data

Unnamed: 0,Day,NumVehicles
0,2003-11-01,103536
1,2003-11-02,92051
2,2003-11-03,100795
3,2003-11-04,102352
4,2003-11-05,106569
...,...,...
742,2005-11-12,104646
743,2005-11-13,94424
744,2005-11-14,112165
745,2005-11-15,114296


光一個時間欄位就可以變出有週期以及沒週期的時間訊息，謹慎再謹慎。

In [26]:
# Convert the 'Day' column to datetimes so the .dt accessor can be used below.
data['Day'] = pandas.to_datetime(data['Day'])
# Cyclic calendar positions: weekday (pandas numbers Monday as 0) and day of year.
data['DayOfWeek'] = data['Day'].dt.dayofweek
data['DayOfYear'] = data['Day'].dt.dayofyear
# Calendar year (not cyclic).
data['Year'] = data['Day'].dt.year
# Running count of rows whose weekday is 0 (Monday): every row of the same
# Monday-started week shares one id; rows before the first Monday get 0.
data['WeekOrder'] = data['DayOfWeek'].eq(0).cumsum()
# Bare expression so the notebook renders the frame as the cell output.
data

Unnamed: 0,Day,NumVehicles,DayOfWeek,DayOfYear,Year,NumberOfWeek,WeekOrder
0,2003-11-01,103536,5,305,2003,0,0
1,2003-11-02,92051,6,306,2003,0,0
2,2003-11-03,100795,0,307,2003,1,1
3,2003-11-04,102352,1,308,2003,1,1
4,2003-11-05,106569,2,309,2003,1,1
...,...,...,...,...,...,...,...
742,2005-11-12,104646,5,316,2005,106,106
743,2005-11-13,94424,6,317,2005,106,106
744,2005-11-14,112165,0,318,2005,107,107
745,2005-11-15,114296,1,319,2005,107,107


以一週（七天）為週期作圖：橫軸是星期幾（DayOfWeek），每一條線代表資料期間內的一個週次（WeekOrder）。這張圖不容易用文字描述，作圖前要先想清楚。

In [27]:
# Overlay the weeks on a shared day-of-week axis: one line per WeekOrder value.
figure = plotly.express.line(
    data,
    x='DayOfWeek',
    y='NumVehicles',
    color='WeekOrder',
)
# Hide the legend (it would otherwise get one entry per week); the returned
# figure is the cell output.
figure.update_layout(showlegend=False)

以一年為週期作圖：橫軸是一年中的第幾天（DayOfYear），每一條線代表資料涵蓋的一個年度（Year）。這張圖同樣不容易用文字描述，作圖前要先想清楚。

In [23]:
# Overlay the years on a shared day-of-year axis: one line per Year value.
figure = plotly.express.line(
    data_frame=data,
    x='DayOfYear',
    y='NumVehicles',
    color='Year',
)
# Bare expression so the notebook renders the figure as the cell output.
figure

In [33]:
from scipy.signal import periodogram
# data['NumVehicles']
# frequency = 1

In [37]:
# Periodogram of the vehicle counts.
# fs=365 is the sampling rate passed to scipy; presumably "samples per year"
# for daily data, which would put `frequency` in cycles per year -- confirm.
frequency, spectrum = periodogram(
    x=data['NumVehicles'],
    fs=365,
    window="boxcar",      # rectangular window, i.e. no tapering
    detrend='linear',     # remove a linear trend before the transform
    scaling='spectrum',   # power spectrum rather than power spectral density
)

In [38]:
# Draw the periodogram as a step plot ('hv' line shape) of spectrum vs. frequency.
figure = plotly.express.line(
    x=frequency,
    y=spectrum,
    line_shape='hv',
)
# Bare expression so the notebook renders the figure as the cell output.
figure

In [31]:
# Number of daily samples per year, as a float.
# The "1Y" Timedelta unit is ambiguous: pandas deprecated it and newer releases
# reject it. Spell the mean Gregorian year out explicitly instead:
# 365.2425 days = 365 d 5 h 49 min 12 s (integer components, so the value is exact).
fs = (
    pandas.Timedelta(days=365, hours=5, minutes=49, seconds=12)
    / pandas.Timedelta(days=1)
)
# Bare expression so the notebook shows the value (365.2425) as the cell output.
fs


Units 'M', 'Y' and 'y' do not represent unambiguous timedelta values and will be removed in a future version



365.2425

In [None]:
# def plot_periodogram(ts, detrend='linear', ax=None):
    
#     fs = pd.Timedelta("1Y") / pd.Timedelta("1D")
#     freqencies, spectrum = periodogram(
#         ts,
#         fs=fs,
#         detrend=detrend,
#         window="boxcar",
#         scaling='spectrum',
#     )
#     if ax is None:
#         _, ax = plt.subplots()
#     ax.step(freqencies, spectrum, color="purple")
#     ax.set_xscale("log")
#     ax.set_xticks([1, 2, 4, 6, 12, 26, 52, 104])
#     ax.set_xticklabels(
#         [
#             "Annual (1)",
#             "Semiannual (2)",
#             "Quarterly (4)",
#             "Bimonthly (6)",
#             "Monthly (12)",
#             "Biweekly (26)",
#             "Weekly (52)",
#             "Semiweekly (104)",
#         ],
#         rotation=30,
#     )
#     ax.ticklabel_format(axis="y", style="sci", scilimits=(0, 0))
#     ax.set_ylabel("Variance")
#     ax.set_title("Periodogram")
#     return ax