<a href="https://colab.research.google.com/github/Zasegor/fourier-series-prediction/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import math
import numpy as np
import pandas as pd
from scipy.signal import find_peaks
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import plotly.graph_objects as go


---

In [2]:
class FourierForecaster:
    """
    A class for forecasting time series using Fourier series with cosine terms.

    Args:
        l (int): Length of the period.
        m (int): Number of periods.
        p (int): Delay size for 'delay' method or AR order for 'arima' method.
        method (str): Forecasting method ('delay' for delay matrix or 'arima' for ARIMA).
        reg_lambda (float): Regularization parameter for solving linear systems.
    """
    def __init__(self, l: int, m: int, p: int = 1, method: str = "delay", reg_lambda: float = 1e-6):
        self.l = l
        self.m = m
        self.p = p
        self.method = method.lower()
        self.reg_lambda = reg_lambda
        self.matrix = None
        self.new_coefs = None
        self.prediction = None

        if self.method not in ["delay", "arima"]:
            raise ValueError("Method must be 'delay' or 'arima'")
        if self.l < 1 or self.m < 1 or self.p < 1:
            raise ValueError("l, m, and p must be positive integers")

    def _cos(self, k: int, t: int, l: int) -> float:
        """Computes the cosine term for Fourier series."""
        return math.cos(math.pi * k * t / l)

    def _get_matrix_and_vector(self, period_i: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Constructs the matrix and vector for Fourier coefficient computation."""
        if len(period_i) < 2:
            raise ValueError("period_i must have at least 2 elements")

        l = len(period_i) - 1
        t = np.arange(l + 1)
        k = np.arange(1, l + 1)

        cos_values = np.cos(np.pi * k[:, None] * t / l)
        matrix = np.hstack([0.5 * np.ones((l + 1, 1)), cos_values.T])
        y = period_i.copy()

        return matrix, y

    def _solve_system(self, M: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Solves a linear system with optional regularization."""
        if np.linalg.cond(M) > 1e6:
            M_reg = M + self.reg_lambda * np.eye(M.shape[0])
            return np.linalg.solve(M_reg, b)
        return np.linalg.solve(M, b)

    def _get_matrix_from_series(self, series: pd.Series) -> np.ndarray:
        """Converts the time series into a matrix of period observations."""
        expected_length = self.m * self.l
        if len(series) < expected_length:
            raise ValueError(f"Series length {len(series)} is less than required {expected_length}")

        series_values = series.values[:expected_length]
        if len(series_values) < expected_length:
            series_values = np.pad(series_values, (0, expected_length - len(series_values)), mode='constant')

        return series_values.reshape(self.m, self.l)

    def _get_delay_matrix(self, input_vector: np.ndarray) -> np.ndarray:
        """Constructs a delay matrix for k-NN forecasting."""
        input_vector_copy = np.copy(input_vector)
        m = input_vector_copy.shape[0] % self.p

        if m != 0:
            input_vector_copy = np.delete(input_vector_copy, range(m))

        row_dim = input_vector_copy.shape[0] // self.p
        delay_matrix = np.resize(input_vector_copy, new_shape=(row_dim, self.p)).T

        return delay_matrix

    def _find_nearest(self, row: np.ndarray) -> set:
        """Finds indices of nearest neighbors for k-NN forecasting."""
        neighbors_cnt = 2 * self.p + 1
        last_element = row[-1]
        all_neighbors = row[:-1]
        idx = set(np.argsort(np.abs(all_neighbors - last_element))[:neighbors_cnt])

        return idx

    def _predict_by_one_step(self, input_vector: np.ndarray) -> float:
        """Predicts one step ahead using the specified method."""
        if self.method == "arima":
            if len(input_vector) < 3:
                return input_vector[-1]
            try:
                model = ARIMA(input_vector, order=(min(self.p, 2), 0, 0))
                model_fit = model.fit()
                return model_fit.forecast(steps=1)[0]
            except Exception:
                return input_vector[-1]
        else:  # delay method
            delay_matrix = self._get_delay_matrix(input_vector)
            last_row = delay_matrix[-1, :]
            nearest_neighbors_indexes = self._find_nearest(last_row)

            y = np.empty((0,))
            X = np.empty((0, self.p + 1))
            for index in nearest_neighbors_indexes:
                y = np.append(y, delay_matrix[0, index + 1])
                row = np.append(np.array([1]), delay_matrix[:, index])
                row = np.reshape(row, (1, self.p + 1))
                X = np.append(X, row, axis=0)

            coef = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), y)
            return sum(np.append(np.array([1]), delay_matrix[:, -1]) * coef)

    def _get_new_fourier_coefs(self, periods: np.ndarray) -> list:
        """Computes Fourier coefficients for the next period."""
        new_coefs = []
        coefs_for_all_periods = []

        for period in periods:
            X, y = self._get_matrix_and_vector(period)
            fourier_coef_for_period = self._solve_system(X, y)
            coefs_for_all_periods.append(fourier_coef_for_period)

        coefs_for_all_periods = np.array(coefs_for_all_periods)

        for i in range(coefs_for_all_periods.shape[1]):
            coef_for_next_period = self._predict_by_one_step(coefs_for_all_periods[:, i])
            new_coefs.append(coef_for_next_period)

        return new_coefs

    def _predict_next_period(self) -> list:
        """Predicts the time series for the next period."""
        new_period = []
        for t in range(self.l):
            s = self.new_coefs[0] / 2
            for k in range(1, len(self.new_coefs)):
                s += self.new_coefs[k] * self._cos(k, t, self.l - 1)
            new_period.append(s)

        return new_period

    def fit(self, series: pd.Series) -> 'FourierForecaster':
        """
        Fits the model to the input time series.

        Args:
            series: Input time series (pandas Series).

        Returns:
            Self for method chaining.
        """
        self.matrix = self._get_matrix_from_series(series)
        self.new_coefs = self._get_new_fourier_coefs(self.matrix)
        return self

    def predict(self, index: pd.Index = None) -> pd.Series:
        """
        Predicts the next period.

        Args:
            index: Index for the predicted series (optional).

        Returns:
            Predicted series for the next period (pandas Series).
        """
        self.prediction = self._predict_next_period()
        if index is None:
            index = pd.RangeIndex(start=0, stop=self.l)
        return pd.Series(self.prediction, index=index)

    def evaluate(self, test_series: pd.Series, end_date: str = None) -> tuple[float, float]:
        """
        Evaluates the prediction against a test series.

        Args:
            test_series: Test time series (pandas Series).
            end_date: End date for evaluation (optional, e.g., '2020-02-21').

        Returns:
            Tuple of (MAE, MAPE) metrics.
        """
        if self.prediction is None:
            raise ValueError("Model must be fitted and predicted before evaluation")

        pred = pd.Series(self.prediction[:min(self.l, len(test_series))], index=test_series.index[:min(self.l, len(test_series))])
        if end_date:
            pred = pred[pred.index <= pd.Timestamp(end_date)]
            test = test_series[test_series.index <= pd.Timestamp(end_date)]
        else:
            test = test_series[:self.l]

        mae = round(mean_absolute_error(test, pred), 2)
        mape = round(mean_absolute_percentage_error(test, pred), 3)
        return mae, mape

    def plot(self, test_series: pd.Series, end_date: str = None) -> go.Figure:
        """
        Plots the predicted and test series using Plotly.

        Args:
            test_series: Test time series (pandas Series).
            end_date: End date for plotting (optional, e.g., '2020-02-21').

        Returns:
            Plotly Figure object.
        """
        if self.prediction is None:
            raise ValueError("Model must be fitted and predicted before plotting")

        pred = pd.Series(self.prediction[:min(self.l, len(test_series))], index=test_series.index[:min(self.l, len(test_series))])
        if end_date:
            pred = pred[pred.index <= pd.Timestamp(end_date)]
            test = test_series[test_series.index <= pd.Timestamp(end_date)]
        else:
            test = test_series[:self.l]

        fig = go.Figure()
        fig.add_scatter(
            x=pred.index,
            y=pred.values,
            line={'width': 2},
            name="Pred"
        )
        fig.add_scatter(
            x=test.index,
            y=test.values,
            line={'width': 2},
            name="Test"
        )
        fig.update_layout(
            title="Fourier Forecast vs Test Data",
            xaxis_title="Date",
            yaxis_title="Value",
            template="plotly_white"
        )
        return fig

# Синтетические данные

In [None]:
start_date = '2013-01-01'
dates = pd.date_range(start=start_date, periods=360*10, freq='D')
n_days = len(dates)

np.random.seed(42)
t = np.arange(n_days)

cycle1 = 50 * np.cos(2 * np.pi * t / 180)
cycle2 = 30 * np.cos(2 * np.pi * t / 360)

trend = 0.1 * t + 100

noise = np.random.normal(0, 10, n_days)

prices = trend + cycle1 + cycle2 + noise
prices = np.maximum(prices, 10)

series = pd.Series(prices, index=dates, name='price')

df = pd.DataFrame({'datetime': series.index, 'price': series.values})
df.to_csv('synthetic_prices.csv', index=False)

print(series.head())

2013-01-01    184.967142
2013-01-02    178.682329
2013-01-03    186.536813
2013-01-04    195.215279
2013-01-05    177.498791
Freq: D, Name: price, dtype: float64


In [None]:
series = pd.Series(df['price'].values, index=pd.to_datetime(df['datetime']))

train_size = 360*9
train_series = series[:train_size]
test_series = series[train_size:]

In [None]:
fig = go.Figure()

fig.add_scatter(
    x=train_series.index,
    y=train_series.values,
    line={'width':2},
    name="Train"
)

fig.add_scatter(
    x=test_series.index,
    y=test_series.values,
    line={'width':2},
    name="Test"
)

fig.update_layout(
    title='Предсказание погоды',
    scene=dict(
        xaxis_title='Дата',
        yaxis_title='oC'
    ),
    showlegend=True,
    height=600
)

In [None]:
forecaster = FourierForecaster(l=360, m=9, p=2, method="arima")
forecaster.fit(train_series)

test_pred = forecaster.predict(index=test_series.index[:360])

mae, mape = forecaster.evaluate(test_series)
print(f"MAE: {mae}, MAPE: {mape}")

fig1 = forecaster.plot(test_series)
fig1.show()


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals



MAE: 10.62, MAPE: 0.024


In [None]:
fig.add_scatter(
    x=test_pred.index,
    y=test_pred.values,
    line={'width':2},
    name="Pred"
)

# Погода

In [None]:
data = pd.read_csv('/content/DailyDelhiClimateTrain.csv')[['date', 'meantemp']]
data['date'] = pd.to_datetime(data['date'])
test_data = pd.read_csv('/content/DailyDelhiClimateTest.csv')[['date', 'meantemp']]
test_data['date'] = pd.to_datetime(test_data['date'])

temp_series = data.set_index('date')['meantemp']
temp_series = temp_series[~((temp_series.index.month == 2) & (temp_series.index.day == 29))][:-1]
test_series = test_data.set_index('date')['meantemp']
test_series = test_series[~((test_series.index.month == 2) & (test_series.index.day == 29))]

In [None]:
fig = go.Figure()

fig.add_scatter(
    x=temp_series.index,
    y=temp_series.values,
    line={'width':2},
    name="Train"
)

fig.add_scatter(
    x=test_series.index,
    y=test_series.values,
    line={'width':2},
    name="Test"
)

fig.update_layout(
    title='Предсказание погоды',
    scene=dict(
        xaxis_title='Дата',
        yaxis_title='oC'
    ),
    showlegend=True,
    height=600
)

In [None]:
m = 4
l = 365
p = 1

index = pd.date_range("2017-01-01", "2017-12-31", freq="D")

forecaster = FourierForecaster(l=l, m=m, p=p, method="arima")
forecaster.fit(temp_series)

test_pred = forecaster.predict(index=index)

mae, mape = forecaster.evaluate(test_series)
print(f"MAE: {mae}, MAPE: {mape}")

fig1 = forecaster.plot(test_series)
fig1.show()


Maximum Likelihood optimization failed to converge. Check mle_retvals


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.


Maximum Likelihood optimization failed to converge. Check mle_retvals


Maximum Likelihood optimization failed to converge. Check mle_retvals


Maximum Likelihood optimization failed to converge. Check mle_retvals


Maximum Likelihood optimization failed to converge. Check mle_retvals


Maximum Likelihood optimization failed to converge. Check mle_retvals


Maximum Likelihood optimization failed to converge. Check mle_retvals



MAE: 2.56, MAPE: 0.118



Maximum Likelihood optimization failed to converge. Check mle_retvals



In [None]:
fig.add_scatter(x=test_pred.index,
                y=test_pred.values,
                line={'width':2},
                name="Pred"
)

---

# Самолеты

In [None]:
data = pd.read_csv('/content/euro_arrivals.csv')
data['FLT_DATE'] = pd.to_datetime(data['FLT_DATE'])

data_series = data.set_index('FLT_DATE')['FLT_ARR_1']
data_series = data_series[~((data_series.index.month == 2) & (data_series.index.day == 29))]

In [None]:
[len(data_series[data_series.index.year == i]) for i in range(2016, 2025)]

[366, 365, 365, 365, 366, 365, 365, 365, 366]

In [None]:
fig = go.Figure()

fig.add_scatter(
    x=data_series.index,
    y=data_series.values,
    line={'width':2},
    name="Data"
)

fig.update_layout(
    title='',
    scene=dict(
        xaxis_title='',
        yaxis_title=''
    ),
    showlegend=True,
    height=600
)

In [None]:
train_series = data_series[data_series.index.year<=2019]
test_series = data_series[data_series.index.year==2020]
test_series = test_series[test_series.index<=pd.Timestamp('2020-02-21')]

In [None]:
test_series.head()

Unnamed: 0_level_0,FLT_ARR_1
FLT_DATE,Unnamed: 1_level_1
2020-01-01,16056
2020-01-02,20778
2020-01-03,21282
2020-01-04,18666
2020-01-05,20757


In [None]:
fig = go.Figure()

fig.add_scatter(
    x=train_series.index,
    y=train_series.values,
    line={'width':2},
    name="Train"
)

fig.update_layout(
    title='',
    scene=dict(
        xaxis_title='',
        yaxis_title=''
    ),
    showlegend=True,
    height=600
)

In [None]:
m = 4
l = 365
p = 1

index = pd.date_range(start='2020-01-01', end='2020-12-31', freq='D')
index = index[~((index.month == 2) & (index.day == 29))]

forecaster = FourierForecaster(l=l, m=m, p=p)
forecaster.fit(train_series)

test_pred = forecaster.predict(index=index)

mae, mape = forecaster.evaluate(test_series, end_date='2020-02-21')
print(f"MAE: {mae}, MAPE: {mape}")

fig1 = forecaster.plot(test_series, end_date='2020-02-21')
fig1.show()

MAE: 1711.85, MAPE: 0.092


In [None]:
forecaster_arima = FourierForecaster(l=l, m=m, p=p, method="arima")
forecaster_arima.fit(train_series)

test_pred_arima = forecaster_arima.predict(index=index)

mae, mape = forecaster_arima.evaluate(test_series, end_date='2020-02-21')
print(f"MAE: {mae}, MAPE: {mape}")

fig1 = forecaster_arima.plot(test_series, end_date='2020-02-21')
fig1.show()


Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.



MAE: 1720.51, MAPE: 0.089


In [None]:
fig.add_scatter(
    x=test_pred.index,
    y=test_pred.values,
    line={'width':2},
    name="Pred"
)

fig.add_scatter(
    x=test_pred_arima.index,
    y=test_pred_arima.values,
    line={'width':2},
    name="PredArima"
)

fig.add_scatter(
    x=test_series.index,
    y=test_series.values,
    line={'width':2},
    name="Test"
)

---

# SNP

In [21]:
data  = pd.read_csv("/content/SNP/Download Data - INDEX_US_S&P US_SPX (0).csv", parse_dates=['Date'], index_col='Date')
for i in range(1, 10):
  tempdata = pd.read_csv(f"/content/SNP/Download Data - INDEX_US_S&P US_SPX ({i}).csv", parse_dates=['Date'], index_col='Date')
  data = data.combine_first(tempdata)

data.head()

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-01-04,2038.2,2038.2,1989.68,2012.66
2016-01-05,2013.78,2021.94,2004.17,2016.71
2016-01-06,2011.71,2011.71,1979.05,1990.26
2016-01-07,1985.32,1985.32,1938.83,1943.09
2016-01-08,1945.97,1960.4,1918.46,1922.03


In [30]:
data = pd.read_csv("/content/snp500_history.csv", parse_dates=['Date'], index_col='Date')
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1927-12-30 00:00:00-05:00,17.66,17.66,17.66,17.66,0,0.0,0.0
1928-01-03 00:00:00-05:00,17.76,17.76,17.76,17.76,0,0.0,0.0
1928-01-04 00:00:00-05:00,17.719999,17.719999,17.719999,17.719999,0,0.0,0.0
1928-01-05 00:00:00-05:00,17.549999,17.549999,17.549999,17.549999,0,0.0,0.0
1928-01-06 00:00:00-05:00,17.66,17.66,17.66,17.66,0,0.0,0.0


In [38]:
data = data[data.index>pd.Timestamp('2010-01-01 00:00:00-05:00')]

In [54]:
data = pd.read_csv("/content/snpdataset.csv", parse_dates=['timestamp'], index_col='timestamp')
data.head()

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-09-11 09:30:00,44.01,44.05,44.01,44.01,
2017-09-11 09:31:00,44.01,44.25,44.0,44.25,139460.0
2017-09-11 09:32:00,44.25,44.32,44.22,44.27,53119.0
2017-09-11 09:33:00,44.28,44.46,44.2504,44.34,38228.0
2017-09-11 09:34:00,44.34,44.36,44.22,44.24,32831.0


In [57]:
data.index[-1]

Timestamp('2017-11-07 11:12:00')

In [55]:
data.describe()

Unnamed: 0,open,high,low,close,volume
count,16119.0,16119.0,16119.0,16119.0,15666.0
mean,48.876498,48.897112,48.855891,48.876059,3469.346036
std,2.532328,2.532869,2.531208,2.531823,5713.668844
min,44.01,44.05,44.0,44.01,100.0
25%,47.115,47.13,47.1,47.115,1000.0
50%,47.71,47.73,47.69,47.7099,2100.0
75%,51.34,51.37,51.3,51.3347,3900.0
max,53.64,53.74,53.47,53.63,297047.0


In [53]:
fig = go.Figure()

fig.add_scatter(
    x=data.index,
    y=data['close'],
    line={'width':2},
    name="Data"
)

In [None]:
fig = go.Figure()

fig.add_scatter(
    x=train_series.index,
    y=train_series.values,
    line={'width':2},
    name="Train"
)

fig.add_scatter(
    x=test_series.index,
    y=test_series.values,
    line={'width':2},
    name="Test"
)

fig.update_layout(
    title='Предсказание погоды',
    scene=dict(
        xaxis_title='Дата',
        yaxis_title='oC'
    ),
    showlegend=True,
    height=600
)