In [1]:
import sys
# Add the sibling src/ directory to the import search path so the local import
# below can be resolved. The entry is relative, so it only works when the
# kernel's working directory is the folder that contains this notebook.
sys.path.append('../src')

# Project-local helper (presumably defined in ../src/time_series — confirm).
from time_series import (
    load_data
)


In [None]:
# Read the dataset through the project-local `load_data` helper.
# NOTE(review): the helper's return type is not visible here — presumably a
# pandas DataFrame, since `.head()` is called on it; confirm in time_series.
df = load_data('../data/data_original_m6.csv')
# Last expression of the cell: shows the first rows as the cell output.
df.head()

In [None]:
import pandas as pd

# Load the dataset directly with pandas: the 'Date' column is parsed as
# datetimes and used as the index.
# NOTE: this rebinds `df`, replacing the value assigned from `load_data(...)`
# in the earlier cell (same CSV path).
df = pd.read_csv("../data/data_original_m6.csv", parse_dates=['Date'], index_col='Date')
# Select a single column to work with as a univariate series.
series = df['AAPL']  # Example stock


In [20]:
import pandas as pd
from typing import Union, Optional

def generate_lags(
    series: Union[pd.Series, pd.DataFrame],
    past_lags: int = 1,
    future_lags: int = 0,
    dropna: bool = False,
) -> pd.DataFrame:
    """
    Generate a lagged DataFrame from a univariate or multivariate time series.

    Parameters
    ----------
    series : pd.Series or pd.DataFrame
        Input time series. Shifting is positional (row based), so the rows
        are expected to already be in chronological order.
    past_lags : int, default=1
        Number of past lags to include (y - 1, ..., y - p). Values <= 0 add
        no past-lag columns.
    future_lags : int, default=0
        Number of future lags to include (y + 1, ..., y + f). Values <= 0 add
        no future columns.
    dropna : bool, default=False
        Whether to drop rows with NaNs created by shifting. Note that rows
        containing NaNs already present in the input are dropped as well.

    Returns
    -------
    pd.DataFrame
        DataFrame with lagged values in the order:
        [y - p, ..., y - 1, y, y + 1, ..., y + f]

        For a Series or a single-column DataFrame the columns are labelled
        "y - k", "y" and "y + k". For a DataFrame with several columns the
        source column name replaces "y" (e.g. "AAPL - 1", "AAPL", "AAPL + 1")
        so that every output column is uniquely identifiable; within each
        lag, columns keep the input column order.
    """
    frame = series.to_frame() if isinstance(series, pd.Series) else series

    # With one input column the generic "y" label is unambiguous. With several
    # columns, reusing "y" for all of them would produce duplicate column
    # names, so each label is built from its source column name instead.
    if frame.shape[1] == 1:
        bases = ["y"]
    else:
        bases = [str(col) for col in frame.columns]

    def _relabel(shifted: pd.DataFrame, suffix: str) -> pd.DataFrame:
        # `shifted` is always a fresh object (result of shift/copy), so
        # renaming its columns never touches the caller's data.
        shifted.columns = [f"{base}{suffix}" for base in bases]
        return shifted

    # Past lags, oldest first (y - p down to y - 1)
    pieces = [
        _relabel(frame.shift(past), f" - {past}")
        for past in range(past_lags, 0, -1)
    ]

    # Current y
    pieces.append(_relabel(frame.copy(), ""))

    # Future lags (y + 1 up to y + f); a negative shift pulls later rows up
    pieces.extend(
        _relabel(frame.shift(-future), f" + {future}")
        for future in range(1, future_lags + 1)
    )

    # All pieces share the input index, so this is a plain column-wise join
    lagged = pd.concat(pieces, axis=1)

    return lagged.dropna() if dropna else lagged


In [24]:
import pandas as pd

# Toy series 10, 20, ..., 100 so the effect of each shift is easy to check by eye.
s = pd.Series(range(10,110, 10), name="y")
# Two past lags and two future leads; `dropna` is left at its default (False),
# so the rows at both ends keep the NaNs introduced by shifting.
# NOTE: this rebinds `df`, which earlier cells used for the loaded dataset.
df = generate_lags(s, past_lags=2, future_lags=2)

# Render the full lagged frame (10 rows) as rich output.
display(df)


Unnamed: 0,y - 2,y - 1,y,y + 1,y + 2
0,,,10,20.0,30.0
1,,10.0,20,30.0,40.0
2,10.0,20.0,30,40.0,50.0
3,20.0,30.0,40,50.0,60.0
4,30.0,40.0,50,60.0,70.0
5,40.0,50.0,60,70.0,80.0
6,50.0,60.0,70,80.0,90.0
7,60.0,70.0,80,90.0,100.0
8,70.0,80.0,90,100.0,
9,80.0,90.0,100,,
