In [2]:
import pandas as pd

traffic = pd.read_csv(
    "../data/processed/traffic_aggregated.csv",
    parse_dates=["timestamp"],
    index_col="timestamp"
)

traffic.head()


Unnamed: 0_level_0,traffic_flow
timestamp,Unnamed: 1_level_1
2017-01-01 00:00:00,65.83143
2017-01-01 00:05:00,65.468198
2017-01-01 00:10:00,65.468198
2017-01-01 00:15:00,65.468198
2017-01-01 00:20:00,65.468198


Time-based features (calendar intelligence)

In [3]:
df = traffic.copy()

df["hour"] = df.index.hour
df["day_of_week"] = df.index.dayofweek
df["is_weekend"] = (df["day_of_week"] >= 5).astype(int)


Lag features (core of forecasting)

In [4]:
lags = [1, 3, 6, 12, 24]  # 5-min intervals

for lag in lags:
    df[f"lag_{lag}"] = df["traffic_flow"].shift(lag)


Rolling statistics (trend & volatility)

In [5]:
windows = [6, 12, 24]

for window in windows:
    df[f"roll_mean_{window}"] = df["traffic_flow"].rolling(window).mean()
    df[f"roll_std_{window}"] = df["traffic_flow"].rolling(window).std()


Drop NaNs created by features

In [6]:
df = df.dropna()
df.head()


Unnamed: 0_level_0,traffic_flow,hour,day_of_week,is_weekend,lag_1,lag_3,lag_6,lag_12,lag_24,roll_mean_6,roll_std_6,roll_mean_12,roll_std_12,roll_mean_24,roll_std_24
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-01-01 02:00:00,62.81379,2,6,1,67.012016,66.181734,65.35989,65.465191,65.83143,63.824131,2.237779,63.960167,1.672746,64.016883,1.985902
2017-01-01 02:05:00,67.49225,2,6,1,62.81379,62.058451,61.619649,63.235851,65.468198,64.802898,2.361598,64.314866,1.93579,64.101219,2.090441
2017-01-01 02:10:00,61.117262,2,6,1,67.49225,67.012016,63.259146,62.944482,65.468198,64.445917,2.76846,64.162598,2.116785,63.91993,2.154422
2017-01-01 02:15:00,65.2,2,6,1,61.117262,62.81379,66.181734,63.998413,65.468198,64.282295,2.672706,64.26273,2.13664,63.908755,2.146724
2017-01-01 02:20:00,40.9,2,6,1,65.2,67.49225,62.058451,65.381665,65.468198,60.755886,10.028855,62.222592,7.037798,62.88508,5.140694


save feature-engineered dataset

In [7]:
df.to_csv("../data/processed/traffic_features.csv")
