In [8]:
import pandas as pd
import numpy as np
import os

In [10]:
import sys
from pathlib import Path

# Make the directory two levels above the current working directory importable
# so that the `src` package can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))
# Only append when missing: re-running this cell must not add duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.features._ts_features import TimeSeriesFeatureEngineering

In [None]:
# Read the raw semicolon-separated file: the first column is used as the index
# (pandas attempts to parse it as dates) and "," is the decimal mark.
file_path = "../../data/bronze/LD2011_2014.txt"
df = pd.read_csv(
    file_path,
    sep=";",
    decimal=",",
    index_col=0,
    parse_dates=True,
)

In [12]:
# Relabel the columns positionally as Client_1 ... Client_N
df.columns = [f"Client_{pos}" for pos in range(1, df.shape[1] + 1)]

# Aggregate by summing the readings that fall into each hourly / daily bin;
# any missing values left in the hourly table are replaced with 0
df_hourly = df.resample("h").sum().fillna(0)
df_daily = df.resample("D").sum()

# Independent copy of the first client's hourly series
df_client = df_hourly.loc[:, "Client_1"].copy()

In [13]:
# Build the feature generator around the single-client hourly series
# NOTE(review): country is set to "US" — confirm this matches the data's origin
ts_features = TimeSeriesFeatureEngineering(
    df_client,
    target_col="Client_1",
    country="US",
)

# Produce every feature family using the requested lags and rolling windows
df_with_features = ts_features.generate_all_features(
    lags=[1, 24],
    windows=[3, 7],
)


📊 TIME SERIES FEATURE ENGINEERING 📊
🔄 Input: pandas Series
   → Converting to DataFrame with column name: 'Client_1'

📋 DATA SUMMARY
--------------------------------------------------
• Rows: 35,065
• Columns: 1
• Target: 'Client_1'
• Country: US

🕒 PREPARING DATETIME INDEX

🕒 PREPARING DATETIME INDEX
✓ Index is already a DatetimeIndex
✓ DatetimeIndex ensured successfully

🔍 DETECTING TIME SERIES FREQUENCY
   → Pandas inferred frequency: h
• Frequency: hourly
--------------------------------------------------

🚀 GENERATING FEATURES
• Initial shape: 35,065 rows × 1 columns
• Target column: 'Client_1'
• Time series frequency: 'hourly'
--------------------------------------------------

⏰ Adding time features
   ✓ Added 6 time features
Adding cyclical time features...
Cyclical features added successfully for frequency: hourly
Adding calendar features...
Calendar features added successfully for frequency: hourly
Adding time-of-day features...
Time-of-day features added successfully.
Addin

In [14]:
# Preview the first five rows of the engineered feature table
display(df_with_features.head())

Unnamed: 0,Client_1,year,hour,is_business_hour,month,day_of_week,is_weekend,hour_sin,hour_cos,day_of_week_sin,...,lag_1,lag_24,diff_1,pct_change_1,diff_24,pct_change_24,rolling_mean_3,rolling_std_3,rolling_mean_7,rolling_std_7
2011-01-01 00:00:00,0.0,2011,0,0,1,5,1,0.0,1.0,-0.974928,...,,,,,,,0.0,,0.0,
2011-01-01 01:00:00,0.0,2011,1,0,1,5,1,0.258819,0.965926,-0.974928,...,0.0,,0.0,,,,0.0,0.0,0.0,0.0
2011-01-01 02:00:00,0.0,2011,2,0,1,5,1,0.5,0.866025,-0.974928,...,0.0,,0.0,,,,0.0,0.0,0.0,0.0
2011-01-01 03:00:00,0.0,2011,3,0,1,5,1,0.707107,0.707107,-0.974928,...,0.0,,0.0,,,,0.0,0.0,0.0,0.0
2011-01-01 04:00:00,0.0,2011,4,0,1,5,1,0.866025,0.5,-0.974928,...,0.0,,0.0,,,,0.0,0.0,0.0,0.0


In [15]:
# Save the engineered features (index included) as CSV under data/silver
output_file_path = "../../data/silver/forecast_data.csv"
# Create the target directory when it is missing so the write cannot fail on it
Path(output_file_path).parent.mkdir(parents=True, exist_ok=True)
df_with_features.to_csv(output_file_path, index=True)