# 🔁 Forecast Features for Last Week of 2019

In this notebook, we assume we have **no data** for the last week of 2019.
We forecast all relevant features into this period using recursive models,
store the results, and visualize them.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from pathlib import Path

# Read the merged dataset, parse 'utc_timestamp' as datetimes, use it as the
# index, and order the rows chronologically.
df = (
    pd.read_csv(
        '../data/processed/merged_energy_data_final_step_1.csv',
        parse_dates=['utc_timestamp'],
    )
    .set_index('utc_timestamp')
    .sort_index()
)

## 🎯 Step 1: Forecast all input features for Dec 25–31, 2019, seeded with observations through Dec 24

In [None]:
# Recursive one-step-ahead forecasting of every input feature over the
# forecast window. For each feature an XGBoost regressor is fit on lagged
# values plus calendar fields, then rolled forward hour by hour, feeding each
# prediction back in as history for the next step. Results are plotted per
# feature and written to a single CSV.

# Features to forecast
features_to_forecast = [
    'DE_solar_generation_actual', 'DE_wind_generation_actual',
    'DE_load_actual_entsoe_transparency',
    'Gas_Price', 'Oil_Price',
    'DE_radiation_direct_horizontal', 'DE_radiation_diffuse_horizontal',
    'DE_temperature'
]

# Set forecast window (both bounds inclusive, one step per hour)
forecast_start = pd.Timestamp('2019-12-25 00:00:00')
forecast_end = pd.Timestamp('2019-12-31 23:00:00')
forecast_steps = int((forecast_end - forecast_start).total_seconds() // 3600 + 1)

# Lags are counted in ROWS (via shift / iloc), so they only mean
# "1 hour / 1 day / 1 week" if df has exactly one row per hour.
# NOTE(review): hourly, gap-free index is assumed here — confirm upstream.
lags = [1, 24, 168]
max_lag = max(lags)
feature_cols = [f'lag_{lag}' for lag in lags] + ['hour', 'dayofweek', 'month']

# Last timestamp treated as observed; everything after it is "unknown".
last_observed = forecast_start - pd.Timedelta(hours=1)

predicted_features = {}

for feature in features_to_forecast:
    print(f'🔮 Forecasting: {feature}')

    # Train strictly on data before the forecast window. Fitting on the full
    # frame would leak the very period we claim to have no data for.
    # (.loc label slicing on a DatetimeIndex includes the end label.)
    data = df.loc[:last_observed, [feature]].copy()
    data['hour'] = data.index.hour
    data['dayofweek'] = data.index.dayofweek
    data['month'] = data.index.month

    for lag in lags:
        data[f'lag_{lag}'] = data[feature].shift(lag)
    # Drops the first max_lag rows (no lag available) and any row with a
    # missing target or lag value.
    data = data.dropna()

    X = data[feature_cols]
    y = data[feature]

    model = XGBRegressor(n_estimators=100)
    model.fit(X, y)

    # Seed history: the max_lag hours immediately preceding the forecast start.
    current = df.loc[forecast_start - pd.Timedelta(hours=max_lag):last_observed, feature].copy()

    # The loop below indexes history by position, so a gap or duplicate in the
    # seed window would silently shift every lag. Fail loudly instead.
    if len(current) != max_lag:
        raise ValueError(
            f'Expected {max_lag} hourly observations of {feature!r} before '
            f'{forecast_start}, found {len(current)}; the seed window has '
            f'missing or duplicated timestamps.'
        )

    predictions = []

    for i in range(forecast_steps):
        t = forecast_start + pd.Timedelta(hours=i)
        # current grows by one value per step, so iloc[-lag] is always the
        # value `lag` steps before t (observed or previously predicted).
        row = {f'lag_{lag}': current.iloc[-lag] for lag in lags}
        row.update(hour=t.hour, dayofweek=t.dayofweek, month=t.month)
        # Pin the column order to the one the model was trained with.
        x_input = pd.DataFrame([row], columns=feature_cols)
        y_pred = model.predict(x_input)[0]
        predictions.append((t, y_pred))
        current.loc[t] = y_pred  # add prediction to current

    predicted_df = pd.DataFrame(predictions, columns=['utc_timestamp', feature]).set_index('utc_timestamp')
    predicted_features[feature] = predicted_df

    # Plot
    fig, ax = plt.subplots(figsize=(12, 3))
    ax.plot(predicted_df.index, predicted_df[feature], label='Forecasted')
    ax.set_title(f'{feature} — Forecast (Dec 25–31, 2019)')
    ax.legend()  # the line label is only shown if a legend is drawn
    ax.grid(True)
    fig.tight_layout()
    plt.show()
    plt.close(fig)  # one figure per feature; release it once displayed

# Save to CSV: one column per feature, indexed by forecast timestamp
combined = pd.concat(list(predicted_features.values()), axis=1)
combined.to_csv('../data/processed/forecasted_features_last_week_2019.csv')
print('✅ Saved forecasted features to processed folder.')