In [None]:
!pip install dataset-hub -q

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression

from dataset_hub.timeseries import get_household_power

# Load the household power dataset through dataset_hub's `get_household_power`
# loader and bind it to `df`, which the monthly preparation cell below reads.
df = get_household_power()
# Bare last expression: the notebook renders the first rows as the cell output.
df.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,datetime
0,4.216,0.418,234.84,18.4,0.0,1.0,17.0,2006-12-16 17:24:00
1,5.36,0.436,233.63,23.0,0.0,1.0,16.0,2006-12-16 17:25:00
2,5.374,0.498,233.29,23.0,0.0,2.0,17.0,2006-12-16 17:26:00
3,5.388,0.502,233.74,23.0,0.0,1.0,17.0,2006-12-16 17:27:00
4,3.666,0.528,235.68,15.8,0.0,1.0,17.0,2006-12-16 17:28:00


## Monthly data preparation

In [5]:
# Ensure `df` carries a single datetime-typed "datetime" column.
# The branch makes this cell safe to re-run: the first run consumes "Date"/"Time",
# later runs (or a loader that already returns a combined "datetime" column, as the
# saved preview of `df.head()` in this notebook shows) take the second path.
if {"Date", "Time"}.issubset(df.columns):
    # Raw layout: separate day-first date strings and time strings.
    combined_datetime = pd.to_datetime(df["Date"] + " " + df["Time"], dayfirst=True)
    # Drop the source columns (no longer needed for aggregation) and attach the result.
    df = df.drop(columns=["Date", "Time"]).assign(datetime=combined_datetime)
elif "datetime" in df.columns:
    # Already combined: only guarantee the dtype (a no-op if it is datetime already).
    df = df.assign(datetime=pd.to_datetime(df["datetime"]))
else:
    raise KeyError(
        "Expected either 'Date' and 'Time' columns or a 'datetime' column in df."
    )

# Aggregate to monthly mean ("ME" = month-end bins keyed on the datetime column)
df_monthly = df.resample("ME", on="datetime").mean()

# Drop the last month (November 2010): it is incomplete, so its mean is not comparable
df_monthly = df_monthly.iloc[:-1]
print("Dropped last month (November 2010) because data is missing for days 27–30.")

Dropped last month (November 2010) because data is missing for days 27–30.


## X & y preparation

In [6]:
# Target: the monthly mean of global active power
y = df_monthly["Global_active_power"]

# Features: the target shifted by 1..12 months, one column per lag
lag_steps = range(1, 13)
lagged_series = [y.shift(step) for step in lag_steps]
lag_names = [f"lag_{step}" for step in lag_steps]
X = pd.concat(lagged_series, axis=1, keys=lag_names)

# The first 12 rows lack a full lag history; drop them and align the target
X = X.dropna()
y = y.loc[X.index]

## Forecasting & evaluation

In [None]:
# Hold out the final month; every earlier month is training data
X_train, y_train = X.iloc[:-1], y.iloc[:-1]
X_holdout, y_holdout = X.iloc[-1:], y.iloc[-1]

model = LinearRegression().fit(X_train, y_train)

# One-step forecast for the held-out month
y_pred = model.predict(X_holdout)[0]

# Absolute error relative to the actual value, in percent
relative_error = (y_holdout - y_pred) / y_holdout
percentage_error = abs(relative_error) * 100

# Caveat: a single-month percentage error is neither a classical MAPE nor a robust
# model evaluation; it is reported here purely as an illustrative example.
print(f"Percentage error for next month forecast: {percentage_error:.2f}%")

Percentage error for next month forecast: 1.22%
