In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Read both source tables and join them on the shared (state, month) key
join_keys = ['state', 'month']
energy_df = pd.read_csv('energy.csv')
weather_df = pd.read_csv('weather.csv')
df = energy_df.merge(weather_df, on=join_keys)

In [None]:
# Parse the month column into timestamps, then order rows from oldest to newest
df = df.assign(date=pd.to_datetime(df['month'])).sort_values('date')

In [None]:
# Feature engineering: calendar features derived from the parsed date.
# Both columns are listed in `features` further down, so they must exist
# before the feature matrix is built.
df['year'] = df['date'].dt.year
# Months are bucketed into four integer-coded groups:
# 1 = Dec-Feb, 2 = Mar-May, 3 = Jun-Aug, 4 = Sep-Nov
df['season'] = df['date'].dt.month.map({
    12: 1, 1: 1, 2: 1,
    3: 2, 4: 2, 5: 2,
    6: 3, 7: 3, 8: 3,
    9: 4, 10: 4, 11: 4,
})

In [None]:
# Create lag features and rolling statistics, computed separately for each state.
# df is already sorted by date, so shifting within a state group moves back in time.
consumption_by_state = df.groupby('state')['energy_consumption']
df['energy_lag_1'] = consumption_by_state.shift(1)
df['energy_lag_12'] = consumption_by_state.shift(12)
# Shift by one row before averaging so the window covers only the three *previous*
# rows of the state. Including the current row would leak the target
# (energy_consumption) into its own feature, and that value is not available
# at forecast time anyway.
df['energy_rolling_mean'] = consumption_by_state.transform(
    lambda series: series.shift(1).rolling(window=3).mean()
)

In [None]:
# Assemble the model inputs: keep only rows where every feature is present,
# then pick the target values for exactly those rows
features = [
    'average_temperature',
    'average_precipitation',
    'average_wind_speed',
    'year',
    'season',
    'energy_lag_1',
    'energy_lag_12',
    'energy_rolling_mean',
]
X = df[features].dropna()
y = df.loc[X.index, 'energy_consumption']

In [None]:
# Ordered cross-validation splitter: every fold tests on rows that come
# after the rows it trains on
tscv = TimeSeriesSplit(
    n_splits=5,
)

In [None]:
# Train and evaluate the model with time-ordered cross-validation,
# reporting the RMSE of each fold
model = XGBRegressor(n_estimators=100, random_state=42)
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # fit() retrains from scratch, so folds do not contaminate each other
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f"RMSE: {rmse}")

# After the loop the model holds only the last fold's fit, which never saw the
# most recent test rows. Refit on every available row so the forecast cell
# uses a model trained on the full history.
model.fit(X, y)

In [None]:
# Forecast energy consumption for the period after the latest observation.
# X contains only the feature columns, so raw consumption values are read from
# df. The lag inputs are taken from a single state's history (the state of the
# last feature row) so that "12 rows back" means 12 months back for that state
# rather than 12 rows back across all states.
forecast_state = df.loc[X.index[-1], 'state']
state_history = df.loc[df['state'] == forecast_state, 'energy_consumption']
if len(state_history) < 12:
    raise ValueError(
        f"Need at least 12 months of history for state {forecast_state!r} "
        "to build energy_lag_12"
    )

# Scenario inputs: the weather values, year and season are supplied by hand.
# NOTE(review): year/season should describe the month being forecast — confirm.
new_weather_data = {
    'average_temperature': 75,
    'average_precipitation': 2.5,
    'average_wind_speed': 10,
    'year': 2024,
    'season': 2,
    'energy_lag_1': state_history.iloc[-1],
    'energy_lag_12': state_history.iloc[-12],
    'energy_rolling_mean': state_history.iloc[-3:].mean(),
}

# Order the columns exactly as in the training matrix before predicting
forecast_input = pd.DataFrame([new_weather_data])[list(X.columns)]
forecast = model.predict(forecast_input)
print(f"Forecasted energy consumption: {forecast[0]}")