In [None]:
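# Prerequisite: run the earlier cells of this notebook first. The cells below
# expect `df` (with columns 'windspeed_10m', 'lag1', 'hour', 'month',
# 'dayofweek', 'wind_power_actual') and the names `pd`, `plt`,
# `train_test_split`, `XGBRegressor`, `mean_squared_error` and `r2_score`
# to already be defined.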

In [None]:

# 8. Improved Model with Advanced Features
#
# Extends the feature set with longer wind-speed lags, a short rolling mean and
# a wind-speed delta, then trains and evaluates an XGBoost regressor on a
# chronological (unshuffled) 80/20 split.

# Feature engineering (extended)
df['lag3'] = df['windspeed_10m'].shift(3)
df['lag6'] = df['windspeed_10m'].shift(6)
df['rolling_mean_3'] = df['windspeed_10m'].rolling(3).mean()
# 'lag1' is not created here; it must already exist on df (presumably the
# 1-step wind-speed lag from an earlier cell -- confirm).
df['delta_wind'] = df['windspeed_10m'] - df['lag1']

# Drop NA from rolling and lags.
# NOTE: this removes rows with a NaN in *any* column and mutates df in place.
# Re-running this cell recomputes the shifts on the already-trimmed frame,
# re-creates leading NaNs and trims df again, so run it once per fresh kernel.
df.dropna(inplace=True)

# New feature list
features_improved = ['windspeed_10m', 'lag1', 'lag3', 'lag6', 'rolling_mean_3', 'delta_wind', 'hour', 'month', 'dayofweek']

# Train/test split (shuffle=False keeps row order, so the test set is the
# last 20% of the rows)
X_imp = df[features_improved]
y_imp = df['wind_power_actual']
X_train_imp, X_test_imp, y_train_imp, y_test_imp = train_test_split(X_imp, y_imp, test_size=0.2, shuffle=False)

# Train improved model
model_imp = XGBRegressor(n_estimators=300, learning_rate=0.05, max_depth=6, subsample=0.8, colsample_bytree=0.8)
model_imp.fit(X_train_imp, y_train_imp)
y_pred_imp = model_imp.predict(X_test_imp)

# Evaluate improved model.
# RMSE is taken as sqrt(MSE) explicitly: the `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6, where it raises TypeError.
rmse_imp = mean_squared_error(y_test_imp, y_pred_imp) ** 0.5
r2_imp = r2_score(y_test_imp, y_pred_imp)
print(f'Improved RMSE: {rmse_imp:.2f} MW')
print(f'Improved R² Score: {r2_imp:.2f}')

# Plot results: first 200 test samples, actual vs. predicted
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test_imp.values[:200], label='Actual')
ax.plot(y_pred_imp[:200], label='Predicted (Improved)')
ax.legend()
ax.set_title('Improved Wind Power Prediction (first 200 hours)')
ax.set_xlabel('Time Index')
ax.set_ylabel('Power [MW]')
fig.tight_layout()
plt.show()


In [None]:

# 9. Further Model Enhancements
#
# Adds a cubed wind-speed term, the previous step's actual power, a weekend
# flag and one-hot season indicators, then trains a shallower, slower-learning
# XGBoost regressor on a chronological (unshuffled) 80/20 split.

# Feature engineering - extended
df["windspeed_cubed"] = df["windspeed_10m"] ** 3
# Uses the previous row's target as a feature: at prediction time the model
# therefore needs the preceding step's actual power.
df["wind_power_lag1"] = df["wind_power_actual"].shift(1)
df["is_weekend"] = df["dayofweek"].isin([5, 6]).astype(int)

# Add seasonal categories as explicit 0/1 indicator columns.
# Built column by column rather than via pd.get_dummies so that all four
# 'season_*' columns always exist (even if a season is absent from the data)
# and re-running the cell overwrites them instead of appending duplicates.
season_by_month = {
    12: "winter", 1: "winter", 2: "winter",
    3: "spring", 4: "spring", 5: "spring",
    6: "summer", 7: "summer", 8: "summer",
    9: "fall", 10: "fall", 11: "fall"
}
season_labels = df["month"].map(season_by_month)
for season_name in ("fall", "spring", "summer", "winter"):
    df[f"season_{season_name}"] = (season_labels == season_name).astype(int)

# Drop rows with NaN values from new features.
# NOTE: this removes rows with a NaN in *any* column and mutates df in place;
# each re-run of this cell re-creates a leading NaN in 'wind_power_lag1' and
# drops one more row.
df.dropna(inplace=True)

# Updated feature list
features_further = [
    'windspeed_10m', 'windspeed_cubed', 'lag1', 'lag3', 'lag6',
    'rolling_mean_3', 'delta_wind', 'wind_power_lag1',
    'hour', 'month', 'dayofweek', 'is_weekend',
    'season_fall', 'season_spring', 'season_summer', 'season_winter'
]

# Split data (shuffle=False keeps row order, so the test set is the last 20%
# of the rows)
X_further = df[features_further]
y_further = df['wind_power_actual']
X_train_f, X_test_f, y_train_f, y_test_f = train_test_split(X_further, y_further, test_size=0.2, shuffle=False)

# Train improved model with modified hyperparameters
model_further = XGBRegressor(n_estimators=500, learning_rate=0.01, max_depth=3, subsample=0.7, colsample_bytree=0.7)
model_further.fit(X_train_f, y_train_f)
y_pred_f = model_further.predict(X_test_f)

# Evaluation.
# RMSE is taken as sqrt(MSE) explicitly: the `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6, where it raises TypeError.
rmse_f = mean_squared_error(y_test_f, y_pred_f) ** 0.5
r2_f = r2_score(y_test_f, y_pred_f)
print(f'Further Improved RMSE: {rmse_f:.2f} MW')
print(f'Further Improved R² Score: {r2_f:.2f}')

# Plot: first 200 test samples, actual vs. predicted
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y_test_f.values[:200], label='Actual')
ax.plot(y_pred_f[:200], label='Predicted (Further Improved)')
ax.legend()
ax.set_title('Further Improved Wind Power Prediction (first 200 hours)')
ax.set_xlabel('Time Index')
ax.set_ylabel('Power [MW]')
fig.tight_layout()
plt.show()
