In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
# Machine Learning Libraries
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Statistical Models
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller


# Deep Learning
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, Bidirectional, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Gradient Boosting
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Visualization
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
# Dashboard
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc

# Optimization

from scipy import stats

# Other
import joblib
import json
from tqdm import tqdm
import itertools

### Energy

## EDA

In [None]:
# Read the raw energy dataset, asking pandas to parse the 'time' column as dates
df = pd.read_csv('energy.csv', parse_dates=['time'])

# First look: dimensions, leading rows, column names and dtypes
print("Dataset Shape:", df.shape)
print(df.head(10))
print("\nColumns:", list(df.columns))
print("\nData Types:")
print(df.dtypes)

# Report the 15 columns with the most missing entries, worst first
missing_per_column = df.isnull().sum()
print("\nMissing Values:")
print(missing_per_column.sort_values(ascending=False).head(15))

In [None]:
# Build a UTC DatetimeIndex from the 'time' column.
# The step is guarded so this cell can be re-run on the same kernel: once
# 'time' has been moved into the index it is no longer a column, and an
# unconditional set_index('time') would raise KeyError on the second run.
# The frame is rebound rather than mutated in place for the same reason.
if 'time' in df.columns:
    df = df.assign(time=pd.to_datetime(df['time'], utc=True)).set_index('time')

# Keep observations in chronological order
df = df.sort_index()

# Select target variable
y = df['total load actual']


In [None]:
# Count the NaN entries in the target series before any gap filling
y.isnull().sum()


In [None]:
# Interpolate missing values if any.
# method='time' asks pandas to take the spacing of the datetime index into
# account when filling gaps, so it relies on y being indexed by timestamps.
y = y.interpolate(method='time')

In [None]:
# Line chart of the complete target series
fig, ax = plt.subplots(figsize=(12, 4))
y.plot(ax=ax)
ax.set_title("Total Load Actual Over Time")
ax.set_ylabel("Energy Load (MW)")
plt.show()


In [None]:
# Zoom in on 1-7 January 2018 to expose the within-week shape of the load
first_week_2018 = y.loc['2018-01-01':'2018-01-07']
week_ax = first_week_2018.plot(figsize=(10, 4))
week_ax.set_title("Weekly Load Pattern")
plt.show()


## Model Selection and Hyperparameter Tuning

In [None]:
# Hold out the final 168 observations for testing (7 days if the series is
# hourly); everything before them is the training set.
test = y.tail(168)
train = y.head(-168)


In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Holt-Winters specification: additive trend and additive seasonality with a
# seasonal cycle of 24 observations.
hw_spec = {'trend': 'add', 'seasonal': 'add', 'seasonal_periods': 24}
model = ExponentialSmoothing(train, **hw_spec)

# Fit on the training series with parameter optimisation switched on
hw_fit = model.fit(optimized=True)



## Forecasting (1 day, 3 days, 7 days)

In [None]:
# Out-of-sample forecasts 24, 72 and 168 steps past the end of the training data
forecast_1d, forecast_3d, forecast_7d = (
    hw_fit.forecast(horizon) for horizon in (24, 72, 168)
)

In [None]:
# Overlay the 168-step forecast on the held-out actuals
fig, ax = plt.subplots(figsize=(12, 5))
test.plot(ax=ax, label='Actual', color='black')
forecast_7d.plot(ax=ax, label='7-Day Forecast', linestyle='--')
ax.legend()
ax.set_title("7-Day Total Load Forecast")
ax.set_ylabel("Energy Load (MW)")
plt.show()


In [None]:
# Side-by-side table of the first 24 steps of each forecast horizon.
# NOTE(review): all three series come from the same fitted model, so their
# first 24 steps are expected to coincide — confirm this table is intended.
horizon_forecasts = {
    '1-Day Forecast': forecast_1d,
    '3-Day Forecast': forecast_3d,
    '7-Day Forecast': forecast_7d,
}
forecast_df = pd.DataFrame(
    {label: series.head(24) for label, series in horizon_forecasts.items()}
)

forecast_df.head()


In [None]:
# Last 72 training observations followed by the 72-step forecast
fig, ax = plt.subplots(figsize=(14, 4))
train.tail(72).plot(ax=ax, label='Recent Actual')
forecast_3d.plot(ax=ax, label='3-Day Forecast', linestyle='--')
ax.legend()
ax.set_title("Operational Load Forecast View")
plt.show()


## WEATHER

In [None]:
# Load the weather data.
# Timestamps are parsed with utc=True so the result is a single UTC
# datetime column, matching how the energy 'time' column is parsed. Without
# it, values carrying different UTC offsets do not yield a datetime64 column.
weather = pd.read_csv("weather.csv")
weather['dt_iso'] = pd.to_datetime(weather['dt_iso'], utc=True)

# Keep a single city, then index by timestamp in chronological order.
# A stable sort keeps rows that share a timestamp in their file order.
weather = weather[weather['city_name'] == 'Valencia']
weather = weather.set_index('dt_iso').sort_index(kind='mergesort')

# asfreq() reindexes, which raises on duplicate labels, so keep only the
# first row recorded for each timestamp.
weather = weather[~weather.index.duplicated(keep='first')]

# Ensure regular hourly frequency (hours absent from the data become NaN rows).
# An offset object is used instead of the 'H' string alias, which newer
# pandas releases deprecate.
weather = weather.asfreq(pd.offsets.Hour())

