In [None]:
# Data Loading: load_data.py
# Source text of the generated module. Docstrings inside it use ''' because the
# enclosing literal is delimited by triple double quotes.
load_data_code = """
from typing import Optional

import pandas as pd
from pyspark.sql import SparkSession


def load_data(query: str, spark: Optional[SparkSession] = None) -> pd.DataFrame:
    '''Run a Spark SQL query and return its result as a pandas DataFrame.

    Args:
        query: SQL text handed to ``spark.sql``.
        spark: Optional session to reuse. When omitted, a session named
            "EnergyProductionForecast" is obtained through ``getOrCreate()``.

    Returns:
        The query result converted with ``toPandas()``.
    '''
    if spark is None:
        spark = SparkSession.builder.appName("EnergyProductionForecast").getOrCreate()
    return spark.sql(query).toPandas()
"""

# Write the module text to data/load_data.py under base_dir.
with open(f"{base_dir}/data/load_data.py", "w", encoding="utf-8") as f:
    f.write(load_data_code)

In [None]:
# Data Preprocessing: preprocess.py
# Source text of the generated module. Docstrings inside it use ''' because the
# enclosing literal is delimited by triple double quotes.
preprocess_code = """
import pandas as pd


def preprocess_data(df: pd.DataFrame, date_column: str) -> pd.DataFrame:
    '''Parse the date column and drop incomplete rows, leaving the input untouched.

    Args:
        df: Input frame. It is not modified.
        date_column: Name of the column converted with ``pd.to_datetime``.

    Returns:
        A new DataFrame in which ``date_column`` has been converted and every
        row containing a missing value has been removed.
    '''
    # assign() builds a new frame, so the caller's DataFrame keeps its original
    # column; assigning into df directly would silently rewrite it.
    parsed = df.assign(**{date_column: pd.to_datetime(df[date_column])})
    return parsed.dropna()
"""

# Write the module text to data/preprocess.py under base_dir.
with open(f"{base_dir}/data/preprocess.py", "w", encoding="utf-8") as f:
    f.write(preprocess_code)

In [None]:
# Feature Engineering: build_features.py
# Source text of the generated module. Docstrings inside it use ''' because the
# enclosing literal is delimited by triple double quotes.
build_features_code = """
def create_lagged_features(df, column: str, lags: int):
    '''Return a new frame with lagged copies of one column appended.

    For every lag in 1..lags a column named "<column>_lag_<lag>" is added,
    holding ``df[column].shift(lag)``.

    Args:
        df: Input frame. It is not modified.
        column: Name of the column to lag.
        lags: Number of lag columns to create; values below 1 add none.

    Returns:
        A new DataFrame with the lag columns added and every row containing
        a missing value (in any column) dropped.
    '''
    # Work on a copy so the lag columns are not added to the caller's frame.
    lagged = df.copy()
    for lag in range(1, lags + 1):
        lagged[f"{column}_lag_{lag}"] = lagged[column].shift(lag)
    return lagged.dropna()
"""

# Write the module text to features/build_features.py under base_dir.
with open(f"{base_dir}/features/build_features.py", "w", encoding="utf-8") as f:
    f.write(build_features_code)

In [None]:
# Model Training: train_model.py
# Source text of the generated module. Docstrings inside it use ''' because the
# enclosing literal is delimited by triple double quotes.
train_model_code = """
from neuralprophet import NeuralProphet


def train_model(df, n_lags: int, n_forecasts: int, quantiles: list,
                freq: str = 'H', valid_p: float = 0.2):
    '''Build a NeuralProphet model and fit it on df.

    Args:
        df: Training data handed to ``NeuralProphet.fit``.
        n_lags: Forwarded to the ``NeuralProphet`` constructor.
        n_forecasts: Forwarded to the ``NeuralProphet`` constructor.
        quantiles: Forwarded to the ``NeuralProphet`` constructor.
        freq: Data frequency forwarded to ``fit``; defaults to 'H'.
        valid_p: Forwarded to ``fit`` as ``valid_p``; defaults to 0.2.

    Returns:
        A ``(model, metrics)`` tuple where ``metrics`` is whatever ``fit`` returned.
    '''
    # NOTE(review): whether fit() accepts valid_p depends on the installed
    # NeuralProphet release - confirm against the pinned version.
    model = NeuralProphet(n_lags=n_lags, n_forecasts=n_forecasts, quantiles=quantiles)
    metrics = model.fit(df, freq=freq, valid_p=valid_p)
    return model, metrics
"""

# Write the module text to models/train_model.py under base_dir.
with open(f"{base_dir}/models/train_model.py", "w", encoding="utf-8") as f:
    f.write(train_model_code)

In [None]:
# Model Evaluation: evaluate_model.py
# Source text of the generated module. Docstrings inside it use ''' because the
# enclosing literal is delimited by triple double quotes.
evaluate_model_code = """
try:
    # scikit-learn >= 1.4 provides a dedicated RMSE function; the ``squared``
    # keyword of mean_squared_error is deprecated there and removed in 1.6.
    from sklearn.metrics import root_mean_squared_error as _rmse
except ImportError:
    # Older scikit-learn: RMSE is only reachable through squared=False.
    from sklearn.metrics import mean_squared_error

    def _rmse(y_true, y_pred):
        return mean_squared_error(y_true, y_pred, squared=False)


def evaluate_model(actuals, predictions):
    '''Return the root mean squared error between actuals and predictions.

    Args:
        actuals: Ground-truth values.
        predictions: Predicted values, aligned with ``actuals``.

    Returns:
        The RMSE as computed by scikit-learn.
    '''
    return _rmse(actuals, predictions)
"""

# Write the module text to models/evaluate_model.py under base_dir.
with open(f"{base_dir}/models/evaluate_model.py", "w", encoding="utf-8") as f:
    f.write(evaluate_model_code)

In [None]:
# Model Prediction: predict.py
# Source text of the generated module. Its predict_future() asks the model for a
# future frame via make_future_dataframe(df, periods=...) and returns the result
# of model.predict() on that frame.
predict_code = """
def predict_future(model, df, periods: int):
    future = model.make_future_dataframe(df, periods=periods)
    forecast = model.predict(future)
    return forecast
"""

# Write the module text to models/predict.py under base_dir.
with open(
    f"{base_dir}/models/predict.py",
    mode="w",
    encoding="utf-8",
) as predict_file:
    predict_file.write(predict_code)

In [None]:
# Logging Utility: logging.py
# Source text of the generated module. Its setup_logging() calls
# logging.basicConfig with a file target, INFO level and a timestamped format,
# then returns the root logger.
# NOTE(review): the generated file is named logging.py and itself imports the
# standard-library `logging`; confirm utils/ is never placed directly on
# sys.path, otherwise the import would resolve to this file.
logging_code = """
import logging

def setup_logging(log_file: str = "app.log"):
    logging.basicConfig(
        filename=log_file,
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )
    return logging.getLogger()
"""

# Write the module text to utils/logging.py under base_dir.
with open(
    f"{base_dir}/utils/logging.py",
    mode="w",
    encoding="utf-8",
) as logging_file:
    logging_file.write(logging_code)

In [None]:
# Config File: config.py
# Source text of the generated config module: a single CONFIG dict holding the
# SQL query, the date and target column names, the lag count, the forecast
# horizon and the quantiles.
# NOTE(review): "target_column" maps to the literal string "target_column",
# which looks like a placeholder - confirm the real column name before use.
config_code = """
CONFIG = {
    "data_query": "SELECT * FROM plant_ops.mao_forecast.gesq_accuweather ORDER BY CALENDAR_DATE",
    "date_column": "CALENDAR_DATE",
    "target_column": "target_column",
    "lags": 15,
    "n_forecasts": 15,
    "quantiles": [0.05, 0.5, 0.95],
}
"""