# DNN Augmentation — Section 0 & Load-Curve Dataset
**说明 / Notes：** 在**大致保留原 ipynb 内容**的前提下，新增本节用于：  
1) 统一放置可控参数（epochs、网络层数、采样数量等）；  
2) 随机生成 100 个 `(x, y)` pair（可类比负荷曲线：`x=时间(小时)`，`y=标幺负荷`）；  
3) 自动将 70%/30% 数据划分为训练/测试；  
4) 使用 Keras DNN 拟合（保留原来 notebook 的主体内容在后面）。

In [None]:
# ========= Section 0: Settings =========
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# --- Reproducibility ---
# One seed reused by the NumPy sampler, the train/test split and TensorFlow.
RNG_SEED = 42

# --- Synthetic data ---
N_SAMPLES = 100      # number of random (x, y) samples to draw
TRAIN_RATIO = 0.70   # fraction of the samples used for training; the rest is the test set
NOISE_STD = 0.04     # standard deviation of the Gaussian noise added to y (per unit)

# --- Target curve y = f(x) ---
# Options: "sin", "cos", "exp", "poly", "linear", "custom"
FUNC_TYPE = "sin"


# Curve evaluated when FUNC_TYPE == "custom"; replace it with any callable of x,
# e.g. lambda x: 0.5 + 0.3*np.sin(2*np.pi*x/24) + 0.2*np.exp(-0.2*(x-18)**2)
def CUSTOM_FUNC(x):
    """Return 0.5 plus a 24-period sine term and a Gaussian bump centred at x = 19."""
    sine_term = 0.3*np.sin(2*np.pi*(x-6)/24)
    bump_term = 0.25*np.exp(-0.5*((x-19)/2.5)**2)
    return 0.5 + sine_term + bump_term


# --- DNN (Keras) hyper-parameters ---
EPOCHS = 800
BATCH_SIZE = 16
INPUT_DIM = 1               # width of the network input
HIDDEN_LAYERS = [64, 64]    # one Dense layer per entry, value = number of units
OUTPUT_DIM = 1              # width of the network output
LEARNING_RATE = 1e-3

# --- Artifact locations (everything is written below OUT_DIR) ---
OUT_DIR = Path("/mnt/data")
CSV_PATH = OUT_DIR.joinpath("load_pairs_100.csv")
MODEL_PATH = OUT_DIR.joinpath("keras_dnn_load_model.h5")
HISTORY_PATH = OUT_DIR.joinpath("keras_dnn_history.npy")
METRICS_PATH = OUT_DIR.joinpath("keras_dnn_metrics.json")

print("Section 0 settings loaded.")

In [None]:
# ========= Section 1: Generate (x, y) pairs =========
# Seeded NumPy generator used for both the x positions and the additive noise.
rng = np.random.default_rng(seed=RNG_SEED)

def pick_range_by_func(func_type: str):
    """Return the (xmin, xmax) sampling interval used for the given curve type.

    "sin" and "cos" are sampled over one period in radians, "exp" over
    [0, 10] and "poly" over [-3, 3]. Every other value, including "linear",
    "custom" and unrecognised names, uses the 24-hour interval [0, 24].
    """
    if func_type == "sin" or func_type == "cos":
        return (0.0, 2*np.pi)
    if func_type == "exp":
        return (0.0, 10.0)
    if func_type == "poly":
        return (-3.0, 3.0)
    # "linear", "custom" and anything unrecognised share the hours domain.
    return (0.0, 24.0)

def base_function(x, func_type: str):
    """Evaluate the noise-free target curve selected by ``func_type`` at ``x``.

    Args:
        x: Scalar or NumPy array of positions at which to evaluate the curve.
        func_type: One of "sin", "cos", "exp", "poly", "linear" or "custom".
            Any other value selects the built-in two-peak daily-load curve.

    Returns:
        The curve values, with the same shape as ``x``.
    """
    if func_type == "sin":
        curve = np.sin(x)
    elif func_type == "cos":
        curve = np.cos(x)
    elif func_type == "exp":
        # Dividing by e^(0.2*10) makes the curve reach exactly 1 at x = 10, so
        # over the [0, 10] sampling interval it spans [e^-2, 1] ≈ [0.135, 1].
        curve = np.exp(0.2*x) / np.exp(0.2*10.0)
    elif func_type == "poly":
        curve = 0.1*x**3 - 0.2*x**2 + 0.3*x + 0.6
    elif func_type == "linear":
        # Straight line rising from 0.4 with slope 0.03 per unit of x.
        curve = 0.4 + 0.03*x
    elif func_type == "custom":
        curve = CUSTOM_FUNC(x)
    else:
        # Fallback: sinusoidal baseline plus Gaussian bumps centred at x = 8 and x = 19.
        baseline = 0.6 + 0.2*np.sin(2*np.pi*(x-6)/24)
        bump_at_8 = 0.35*np.exp(-0.5*((x-8)/2.0)**2)
        bump_at_19 = 0.5*np.exp(-0.5*((x-19)/2.5)**2)
        curve = baseline + bump_at_8 + bump_at_19
    return curve

# Draw N_SAMPLES x positions uniformly over the interval chosen for FUNC_TYPE
# and sort them so the saved rows run from left to right.
xmin, xmax = pick_range_by_func(FUNC_TYPE)
x = rng.uniform(xmin, xmax, size=N_SAMPLES)
x = np.sort(x)

# Noisy targets: clean curve plus zero-mean Gaussian noise of std NOISE_STD.
# Only the "linear" and "custom" curves are clipped to [0, 1.2]; the other
# curve types (e.g. sin/cos, which take negative values) are left unclipped.
y_clean = base_function(x, FUNC_TYPE)
noise = rng.normal(0.0, NOISE_STD, size=N_SAMPLES)
y = y_clean + noise
if FUNC_TYPE in ("linear", "custom"):
    y = np.clip(y, 0.0, 1.2)

# Save dataset. The output directory is created first because to_csv does not
# create missing parent directories and would otherwise raise OSError.
CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
df = pd.DataFrame({"x": x, "y": y, "func": FUNC_TYPE})
df.to_csv(CSV_PATH, index=False)
print("Saved dataset to:", CSV_PATH)

# Quick scatter plot of the generated samples
plt.figure()
plt.scatter(x, y, s=20, label="samples")
plt.xlabel("x (hour or unit domain)")
plt.ylabel("y (per unit)")
plt.title(f"Random {N_SAMPLES} (x, y) samples — {FUNC_TYPE}")
plt.legend()
plt.show()

In [None]:
# ========= Section 2: Train/Test split (70% / 30%) =========
from sklearn.model_selection import train_test_split

# Column-vector float32 copies of the samples: one row per (x, y) pair.
X = x.astype("float32").reshape(-1, 1)
Y = y.astype("float32").reshape(-1, 1)

# Seeded split: TRAIN_RATIO of the rows go to training, the rest to testing.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    train_size=TRAIN_RATIO,
    random_state=RNG_SEED,
)

print("Train size:", len(X_train), " Test size:", len(X_test))

In [None]:
# ========= Section 3: DNN (Keras) training =========
import json
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed TensorFlow with the same seed used for sampling and splitting.
tf.random.set_seed(RNG_SEED)

# Build a fully connected regressor: an INPUT_DIM-wide input, one ReLU Dense
# layer per entry of HIDDEN_LAYERS, then a linear OUTPUT_DIM-wide output layer.
model = keras.Sequential()
model.add(layers.Input(shape=(INPUT_DIM,)))
for h in HIDDEN_LAYERS:
    model.add(layers.Dense(h, activation="relu"))
model.add(layers.Dense(OUTPUT_DIM, activation="linear"))

# Adam optimizer with the configured learning rate; MSE is the training loss
# and MAE is tracked as an additional metric.
opt = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
model.compile(optimizer=opt, loss="mse", metrics=["mae"])

# Train silently (verbose=0).
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch validation curves and the final test metrics come from the same
# rows — confirm this is intended.
history = model.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=0
)

# Save the trained model and the per-epoch training history.
# NOTE(review): history.history is saved with allow_pickle=True, so reading it
# back with np.load will presumably also need allow_pickle=True — verify.
model.save(MODEL_PATH)
np.save(HISTORY_PATH, history.history, allow_pickle=True)

# Evaluate on the test split; the two returned values are unpacked as the MSE
# loss and the MAE metric, then written to METRICS_PATH as JSON.
loss, mae = model.evaluate(X_test, Y_test, verbose=0)
metrics = {"test_mse": float(loss), "test_mae": float(mae)}
with open(METRICS_PATH, "w") as f:
    json.dump(metrics, f, indent=2)

# Report where each artifact was written and the final test metrics.
print("Saved model to:", MODEL_PATH)
print("Saved history to:", HISTORY_PATH)
print("Saved metrics to:", METRICS_PATH)
print("Test metrics:", metrics)

In [None]:
# ========= Section 4: Visualization of Fit =========
# Evaluate the network on 400 evenly spaced inputs spanning the sampled x range
# so that the fitted curve is drawn as a smooth line.
t_dense = np.linspace(x.min(), x.max(), num=400, dtype="float32")[:, np.newaxis]
y_pred_dense = model.predict(t_dense, verbose=0).ravel()

# Fitted curve first, then the noisy samples it was trained/tested on.
plt.figure()
plt.plot(t_dense[:, 0], y_pred_dense, label="DNN prediction")
plt.scatter(x, y, label="samples", s=20)
plt.title("Keras DNN Fit vs. Samples")
plt.xlabel("x (hour or unit domain)")
plt.ylabel("y (per unit)")
plt.legend()
plt.show()

In [None]:
# --- Imports ---
# Import necessary libraries for numerical computing, deep learning, and plotting
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

## 1. Generate Training Data

We aim to approximate the function **f(x) = sin(x)** using a simple Deep Neural Network (DNN).  
First, we create 1028 evenly spaced points in the range \([0, 2\pi]\) and compute their sine values.


In [None]:
# Training set: 1028 evenly spaced inputs covering [0, 2π], stored as a column
# vector, with the sine of each input as its target.
x_train = np.linspace(0, 2 * np.pi, num=1028)[:, np.newaxis]
y_train = np.sin(x_train)

## 2. Define the DNN Model

We use TensorFlow/Keras to build a feedforward neural network with:
- An input layer that takes scalar values (shape = 1)
- Two hidden layers with 32 neurons and ReLU activation
- A linear output layer that outputs a single value


In [None]:
# Feedforward regressor: scalar input -> two 32-unit ReLU hidden layers -> one
# linear output. The input shape is declared with an explicit Input layer
# (as in the Section 3 model) instead of the deprecated `input_shape=`
# argument on the first Dense layer.
model = keras.Sequential([
    layers.Input(shape=(1,)),                 # Scalar input
    layers.Dense(32, activation='relu'),      # Hidden layer 1
    layers.Dense(32, activation='relu'),      # Hidden layer 2
    layers.Dense(1, activation='linear'),     # Output layer
])

## 3. Compile the Model

We compile the model using:
- **Adam optimizer** for efficient training
- **Mean Squared Error (MSE)** as the loss function
- **Mean Absolute Error (MAE)** as an additional evaluation metric


In [None]:
# Adam optimizer, MSE as the training loss, MAE reported as an extra metric.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['mae'],
)

## 4. Train the Model

We train the model on the generated data for 300 epochs with a batch size of 16.
Setting `verbose=0` suppresses the progress output during training.


In [None]:
# Fit on the sine samples: 300 epochs, mini-batches of 16, no progress output.
model.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=300,
    verbose=0,
)

## 5. Test the Model

We generate 100 evenly spaced input values over the same range \([0, 2\pi]\) for testing and compare:
- The **true values** of \( \sin(x) \)
- The **predicted values** from the trained DNN


In [None]:
# 100 evenly spaced evaluation inputs over [0, 2π], as a column vector.
x_test = np.linspace(0, 2 * np.pi, num=100)[:, np.newaxis]
# Reference values and the network's output for the same inputs.
y_true = np.sin(x_test)
y_pred = model.predict(x_test)

## 6. Plot the Results

We visualize how well the DNN approximates the sine function:
- **Blue solid line**: True function \( \sin(x) \)
- **Red dashed line**: DNN-predicted output


In [None]:
# Overlay the true sine curve (solid blue) and the DNN output (dashed red).
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x_test, y_true, 'b-', label='True f(x) = sin(x)')
ax.plot(x_test, y_pred, 'r--', label='Predicted by DNN')
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.set_title('DNN Approximation of f(x) = sin(x)')
ax.legend()
ax.grid(True)
plt.show()