## Prepare training data

In [25]:
import torch
import numpy as np

# 定义序列参数
batch_size = 400
sequence_length = 100
input_size = 1

clean_signal = torch.sin(
    torch.linspace(0, 2 * np.pi * batch_size, batch_size * sequence_length)
    ).unsqueeze(-1)
print("clean_signal shape:", clean_signal.size())
clean_batches = clean_signal.view(batch_size, sequence_length, input_size)
print("shape of clean_batches:", clean_batches.size())

# 在正弦波上叠加噪声
noise = torch.randn_like(clean_signal) * 0.2  # 调整噪声强度
noisy_signal = clean_signal + noise
print("noisy_signal shape:", noisy_signal.size())
noisy_batches = noisy_signal.view(batch_size, sequence_length, input_size)
print("shape of noisy_batches:", noisy_batches.size())

clean_signal shape: torch.Size([40000, 1])
shape of clean_batches: torch.Size([400, 100, 1])
noisy_signal shape: torch.Size([40000, 1])
shape of noisy_batches: torch.Size([400, 100, 1])


In [26]:
import pandas as pd
from src.utils import plotters

# Plot noisy_signal 和 clean_signal
noisy_signal_flat = noisy_signal.view(-1).numpy()  # 展平为一维数组，形状 (40000,)
clean_signal_flat = clean_signal.view(-1).numpy()  # 展平为一维数组，形状 (40000,)
plotters.plot_traces(
    pd.DataFrame({
        "Noisy Signal": noisy_signal_flat,
        "Clean Signal": clean_signal_flat
    }),
    width=800,
    height=400,
    mode="lines"
)

# plot batch 1 signal
noisy_batches_flat = noisy_batches[0].view(-1).numpy()  # 展平为一维数组，形状 (40000,)
clean_batches_flat = clean_batches[0].view(-1).numpy()  # 展平为一维数组，形状 (40000,)
plotters.plot_traces(
    title="batch 1",
    data = pd.DataFrame({
                "Noisy Signal": noisy_batches_flat,
                "Clean Signal": clean_batches_flat
            }),
    width=800,
    height=400,
    mode="lines"
)

## Create dataloader

In [27]:
from torch.utils.data import Dataset, DataLoader

# 定义自定义数据集
class TimeSeriesDataset(Dataset):
    def __init__(self, noisy_batches, clean_batches):
        self.noisy_batches = noisy_batches
        self.clean_batches = clean_batches

    def __len__(self):
        return len(self.noisy_batches)

    def __getitem__(self, idx):
        return self.noisy_batches[idx], self.clean_batches[idx]

# 定义数据集和数据加载器
dataset = TimeSeriesDataset(noisy_batches, clean_batches)
dataloader = DataLoader(dataset, batch_size=40, shuffle=False)

print(f"Number of batches in dataloader: {len(dataloader)}")

Number of batches in dataloader: 10


## Define model

In [28]:
import torch
import torch.nn as nn
import torch.optim as optim

# 定义 LSTM 降噪模型
class LSTMDenoiser(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out)
        return out

# 初始化模型
model = LSTMDenoiser(input_size=1, hidden_size=64, num_layers=2)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

## Train model

In [29]:
# 用于保存训练损失
epoch_losses  = []

num_epochs = 50  # 训练轮数

# 模型训练
for epoch in range(num_epochs):
    epoch_loss = 0
    for i, (noisy_batch, clean_batch) in enumerate(dataloader):
        # 将数据移动到 GPU（如果可用）
        noisy_batch = noisy_batch.float()
        clean_batch = clean_batch.float()

        # 前向传播
        outputs = model(noisy_batch)
        loss = criterion(outputs, clean_batch)

        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 累加 batch 的损失
        epoch_loss += loss.item()

    # 记录每个 epoch 的平均损失
    epoch_losses.append(epoch_loss / len(dataloader))
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_losses[-1]:.4f}")

print("Training complete!")

Epoch [1/50], Average Loss: 0.4793
Epoch [2/50], Average Loss: 0.3191
Epoch [3/50], Average Loss: 0.0950
Epoch [4/50], Average Loss: 0.0504
Epoch [5/50], Average Loss: 0.0289
Epoch [6/50], Average Loss: 0.0169
Epoch [7/50], Average Loss: 0.0128
Epoch [8/50], Average Loss: 0.0112
Epoch [9/50], Average Loss: 0.0097
Epoch [10/50], Average Loss: 0.0089
Epoch [11/50], Average Loss: 0.0084
Epoch [12/50], Average Loss: 0.0079
Epoch [13/50], Average Loss: 0.0073
Epoch [14/50], Average Loss: 0.0067
Epoch [15/50], Average Loss: 0.0062
Epoch [16/50], Average Loss: 0.0057
Epoch [17/50], Average Loss: 0.0053
Epoch [18/50], Average Loss: 0.0047
Epoch [19/50], Average Loss: 0.0041
Epoch [20/50], Average Loss: 0.0037
Epoch [21/50], Average Loss: 0.0034
Epoch [22/50], Average Loss: 0.0032
Epoch [23/50], Average Loss: 0.0030
Epoch [24/50], Average Loss: 0.0028
Epoch [25/50], Average Loss: 0.0027
Epoch [26/50], Average Loss: 0.0026
Epoch [27/50], Average Loss: 0.0024
Epoch [28/50], Average Loss: 0.0022
E

## Plot results

In [30]:
import plotly.graph_objects as go

# 绘制训练损失曲线
fig2 = go.Figure()
fig2.add_trace(go.Scatter(y=epoch_losses, mode='lines', name='Training Loss'))
fig2.update_layout(title='Training Loss Curve', xaxis_title='Epoch', yaxis_title='Loss')
fig2.show()

## Inference data

In [33]:
# 预测降噪结果
with torch.no_grad():
    # 确保 noisy_signal 的形状正确
    noisy_signal_input = noisy_signal.view(batch_size, sequence_length, input_size)
    denoised_signal = model(noisy_signal_input)  # 模型输出 (batch_size, sequence_length, input_size)

# 绘制降噪结果与干净信号的对比
fig3 = go.Figure()

# 绘制第一个 batch 的干净信号和降噪信号
fig3.add_trace(go.Scatter(y=noisy_signal_input[0].squeeze().numpy(), mode='lines', name='noisy_signa'))  # 第 0 个 batch
fig3.add_trace(go.Scatter(y=clean_batches[0].squeeze().numpy(), mode='lines', name='Clean Signal'))  # 第 0 个 batch
fig3.add_trace(go.Scatter(y=denoised_signal[0].squeeze().numpy(), mode='lines', name='Denoised Signal'))  # 第 0 个 batch
fig3.update_layout(title='Denoised vs Clean Signal', xaxis_title='Time Step', yaxis_title='Signal Value')
fig3.show()

## Export onnx format model

In [9]:
# 训练完成后
torch.save(model.state_dict(), "trained_lstm_denoiser.pth")
print("Model training completed and weights saved.")

# 加载训练后的模型权重（可选）
model.load_state_dict(torch.load("trained_lstm_denoiser.pth"))

# 导出为 ONNX 格式
onnx_path = "./models/lstm_denoiser.onnx"
input_data = torch.randn(batch_size, sequence_length, input_size)  # Example input data
torch.onnx.export(
    model,
    input_data,
    onnx_path,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size", 1: "sequence_length"},
                  "output": {0: "batch_size", 1: "sequence_length"}},
    opset_version=11,
)
print(f"Model exported to {onnx_path}")

Model training completed and weights saved.
Model exported to ./models/lstm_denoiser.onnx



You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.


Exporting a model to ONNX with a batch_size other than 1, with a variable length with LSTM can cause an error when running the ONNX model wit

## Validate onnx format model

In [13]:
# 创建干净的正弦波信号
time = np.linspace(0, 2 * np.pi, sequence_length)  # 时间步
clean_signal = np.sin(time)  # 单个正弦波信号
clean_signal = np.tile(clean_signal, (batch_size, 1)).reshape(batch_size, sequence_length, input_size)  # 扩展到批次

# 添加随机噪声
noise = np.random.normal(0, 0.2, clean_signal.shape)  # 噪声强度为 0.2
noisy_signal = clean_signal + noise  # 带噪正弦波

# 转换为 float32 类型
input_data = noisy_signal.astype(np.float32)
clean_signal = clean_signal.astype(np.float32)

In [14]:
# 验证onnx格式模型
import onnxruntime as ort
import numpy as np

# 加载 ONNX 模型
session = ort.InferenceSession("models/lstm_denoiser.onnx")

# 推理
outputs = session.run(["output"], {"input": input_data})
print("Output shape:", np.array(outputs).shape)

# 转换为 NumPy 数组
denoised_output = np.array(outputs[0])  # 模型输出，形状 (32, 100, 1)
print("Output shape:", denoised_output.shape)  # 确保形状是 (32, 100, 1)

Output shape: (1, 400, 100, 1)
Output shape: (400, 100, 1)


In [19]:
# 绘制第一个样本的输入信号和模型输出
fig = go.Figure()

# 第一个样本的输入信号（展平）
fig.add_trace(go.Scatter(
    y=input_data[0].squeeze(),
    mode='lines',
    name='Noisy Signal (Input)'
))

# 第一个样本的模型输出（展平）
fig.add_trace(go.Scatter(
    y=denoised_output[0].squeeze(),
    mode='lines',
    name='Denoised Signal (Output)'
))


# 第一个样本的模型输出（展平）
fig.add_trace(go.Scatter(
    y=clean_signal[0].squeeze(),
    mode='lines',
    name='clean_signal (label)'
))


# 设置图表标题和轴标签
fig.update_layout(
    title="Noisy vs Denoised Signal (ONNX Model)",
    xaxis_title="Time Step",
    yaxis_title="Signal Value"
)

# 显示图表
fig.show()
