## Prepare training data

In [1]:
import torch
import numpy as np

# Configuration
batch_size = 40
input_size = 1

# Seed both random number generators used below (NumPy draws the periods,
# PyTorch draws the noise) so that "Restart Kernel -> Run All" reproduces
# the same dataset.
random_seed = 42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Randomisation parameters
sequence_lengths = [50, 100, 200, 500, 800]  # candidate numbers of sample points
period_range = (1, 100)  # the number of sine cycles per signal is drawn uniformly from this range
noise_std = 0.2  # scale applied to the standard-normal noise
max_samples = 30000  # the concatenated signals are truncated to this many samples

all_clean_signals = []
all_noisy_signals = []

# Generate one randomly parameterised signal per sequence length and collect them
for sequence_length in sequence_lengths:
    # Despite its name, `period` is the number of full sine cycles spanned
    # by the signal (the linspace below runs from 0 to 2*pi*period).
    period = np.random.uniform(*period_range)

    # Clean signal; the trailing unsqueeze gives shape (batch_size * sequence_length, 1)
    clean_signal = torch.sin(
        torch.linspace(0, 2 * np.pi * period, batch_size * sequence_length)
    ).unsqueeze(-1)

    # Noisy signal = clean signal + scaled Gaussian noise
    noise = torch.randn_like(clean_signal) * noise_std
    noisy_signal = clean_signal + noise

    # Add to list
    all_clean_signals.append(clean_signal)
    all_noisy_signals.append(noisy_signal)

# Concatenate all signals and truncate to max_samples.
# NOTE: with the settings above the five signals total 66000 samples, so the
# truncation keeps the first three signals, part of the fourth and none of
# the fifth.
final_clean_signal = torch.cat(all_clean_signals, dim=0)[:max_samples]
final_noisy_signal = torch.cat(all_noisy_signals, dim=0)[:max_samples]

print("final_clean_signal shape: ", final_clean_signal.shape)
print("final_noisy_signal shape: ", final_noisy_signal.shape)

final_clean_signal shape:  torch.Size([30000, 1])
final_noisy_signal shape:  torch.Size([30000, 1])


In [2]:
import pandas as pd
from src.utils import plotters

# Plot the noisy and clean signals side by side.
# `noisy_signal` / `clean_signal` are whatever the kernel currently holds under
# those names. Right after the data-generation cell that is the signal from
# its last loop iteration, i.e. batch_size * 800 = 32000 samples, not the
# concatenated `final_*` tensors.
noisy_signal_flat = noisy_signal.squeeze()  # drop the trailing singleton axis -> 1-D
clean_signal_flat = clean_signal.squeeze()  # drop the trailing singleton axis -> 1-D
signal_traces = pd.DataFrame(
    {"Noisy Signal": noisy_signal_flat, "Clean Signal": clean_signal_flat}
)
plotters.plot_traces(signal_traces, width=800, height=400, mode="lines")

## Create dataloader

In [3]:
from torch.utils.data import Dataset, DataLoader

# Custom sliding-window dataset
class TimeSeriesDataset(Dataset):
    """Sliding-window view over a pair of aligned noisy/clean signals.

    Item ``i`` is the pair ``(noisy_signal[i:i + sequence_length],
    clean_signal[i:i + sequence_length])``; consecutive items overlap and
    are shifted by one sample.

    Args:
        noisy_signal: Indexable signal whose first axis is time.
        clean_signal: Target signal, sliced with the same indices as
            ``noisy_signal``.
        sequence_length: Number of time steps in every returned window.
    """

    def __init__(self, noisy_signal, clean_signal, sequence_length):
        self.noisy_signal  = noisy_signal
        self.clean_signal  = clean_signal
        self.sequence_length = sequence_length

    def __len__(self):
        """Return the number of full windows (0 if the signal is shorter than one window)."""
        # Clamp at zero: a negative length would make len() raise ValueError.
        return max(0, len(self.noisy_signal) - self.sequence_length + 1)

    def __getitem__(self, idx):
        """Return the ``(noisy, clean)`` window that starts at position ``idx``.

        Negative indices count from the end, as for built-in sequences.

        Raises:
            IndexError: If ``idx`` does not address a full window. Without
                this, slicing would silently return short or empty windows
                and plain iteration over the dataset would never stop.
        """
        num_windows = len(self)
        if idx < 0:
            idx = idx + num_windows
        if not 0 <= idx < num_windows:
            raise IndexError(
                f"window index out of range for dataset with {num_windows} windows"
            )
        stop = idx + self.sequence_length
        noisy_sequence = self.noisy_signal[idx:stop]
        clean_sequence = self.clean_signal[idx:stop]
        return noisy_sequence, clean_sequence

# Build one DataLoader per window length; all of them slide over the same
# concatenated noisy/clean signals and keep the windows in order (no shuffling).
grouped_dataloaders = {}
for sequence_length in sequence_lengths:
    dataset = TimeSeriesDataset(final_noisy_signal, final_clean_signal, sequence_length)
    grouped_dataloaders[sequence_length] = DataLoader(
        dataset, batch_size=batch_size, shuffle=False
    )

# Sanity check: report the batch count and the shapes of the first batch per group
for sequence_length, dataloader in grouped_dataloaders.items():
    print(f"Sequence length: {sequence_length}, \nTotal batches: {len(dataloader)}")
    first_batch = next(iter(dataloader), None)  # None when the loader yields nothing
    if first_batch is not None:
        noisy_sequence, clean_sequence = first_batch
        print(f"Noisy batch shape: {noisy_sequence.shape}")
        print(f"Clean batch shape: {clean_sequence.shape}")

Sequence length: 50, 
Total batches: 749
Noisy batch shape: torch.Size([40, 50, 1])
Clean batch shape: torch.Size([40, 50, 1])
Sequence length: 100, 
Total batches: 748
Noisy batch shape: torch.Size([40, 100, 1])
Clean batch shape: torch.Size([40, 100, 1])
Sequence length: 200, 
Total batches: 746
Noisy batch shape: torch.Size([40, 200, 1])
Clean batch shape: torch.Size([40, 200, 1])
Sequence length: 500, 
Total batches: 738
Noisy batch shape: torch.Size([40, 500, 1])
Clean batch shape: torch.Size([40, 500, 1])
Sequence length: 800, 
Total batches: 731
Noisy batch shape: torch.Size([40, 800, 1])
Clean batch shape: torch.Size([40, 800, 1])


## Define model

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

# LSTM-based denoising model
class LSTMDenoiser(nn.Module):
    """Stacked LSTM followed by a per-time-step linear projection.

    Args:
        input_size: Number of features per time step; also the output width.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        # batch_first=True -> batched inputs are laid out as (batch, time, features)
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        # Maps every hidden state back to the input feature width
        self.fc = nn.Linear(in_features=hidden_size, out_features=input_size)

    def forward(self, x):
        """Return the denoised sequence, one output vector per input time step."""
        hidden_states, _ = self.lstm(x)  # final (h_n, c_n) state is not needed
        return self.fc(hidden_states)

# Pick the compute device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Training on:", device)

# Instantiate the network together with its loss function and optimizer
model = LSTMDenoiser(input_size=1, hidden_size=32, num_layers=2)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Move the parameters to the device; as the last expression of the cell this
# also displays the model summary.
model.to(device)

Training on: cuda


LSTMDenoiser(
  (lstm): LSTM(1, 32, num_layers=2, batch_first=True)
  (fc): Linear(in_features=32, out_features=1, bias=True)
)

## Train model

In [5]:
import os

# Average training loss of every epoch, appended across all sequence-length groups
epoch_losses  = []

num_epochs = 10  # epochs trained per sequence-length group

# State used to track the best model seen so far
best_loss = float('inf')
best_epoch = None  # stays None if no epoch ever improves on best_loss (e.g. NaN losses)
best_model_path = "models/best_model.pth"
last_model_path = "models/last_model.pth"

# Make sure the checkpoint directories exist (no error if they already do)
for checkpoint_path in (best_model_path, last_model_path):
    os.makedirs(os.path.dirname(checkpoint_path) or ".", exist_ok=True)

# Model placement. The device is resolved before loading the checkpoint so
# that tensors saved on a GPU can be mapped onto a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def _save_checkpoint(path, epoch, loss):
    """Write the current model/optimizer state plus bookkeeping values to `path`."""
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
        },
        path
    )


# Warm-start from the best checkpoint if one exists
if os.path.exists(best_model_path):
    print("Loading checkpoint...")
    checkpoint = torch.load(best_model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    best_loss = checkpoint['loss']
    best_epoch = checkpoint['epoch'] + 1  # checkpoints store the 0-based epoch index
    # The loops below always run all num_epochs for every group, so this is a
    # warm start from the saved weights rather than a resume at a given epoch.
    print(f"Loaded checkpoint from epoch {best_epoch} with best loss {best_loss:.8f}; "
          f"training continues from these weights.")
else:
    print("No checkpoint found, starting fresh training.")

print("Training on:", device)

# Train model
model.train()

# Train on each sequence-length group in turn
for sequence_length, dataloader in grouped_dataloaders.items():
    print(f"Training with sequence length {sequence_length}")

    # An empty loader would make the average-loss division below fail
    if len(dataloader) == 0:
        print(f"No batches for sequence length {sequence_length}, skipping.")
        continue

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs} started.")
        epoch_loss = 0.0

        # Iterate over the batches of this group
        for i, (noisy_batch, clean_batch) in enumerate(dataloader):
            # Move the data to the training device (GPU if available)
            noisy_batch = noisy_batch.to(device).float()
            clean_batch = clean_batch.to(device).float()

            # Forward pass
            outputs = model(noisy_batch)
            loss = criterion(outputs, clean_batch)

            # Backward pass and optimisation step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate the batch loss
            epoch_loss += loss.item()

        # Record the average loss of this epoch
        avg_loss = epoch_loss / len(dataloader)
        epoch_losses.append(avg_loss)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Average Loss: {avg_loss:.8f}")

        # Keep a separate checkpoint for the best epoch so far.
        # Note that `epoch` restarts at 0 for every sequence-length group.
        if avg_loss < best_loss:
            best_epoch = epoch + 1
            best_loss = avg_loss
            _save_checkpoint(best_model_path, epoch, best_loss)
            print(f"New best model saved: {best_model_path} with loss: {best_loss:.8f}")

        # Always overwrite the "last" checkpoint with the current epoch
        _save_checkpoint(last_model_path, epoch, avg_loss)
        print(f"Last model saved at epoch {epoch + 1}")

print(f"Training complete! Best model was from epoch {best_epoch} with loss: {best_loss:.8f}")

No checkpoint found, starting fresh training.
Training on: cuda
Training with sequence length 50
Epoch 1/10 started.
Epoch [1/10], Average Loss: 0.05867065
New best model saved: models/best_model.pth with loss: 0.05867065
Last model saved at epoch 1
Epoch 2/10 started.
Epoch [2/10], Average Loss: 0.02025222
New best model saved: models/best_model.pth with loss: 0.02025222
Last model saved at epoch 2
Epoch 3/10 started.
Epoch [3/10], Average Loss: 0.01828320
New best model saved: models/best_model.pth with loss: 0.01828320
Last model saved at epoch 3
Epoch 4/10 started.
Epoch [4/10], Average Loss: 0.01660352
New best model saved: models/best_model.pth with loss: 0.01660352
Last model saved at epoch 4
Epoch 5/10 started.
Epoch [5/10], Average Loss: 0.01545711
New best model saved: models/best_model.pth with loss: 0.01545711
Last model saved at epoch 5
Epoch 6/10 started.
Epoch [6/10], Average Loss: 0.01333877
New best model saved: models/best_model.pth with loss: 0.01333877
Last model sa

## Plot results

In [6]:
import plotly.graph_objects as go

# Training loss curve: one point per recorded epoch average
loss_trace = go.Scatter(y=epoch_losses, mode='lines', name='Training Loss')
fig2 = go.Figure(data=[loss_trace])
fig2.update_layout(
    title='Training Loss Curve',
    xaxis_title='Epoch',
    yaxis_title='Loss',
)
fig2.show()

## Inference data

In [12]:
# Select the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the network with the same architecture that was used for training
model = LSTMDenoiser(input_size=1, hidden_size=32, num_layers=2)
model.to(device)

# Load the trained weights
checkpoint = torch.load("models/best_model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # switch to evaluation mode

# Test the model on the first `num_test_samples` samples of noisy_signal.
# Depending on which cell ran last, `noisy_signal` / `clean_signal` are either
# the (N, 1) tensors from the data-generation cell or the 1-D NumPy arrays from
# the standalone demo cell. Both are reshaped to ONE sequence of shape
# (1, T, 1) so the LSTM sees the samples as consecutive time steps instead of
# as many independent length-1 sequences.
num_test_samples = 2000
test_noisy_signal = torch.as_tensor(noisy_signal[:num_test_samples]).float().reshape(1, -1, 1)
test_noisy_signal = test_noisy_signal.to(device)  # move to the device
test_clean_signal = torch.as_tensor(clean_signal[:num_test_samples]).float().reshape(-1)

# Predict the denoised signal
with torch.no_grad():
    denoised_signal = model(test_noisy_signal)
    denoised_signal = denoised_signal.reshape(-1).cpu().numpy()  # flatten to a 1-D NumPy array

# Compare the denoised result with the noisy input and the clean target
fig3 = go.Figure()

fig3.add_trace(go.Scatter(y=test_noisy_signal.reshape(-1).cpu().numpy(), mode='lines', name='Noisy Signal'))  # noisy input
fig3.add_trace(go.Scatter(y=test_clean_signal.cpu().numpy(), mode='lines', name='Clean Signal'))  # clean target
fig3.add_trace(go.Scatter(y=denoised_signal, mode='lines', name='Denoised Signal'))  # model output

fig3.update_layout(
    title='Denoised vs Clean Signal',
    xaxis_title='Time Step',
    yaxis_title='Signal Value',
    legend_title='Signal Type'
)

fig3.show()

In [10]:
import numpy as np
import torch
import plotly.graph_objects as go

# Synthetic test signal: 20 sine cycles sampled at `sequence_length` points
sequence_length = 1000

time_axis = np.linspace(0, 2 * np.pi * 20, sequence_length)
clean_signal = np.sin(time_axis).astype(np.float32)  # clean sine wave
print("clean_signal shape:", clean_signal.shape)
noise = np.random.normal(0, 0.2, sequence_length).astype(np.float32)  # Gaussian noise, std 0.2
noisy_signal = clean_signal + noise  # noisy observation
print("noisy_signal shape:", noisy_signal.shape)

# Convert to a PyTorch tensor with batch and feature axes: (1, sequence_length, 1)
noisy_signal_tensor = torch.from_numpy(noisy_signal)[None, :, None]
print("noisy_signal_tensor shape:", noisy_signal_tensor.shape)

# Select the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Training on:", device)

# Rebuild the network with the training architecture and restore the best weights
model = LSTMDenoiser(input_size=1, hidden_size=32, num_layers=2).to(device)
checkpoint = torch.load("models/best_model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # evaluation mode

# Predict the denoised signal; output is (batch_size, sequence_length, input_size)
with torch.no_grad():
    denoised_signal = model(noisy_signal_tensor.to(device))
    print("denoised_signal shape:", denoised_signal.shape)

# Comparison plot: noisy input, clean target and model output
fig3 = go.Figure(
    data=[
        go.Scatter(y=noisy_signal, mode='lines', name='Noisy Signal'),
        go.Scatter(y=clean_signal, mode='lines', name='Clean Signal'),
        go.Scatter(y=denoised_signal.squeeze().cpu().numpy(), mode='lines', name='Denoised Signal'),
    ]
)

# Layout
fig3.update_layout(
    title='Denoised vs Clean Signal',
    xaxis_title='Time Step',
    yaxis_title='Signal Value',
    legend_title='Signal Type'
)

# Show the figure
fig3.show()

clean_signal shape: (1000,)
noisy_signal shape: (1000,)
noisy_signal_tensor shape: torch.Size([1, 1000, 1])
Training on: cuda
denoised_signal shape: torch.Size([1, 1000, 1])
