## Prepare training data

In [1]:
import torch
import numpy as np

# 定义参数
batch_size = 400
input_size = 1

# 定义随机性参数
sequence_lengths = [50, 100, 200, 500, 800]  # 可选的采样点数
period_range = (1, 100)  # 随机选择周期范围

# 初始化用于存储所有信号的列表dd
all_clean_batches = []
all_noisy_batches = []

# 生成多种随机信号并融合
for sequence_length in sequence_lengths:
    period = np.random.uniform(*period_range)  # 随机选择周期

    # 生成干净信号
    clean_signal = torch.sin(
        torch.linspace(0, 2 * np.pi * period, batch_size * sequence_length)
    ).unsqueeze(-1)   # 添加最后一维，使形状为 (batch_size * sequence_length, 1)

    # # 检查生成信号的长度是否能整齐划分为 batch_size
    # total_points = batch_size * sequence_length
    # if clean_signal.size(0) != total_points:
    #     print(f"Skipping sequence length {sequence_length} due to incompatible size.")
    #     continue

    # 划分为批次形式
    clean_batches = clean_signal.view(batch_size, sequence_length, input_size)

    # 生成噪声信号
    noise = torch.randn_like(clean_signal) * 0.2  # 调整噪声强度
    noisy_signal = clean_signal + noise
    noisy_batches = noisy_signal.view(batch_size, sequence_length, input_size)

    # 将生成的数据添加到列表中
    all_clean_batches.append((sequence_length, clean_batches))
    all_noisy_batches.append((sequence_length, noisy_batches))

# 确保序列长度相同的批次分别拼接
final_clean_batches = []
final_noisy_batches = []

for sequence_length in sequence_lengths:
    clean_batches_group = [cb for sl, cb in all_clean_batches if sl == sequence_length]
    noisy_batches_group = [nb for sl, nb in all_noisy_batches if sl == sequence_length]

    if clean_batches_group and noisy_batches_group:
        final_clean_batches.append(torch.cat(clean_batches_group, dim=0))
        final_noisy_batches.append(torch.cat(noisy_batches_group, dim=0))

# 打印拼接结果
for i, sequence_length in enumerate(sequence_lengths):
    if i < len(final_clean_batches):
        print(f"Final clean batches for sequence length {sequence_length}: {final_clean_batches[i].size()}")
        print(f"Final noisy batches for sequence length {sequence_length}: {final_noisy_batches[i].size()}")


Final clean batches for sequence length 50: torch.Size([400, 50, 1])
Final noisy batches for sequence length 50: torch.Size([400, 50, 1])
Final clean batches for sequence length 100: torch.Size([400, 100, 1])
Final noisy batches for sequence length 100: torch.Size([400, 100, 1])
Final clean batches for sequence length 200: torch.Size([400, 200, 1])
Final noisy batches for sequence length 200: torch.Size([400, 200, 1])
Final clean batches for sequence length 500: torch.Size([400, 500, 1])
Final noisy batches for sequence length 500: torch.Size([400, 500, 1])
Final clean batches for sequence length 800: torch.Size([400, 800, 1])
Final noisy batches for sequence length 800: torch.Size([400, 800, 1])


## Create dataloader

In [2]:
from torch.utils.data import Dataset, DataLoader

# 定义自定义数据集
class TimeSeriesDataset(Dataset):
    def __init__(self, noisy_batches, clean_batches):
        self.noisy_batches = noisy_batches
        self.clean_batches = clean_batches

    def __len__(self):
        return len(self.noisy_batches)

    def __getitem__(self, idx):
        return self.noisy_batches[idx], self.clean_batches[idx]

# 定义数据集和数据加载器
dataset = TimeSeriesDataset(final_noisy_batches, final_clean_batches)
dataloader = DataLoader(dataset, batch_size=40, shuffle=False)

grouped_dataloaders = []
for sequence_length, noisy_batch, clean_batch in zip(sequence_lengths,
                                                     final_noisy_batches,
                                                     final_clean_batches):
    dataset = TimeSeriesDataset(noisy_batch, clean_batch)
    dataloader = DataLoader(dataset, batch_size=40, shuffle=False)
    grouped_dataloaders.append((sequence_length, dataloader))

print(f"Number of batches in dataloader: {len(grouped_dataloaders)}")

Number of batches in dataloader: 5


## Define model

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# 定义 LSTM 降噪模型
class LSTMDenoiser(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out)
        return out

# 初始化模型
model = LSTMDenoiser(input_size=1, hidden_size=64, num_layers=2)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# 加载模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Training on:", device)
model.to(device)

Training on: cuda


LSTMDenoiser(
  (lstm): LSTM(1, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [4]:
import torch
print(torch.cuda.is_available())  # 应返回 True
print(torch.version.cuda)        # 检查 CUDA 版本
print(torch.backends.cudnn.version())  # 检查 cuDNN 版本


True
11.7
8500


## Train model

In [5]:
import os

# 用于保存训练损失
epoch_losses  = []

num_epochs = 20  # 训练轮数

# 初始化变量以跟踪最佳模型
best_loss = float('inf')
best_model_path = "models/best_model.pth"
last_model_path = "models/last_model.pth"

# 检查是否存在之前的模型
if os.path.exists(best_model_path):
    print("Loading checkpoint...")
    checkpoint = torch.load(best_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    best_loss = checkpoint['loss']
    best_epoch = checkpoint['epoch'] + 1
    print(f"Resuming training from epoch {start_epoch} with best loss {best_loss:.8f}")
else:
    print("No checkpoint found, starting fresh training.")

# 模型训练
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs} started.")
    epoch_loss = 0

    # 遍历每个 grouped_dataloader
    for sequence_length, dataloader in grouped_dataloaders:
        print(f"Training on sequence length {sequence_length}...")
        group_loss = 0  # 用于记录该组的损失

        for i, (noisy_batch, clean_batch) in enumerate(dataloader):
            # 将数据移动到 GPU（如果可用）
            noisy_batch = noisy_batch.to(device).float()
            clean_batch = clean_batch.to(device).float()

            # 前向传播
            outputs = model(noisy_batch)
            loss = criterion(outputs, clean_batch)

            # 反向传播和优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # 累加 batch 的损失
            group_loss += loss.item()

        # 打印每个组的平均损失
        avg_group_loss = group_loss / len(dataloader)
        print(f"Average loss for sequence length {sequence_length}: {avg_group_loss:.8f}")
        epoch_loss += group_loss

    # 记录每个 epoch 的平均损失
    avg_loss = epoch_loss / sum(len(dl) for _, dl in grouped_dataloaders)
    epoch_losses.append(avg_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.8f}")

    # 检查是否为最佳模型
    if avg_loss < best_loss: 
        best_epoch=epoch+1
        best_loss = avg_loss
        # torch.save(model.state_dict(), best_model_path)
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': best_loss,
            },
            best_model_path
        )
        print(f"New best model saved: {best_model_path} with loss: {best_loss:.8f}")

    # 保存当前 epoch 的模型
    # torch.save(model.state_dict(), last_model_path)
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': avg_loss,
        },
        last_model_path
    )
    print(f"Last model saved at epoch {epoch+1}")

print(f"Training complete! Best model was from epoch {best_epoch} with loss: "
      f"{best_loss:.8f}")

Loading checkpoint...
Resuming training from epoch 17 with best loss 0.00478871
Epoch 1/20 started.
Training on sequence length 50...
Average loss for sequence length 50: 0.00599054
Training on sequence length 100...
Average loss for sequence length 100: 0.00351165
Training on sequence length 200...
Average loss for sequence length 200: 0.00303028
Training on sequence length 500...
Average loss for sequence length 500: 0.00250429
Training on sequence length 800...
Average loss for sequence length 800: 0.00222316
Epoch [1/20], Average Loss: 0.00345198
New best model saved: models/best_model.pth with loss: 0.00345198
Last model saved at epoch 1
Epoch 2/20 started.
Training on sequence length 50...
Average loss for sequence length 50: 0.00640921
Training on sequence length 100...
Average loss for sequence length 100: 0.00331726
Training on sequence length 200...
Average loss for sequence length 200: 0.00285649
Training on sequence length 500...
Average loss for sequence length 500: 0.0023

## Plot results

In [6]:
import plotly.graph_objects as go

# 绘制训练损失曲线
fig2 = go.Figure()
fig2.add_trace(go.Scatter(y=epoch_losses, mode='lines', name='Training Loss'))
fig2.update_layout(title='Training Loss Curve', xaxis_title='Epoch', yaxis_title='Loss')
fig2.show()

## Inference data

In [9]:
# 加载模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 假设你有一个定义好的模型类
model = LSTMDenoiser(input_size=1, hidden_size=64, num_layers=2)
model.to(device)

# 加载已训练的模型权重
checkpoint = torch.load("models/best_model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # 设置模型为评估模式

# 预测降噪结果
with torch.no_grad():
    denoised_signal = model(final_noisy_batches[0][15].to(device))  # 添加 batch 维度 (1, sequence_length, input_size)
    denoised_signal = denoised_signal.squeeze(0)  # 移除 batch 维度，形状为 (sequence_length, input_size)

# 绘制降噪结果与干净信号的对比
fig3 = go.Figure()

# 绘制第一个 batch 的噪声信号、干净信号和去噪信号
fig3.add_trace(go.Scatter(y=final_noisy_batches[0][15].squeeze().cpu().numpy(), mode='lines', name='Noisy Signal'))  # 噪声信号
fig3.add_trace(go.Scatter(y=final_clean_batches[0][15].squeeze().cpu().numpy(), mode='lines', name='Clean Signal'))  # 干净信号
fig3.add_trace(go.Scatter(y=denoised_signal.squeeze().cpu().numpy(), mode='lines', name='Denoised Signal'))  # 去噪信号

fig3.update_layout(
    title='Denoised vs Clean Signal',
    xaxis_title='Time Step',
    yaxis_title='Signal Value',
    legend_title='Signal Type'
)

fig3.show()

In [11]:
import numpy as np
import torch
import plotly.graph_objects as go

# 模拟信号
sequence_length = 1000

# linspace(start, stop, num=50)
clean_signal = np.sin(np.linspace(0, 2 * np.pi * 5, sequence_length)).astype(np.float32)  # 模拟正弦波干净信号
print("clean_signal shape:", clean_signal.shape)
noise = np.random.normal(0, 0.2, sequence_length).astype(np.float32)  # 添加噪声
noisy_signal = clean_signal + noise  # 噪声信号
print("noisy_signal shape:", noisy_signal.shape)

# 转换为 PyTorch 张量
noisy_signal_tensor = torch.from_numpy(noisy_signal).unsqueeze(1).unsqueeze(0)  # (1, sequence_length, 1)
print("noisy_signal_tensor shape:", noisy_signal_tensor.shape)

# 加载模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Training on:", device)

# 假设你有一个定义好的模型类
model = LSTMDenoiser(input_size=1, hidden_size=64, num_layers=2)
model.to(device)

# 加载已训练的模型权重
checkpoint = torch.load("models/best_model.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # 设置模型为评估模式

# 预测降噪结果
with torch.no_grad():
    denoised_signal = model(noisy_signal_tensor.to(device))  # 模型输出 (batch_size, sequence_length, input_size)
    print("denoised_signal shape:", denoised_signal.shape)
    
# 绘制对比图
fig3 = go.Figure()

fig3.add_trace(go.Scatter(y=noisy_signal, mode='lines', name='Noisy Signal'))  # 噪声信号
fig3.add_trace(go.Scatter(y=clean_signal, mode='lines', name='Clean Signal'))  # 干净信号
fig3.add_trace(go.Scatter(y=denoised_signal.squeeze().cpu().numpy(), mode='lines', name='Denoised Signal'))  # 去噪信号

# 更新布局
fig3.update_layout(
    title='Denoised vs Clean Signal',
    xaxis_title='Time Step',
    yaxis_title='Signal Value',
    legend_title='Signal Type'
)

# 显示图表
fig3.show()

clean_signal shape: (1000,)
noisy_signal shape: (1000,)
noisy_signal_tensor shape: torch.Size([1, 1000, 1])
Training on: cuda
denoised_signal shape: torch.Size([1, 1000, 1])


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.