In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import time
import os

# Neural network model definition
class OddEvenPredictor(nn.Module):
    """Embedding lookup followed by a tiny MLP that emits one sigmoid score per row.

    The forward pass casts the input to integer indices, embeds them, flattens
    everything after the first dimension, and applies
    Linear(embedding_dim, 4) -> ReLU -> Linear(4, 1) -> Sigmoid.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector.
    """

    def __init__(self, num_embeddings, embedding_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        hidden_units = 4
        # Layer order is kept as-is so state_dict keys (network.0.*, network.2.*) do not change.
        layers = [
            nn.Linear(embedding_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid output of the network for index tensor ``x``."""
        indices = x.long()
        embedded = self.embedding(indices)
        batch_size = embedded.size(0)
        # Flatten the embedding output to one row per leading-dimension entry.
        flattened = embedded.view(batch_size, -1)
        return self.network(flattened)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the RNG so weight initialisation and DataLoader shuffling start from
# the same state on every run.
torch.manual_seed(0)

# Instantiate the model and move it to the selected device (GPU or CPU).
model = OddEvenPredictor(100, 1).to(device)

# Loss function and optimizer.
criterion = nn.BCELoss()  # binary cross-entropy on the model's sigmoid output
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training data: the integers 0..99, labelled 1.0 when even and 0.0 when odd.
X_train = torch.tensor([[i] for i in range(0, 100)], dtype=torch.long)
y_train = torch.tensor([[int(i % 2 == 0)] for i in range(0, 100)], dtype=torch.float)

# Wrap the tensors in a Dataset / DataLoader.
# batch_size equals the dataset size, so every epoch is a single full batch.
dataset = TensorDataset(X_train, y_train)
dataloader = DataLoader(dataset, batch_size=100, shuffle=True)

# Record the training start time.
start_time = time.time()

checkpoint_dir = '/mnt/workspace/odd_even_model'  # directory where checkpoints are written
# torch.save does not create missing parent directories, so create it up front
# instead of failing at the first checkpoint (epoch 1000).
os.makedirs(checkpoint_dir, exist_ok=True)

# Training loop.
epochs = 10000
for epoch in range(epochs):
    for batch_X, batch_y in dataloader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()   # clear gradients from the previous step
        outputs = model(batch_X)  # forward pass
        loss = criterion(outputs, batch_y)  # compute the loss
        loss.backward()  # backward pass
        optimizer.step()  # update the weights

    if (epoch+1) % 100 == 0:
        # Report the loss of the last batch and the time elapsed so far.
        end_time = time.time()
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Time Elapsed {end_time - start_time:.2f} seconds')
    if (epoch+1) % 1000 == 0:
        # Save a checkpoint with model/optimizer state, last loss and epoch number.
        checkpoint_path = f'{checkpoint_dir}/checkpoint_epoch_{epoch+1}.pth'
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss.item(),
            'epoch': epoch + 1,
        }, checkpoint_path)
        print(f'Checkpoint saved at {checkpoint_path}')

# Record the training end time.
end_time = time.time()
# Total training duration in seconds.
elapsed_time = end_time - start_time
# Break the duration down into hours, minutes and seconds.
hours = int(elapsed_time // 3600)
minutes = int((elapsed_time % 3600) // 60)
seconds = int(elapsed_time % 60)
print(f'Training completed in {hours} hours, {minutes} minutes and {seconds} seconds')

Epoch [100/10000], Loss: 0.6876, Time Elapsed 0.22 seconds
Epoch [200/10000], Loss: 0.6708, Time Elapsed 0.44 seconds
Epoch [300/10000], Loss: 0.6383, Time Elapsed 0.66 seconds
Epoch [400/10000], Loss: 0.5879, Time Elapsed 0.87 seconds
Epoch [500/10000], Loss: 0.5134, Time Elapsed 1.09 seconds
Epoch [600/10000], Loss: 0.4275, Time Elapsed 1.30 seconds
Epoch [700/10000], Loss: 0.3460, Time Elapsed 1.52 seconds
Epoch [800/10000], Loss: 0.2753, Time Elapsed 1.74 seconds
Epoch [900/10000], Loss: 0.2231, Time Elapsed 1.95 seconds
Epoch [1000/10000], Loss: 0.1866, Time Elapsed 2.17 seconds
Checkpoint saved at /mnt/workspace/odd_even_model/checkpoint_epoch_1000.pth
Epoch [1100/10000], Loss: 0.1611, Time Elapsed 2.38 seconds
Epoch [1200/10000], Loss: 0.1431, Time Elapsed 2.60 seconds
Epoch [1300/10000], Loss: 0.1300, Time Elapsed 2.81 seconds
Epoch [1400/10000], Loss: 0.1202, Time Elapsed 3.02 seconds
Epoch [1500/10000], Loss: 0.1125, Time Elapsed 3.24 seconds
Epoch [1600/10000], Loss: 0.0980,

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import time
import os

#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
#os.environ['TORCH_USE_CUDA_DSA'] = "1"
# Neural network model definition
class OddEvenPredictor(nn.Module):
    """Embedding lookup followed by a tiny MLP that emits one sigmoid score per row.

    The forward pass casts the input to integer indices, embeds them, flattens
    everything after the first dimension, and applies
    Linear(embedding_dim, 4) -> ReLU -> Linear(4, 1) -> Sigmoid.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector.
    """

    def __init__(self, num_embeddings, embedding_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, embedding_dim)
        hidden_units = 4
        # Layer order is kept as-is so state_dict keys (network.0.*, network.2.*) do not change.
        layers = [
            nn.Linear(embedding_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the sigmoid output of the network for index tensor ``x``."""
        indices = x.long()
        embedded = self.embedding(indices)
        batch_size = embedded.size(0)
        # Flatten the embedding output to one row per leading-dimension entry.
        flattened = embedded.view(batch_size, -1)
        return self.network(flattened)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate the model and move it to the selected device (GPU or CPU).
model = OddEvenPredictor(100, 1).to(device)

# Load the saved model state dict.
# Make sure checkpoint_path points at an existing checkpoint file.
checkpoint_path = '/mnt/workspace/odd_even_model/checkpoint_epoch_10000.pth'
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Print every model parameter (name, shape and values).
for name, param in model.named_parameters():
    print(f"Parameter name: {name}")
    print(f"Parameter size: {param.size()}")
    print(f"Parameter values: {param}")
    print("\n")

# Switch the model to evaluation mode.
model.eval()

# Prediction inputs: one row per integer 0..99.
test_numbers = torch.tensor([[i] for i in range(0, 100)], dtype=torch.long).to(device)

# Run the predictions.
with torch.no_grad():  # no gradients are needed for inference
    predictions = model(test_numbers)
    # Drop only the trailing size-1 output dimension. A bare squeeze() would
    # also remove the batch dimension for a single test number, leaving a 0-d
    # tensor that cannot be iterated.
    predictions = predictions.squeeze(-1)
    # Scores below 0.5 are reported as '奇数' (odd), everything else as '偶数' (even).
    # Iterating over .tolist() compares plain Python floats instead of one
    # tensor element at a time.
    predicted_odd_even = ['奇数' if pred < 0.5 else '偶数' for pred in predictions.tolist()]

# Print the prediction for every test number.
for number, odd_even in zip(test_numbers.tolist(), predicted_odd_even):
    print(f'The number {number[0]} is predicted as {odd_even}')

Parameter name: embedding.weight
Parameter size: torch.Size([100, 1])
Parameter values: Parameter containing:
tensor([[ 2.2162],
        [-0.3710],
        [ 2.2167],
        [-0.3710],
        [-3.2940],
        [-0.3710],
        [-3.2781],
        [-0.3710],
        [-3.2711],
        [-0.3710],
        [-3.2602],
        [-0.3711],
        [-3.2759],
        [-0.3710],
        [-3.3754],
        [-0.3710],
        [ 2.2162],
        [-0.3710],
        [ 2.2168],
        [-0.3710],
        [-3.2406],
        [-0.1724],
        [ 2.2235],
        [-0.3710],
        [ 2.2163],
        [-0.3710],
        [ 2.2161],
        [-0.3710],
        [ 2.2161],
        [-0.3710],
        [ 2.2164],
        [-0.3710],
        [ 2.2178],
        [-0.3710],
        [-3.1229],
        [-0.3710],
        [ 2.2206],
        [-0.3710],
        [ 2.2157],
        [-0.3710],
        [ 2.2160],
        [-0.3710],
        [ 2.2162],
        [-0.3710],
        [ 2.2160],
        [-0.3710],
        [ 2.2164