In [5]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))


True
4
NVIDIA L4


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import time

# 确保使用 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 定义一个简单的 CNN 模型
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(2)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 初始化模型、损失函数、优化器
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 创建随机数据模拟 MNIST 图片
x_train = torch.randn(512, 1, 28, 28, device=device)  # 512张 28x28 的单通道图片
y_train = torch.randint(0, 10, (512,), device=device) # 随机 0-9 之间的标签

# 训练 10 轮
start_time = time.time()
for epoch in range(10):
    optimizer.zero_grad()
    outputs = model(x_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/10], Loss: {loss.item():.4f}")

end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds.")


Using device: cuda
Epoch [1/10], Loss: 2.3115
Epoch [2/10], Loss: 2.3528
Epoch [3/10], Loss: 2.2938
Epoch [4/10], Loss: 2.2865
Epoch [5/10], Loss: 2.2849
Epoch [6/10], Loss: 2.2783
Epoch [7/10], Loss: 2.2700
Epoch [8/10], Loss: 2.2586
Epoch [9/10], Loss: 2.2428
Epoch [10/10], Loss: 2.2245
Training completed in 0.06 seconds.


In [7]:
import torch
import time

# 选择设备
device_cpu = torch.device("cpu")
device_gpu = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 生成随机矩阵
size = 5000
x_cpu = torch.randn(size, size, device=device_cpu)
y_cpu = torch.randn(size, size, device=device_cpu)

x_gpu = torch.randn(size, size, device=device_gpu)
y_gpu = torch.randn(size, size, device=device_gpu)

# CPU 计算
start_cpu = time.time()
result_cpu = torch.matmul(x_cpu, y_cpu)
end_cpu = time.time()
print(f"CPU computation time: {end_cpu - start_cpu:.4f} seconds")

# GPU 计算
start_gpu = time.time()
result_gpu = torch.matmul(x_gpu, y_gpu)
torch.cuda.synchronize()  # 确保 GPU 计算完成
end_gpu = time.time()
print(f"GPU computation time: {end_gpu - start_gpu:.4f} seconds")


CPU computation time: 4.9254 seconds
GPU computation time: 0.0999 seconds


In [4]:
print("1")

1
