In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet152
import time

# Select which GPUs are visible to this process (here: devices 0 and 1).
# The value is a comma-separated list of device indices; it is written without
# whitespace so that parsing does not depend on how lenient the driver is.
# NOTE: this only takes effect if it runs before CUDA is first initialized
# in this process, so keep it ahead of any CUDA call.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# Dataset preparation (CIFAR-10).

# Training pipeline: random horizontal flip and padded random crop act as
# data augmentation, followed by tensor conversion and normalization.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation pipeline: same tensor conversion and normalization, but without
# the random augmentation, so that test-set results are deterministic and
# measured on unmodified images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Samples per optimizer step (2560).
batch_size = 128 * 20

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# The test split must not go through the augmenting `transform`.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Pick the compute device: CUDA when available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Model definition: ResNet-152 configured for 10 output classes, wrapped in
# nn.DataParallel for multi-GPU execution and then moved onto `device`.
model = nn.DataParallel(resnet152(num_classes=10)).to(device)

# Cross-entropy loss optimized with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.001)

print("Start Training")

# Training loop configuration.
num_epochs = 5

# Set training mode explicitly so the cell does not depend on whatever mode
# the model was left in by earlier kernel state.
model.train()

# CUDA work is queued asynchronously; synchronize before reading the clock so
# the measurement starts from an idle device.
if device.type == "cuda":
    torch.cuda.synchronize()
# perf_counter is a monotonic, high-resolution clock intended for timing.
start = time.perf_counter()

for epoch in range(num_epochs):
    running_loss = 0.0
    # Count batches explicitly instead of relying on a loop variable that
    # would be undefined after the loop if the loader yielded nothing.
    num_batches = 0
    for num_batches, (inputs, labels) in enumerate(trainloader, start=1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss as a Python float.
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch (guarded against zero batches).
    print(f"Epoch {epoch + 1}, Loss: {running_loss / max(num_batches, 1)}")

# Wait for any GPU work still in flight so it is included in the elapsed time.
if device.type == "cuda":
    torch.cuda.synchronize()
print(time.perf_counter() - start)
print("Finished Training")

Files already downloaded and verified
Files already downloaded and verified
Start Training
Epoch 1, Loss: 2.4718886733055117
Epoch 2, Loss: 2.3994854211807253
Epoch 3, Loss: 2.3725346088409425
Epoch 4, Loss: 2.3485316395759583
Epoch 5, Loss: 2.32766740322113
140.95807695388794
Finished Training


In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet152
import time

# Restrict this process to a single visible GPU: the device with index 1.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})

# Dataset preparation (CIFAR-10).

# Training pipeline: random horizontal flip and padded random crop act as
# data augmentation, followed by tensor conversion and normalization.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Evaluation pipeline: same tensor conversion and normalization, but without
# the random augmentation, so that test-set results are deterministic and
# measured on unmodified images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Samples per optimizer step (2560).
batch_size = 128 * 20

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# The test split must not go through the augmenting `transform`.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Pick the compute device: CUDA when available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Model definition: ResNet-152 configured for 10 output classes.
# The nn.DataParallel wrapper is disabled in this cell (it was commented out),
# so the bare module is moved onto `device` directly.
model = resnet152(num_classes=10).to(device)

# Cross-entropy loss optimized with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=0.001)

print("Start Training")

# Training loop configuration.
num_epochs = 5

# Set training mode explicitly so the cell does not depend on whatever mode
# the model was left in by earlier kernel state.
model.train()

# CUDA work is queued asynchronously; synchronize before reading the clock so
# the measurement starts from an idle device.
if device.type == "cuda":
    torch.cuda.synchronize()
# perf_counter is a monotonic, high-resolution clock intended for timing.
start = time.perf_counter()

for epoch in range(num_epochs):
    running_loss = 0.0
    # Count batches explicitly instead of relying on a loop variable that
    # would be undefined after the loop if the loader yielded nothing.
    num_batches = 0
    for num_batches, (inputs, labels) in enumerate(trainloader, start=1):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss as a Python float.
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch (guarded against zero batches).
    print(f"Epoch {epoch + 1}, Loss: {running_loss / max(num_batches, 1)}")

# Wait for any GPU work still in flight so it is included in the elapsed time.
if device.type == "cuda":
    torch.cuda.synchronize()
print(time.perf_counter() - start)
print("Finished Training")

Files already downloaded and verified
Files already downloaded and verified
Start Training
Epoch 1, Loss: 2.475751352310181
Epoch 2, Loss: 2.3936505198478697
Epoch 3, Loss: 2.3590277671813964
Epoch 4, Loss: 2.3364200353622437
Epoch 5, Loss: 2.3081355571746824
240.43317484855652
Finished Training
