In [1]:
!pip install torch
!pip install torchvision
!pip install tqdm



# データ読み込み

In [2]:
import torch
from torchvision import datasets, transforms

# Number of samples per mini-batch, shared by the training and test loaders.
BATCH_SIZE = 64

# Per-image preprocessing applied when a sample is read from the dataset.
transform = transforms.Compose([
    transforms.ToTensor(),        # convert to a tensor and rescale 0-255 values to 0-1
])

# MNIST training split.
train_dataset = datasets.MNIST(
    root='./data',        # directory where the data is stored
    train=True,           # select the training split
    download=True,        # download the files if they are not present yet
    transform=transform,  # preprocessing defined above
)

# MNIST test split, stored and preprocessed the same way.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Evaluation does not benefit from shuffling: the accuracy is the same for any
# batch order, so the test set is iterated in its fixed, reproducible order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Sanity check: shape of the first training image after the transform.
print(train_dataset[0][0].shape)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 14.7MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 494kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 4.44MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 5.14MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

torch.Size([1, 28, 28])





# 通常のCNN

In [18]:
import torch
import torch.nn as nn

# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

class MNISTCNNModel(nn.Module):
    """Small CNN classifier: two conv/pool stages followed by a two-layer head.

    The first linear layer expects 64*5*5 features, which matches 1x28x28
    inputs (no padding is used anywhere):

        conv 3x3 -> 32x26x26, max-pool 2x2 -> 32x13x13
        conv 3x3 -> 64x11x11, max-pool 2x2 -> 64x5x5
        flatten  -> 1600 features

    The network ends in ``LogSoftmax``, so ``forward`` returns
    log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels, 3x3 kernel, stride 1, then 2x2 max-pooling.
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=0.1),
        )

        # Stage 2: 32 -> 64 channels, same kernel / pooling configuration.
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(p=0.1),
        )

        # Head: flatten (channels x height x width) into one vector per sample,
        # then 1600 -> 256 -> 10 with log-softmax over the class dimension.
        self.layer3 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=64 * 5 * 5, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=10),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the three stages in order and return the result."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        return x

In [19]:
from tqdm import tqdm

# Fresh network instance, placed on the selected device.
model = MNISTCNNModel().to(device)

# The network ends in LogSoftmax, so the negative log-likelihood loss is used.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    total_loss = 0
    for batch_images, batch_labels in tqdm(train_loader):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()
    # Report the loss averaged over the number of batches in the epoch.
    print(f'Epoch: {epoch + 1}, Loss: {total_loss / len(train_loader)}')

100%|██████████| 938/938 [00:10<00:00, 89.09it/s]


Epoch: 1, Loss: 0.1821997898312083


100%|██████████| 938/938 [00:11<00:00, 83.22it/s]


Epoch: 2, Loss: 0.05513018359969269


100%|██████████| 938/938 [00:10<00:00, 87.87it/s] 


Epoch: 3, Loss: 0.03746975997783321


100%|██████████| 938/938 [00:10<00:00, 87.64it/s]


Epoch: 4, Loss: 0.02934418031470483


100%|██████████| 938/938 [00:10<00:00, 89.83it/s]


Epoch: 5, Loss: 0.023829962149343596


100%|██████████| 938/938 [00:10<00:00, 87.85it/s]


Epoch: 6, Loss: 0.018323820124110723


100%|██████████| 938/938 [00:09<00:00, 97.93it/s]


Epoch: 7, Loss: 0.016056163942557603


100%|██████████| 938/938 [00:10<00:00, 91.26it/s]


Epoch: 8, Loss: 0.012551495856081726


100%|██████████| 938/938 [00:10<00:00, 91.24it/s]


Epoch: 9, Loss: 0.01179331426626897


100%|██████████| 938/938 [00:10<00:00, 90.05it/s]

Epoch: 10, Loss: 0.010595222772975702





In [20]:
# Switch to inference behaviour (e.g. Dropout disabled) before scoring.
model.eval()

correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)  # moved once so the comparison runs on one device
        outputs = model(images)
        # The predicted class is the index of the largest output along the
        # class dimension. `.data` is unnecessary inside no_grad, and argmax
        # returns the indices directly.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")

Accuracy: 99.03%


# 課題（プーリング層除去版）

In [21]:
import torch
import torch.nn as nn

# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

class MNISTCNNModelNoPool(nn.Module):
    """CNN classifier with two conv stages and no pooling layers.

    Without pooling, only the unpadded 3x3 convolutions shrink the feature
    map. The first linear layer expects 64*24*24 features, which matches
    1x28x28 inputs:

        conv 3x3 -> 32x26x26
        conv 3x3 -> 64x24x24
        flatten  -> 36864 features

    The network ends in ``LogSoftmax``, so ``forward`` returns
    log-probabilities over 10 classes.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels, 3x3 kernel, stride 1 (no pooling).
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Dropout(p=0.1),
        )

        # Stage 2: 32 -> 64 channels, 3x3 kernel, stride 1 (no pooling).
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1),
            nn.ReLU(),
            nn.Dropout(p=0.1),
        )

        # Head: flatten (channels x height x width) into one vector per sample,
        # then 36864 -> 256 -> 10 with log-softmax over the class dimension.
        self.layer3 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=64 * 24 * 24, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=10),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the three stages in order and return the result."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        return x

In [22]:
from tqdm import tqdm

# Fresh pooling-free network instance, placed on the selected device.
model = MNISTCNNModelNoPool().to(device)

# The network ends in LogSoftmax, so the negative log-likelihood loss is used.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(10):
    total_loss = 0
    for batch_images, batch_labels in tqdm(train_loader):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()
    # Report the loss averaged over the number of batches in the epoch.
    print(f'Epoch: {epoch + 1}, Loss: {total_loss / len(train_loader)}')

100%|██████████| 938/938 [00:14<00:00, 66.58it/s]


Epoch: 1, Loss: 0.13546379655487895


100%|██████████| 938/938 [00:13<00:00, 69.65it/s]


Epoch: 2, Loss: 0.03870656430276472


100%|██████████| 938/938 [00:13<00:00, 70.05it/s]


Epoch: 3, Loss: 0.021375070700323198


100%|██████████| 938/938 [00:13<00:00, 69.44it/s]


Epoch: 4, Loss: 0.01368021493793798


100%|██████████| 938/938 [00:13<00:00, 69.38it/s]


Epoch: 5, Loss: 0.01164948907566124


100%|██████████| 938/938 [00:13<00:00, 70.01it/s]


Epoch: 6, Loss: 0.007715226536733189


100%|██████████| 938/938 [00:13<00:00, 67.70it/s]


Epoch: 7, Loss: 0.00653594114257674


100%|██████████| 938/938 [00:13<00:00, 68.69it/s]


Epoch: 8, Loss: 0.006022139888505745


100%|██████████| 938/938 [00:13<00:00, 69.56it/s]


Epoch: 9, Loss: 0.0055118196358952464


100%|██████████| 938/938 [00:13<00:00, 69.39it/s]

Epoch: 10, Loss: 0.003984611043531771





In [23]:
# Switch to inference behaviour (e.g. Dropout disabled) before scoring.
model.eval()

correct = 0
total = 0
with torch.no_grad():  # gradients are not needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)  # moved once so the comparison runs on one device
        outputs = model(images)
        # The predicted class is the index of the largest output along the
        # class dimension. `.data` is unnecessary inside no_grad, and argmax
        # returns the indices directly.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")

Accuracy: 98.93%
