### 이미지 분류를 위한 신경망 - LeNet-5

In [53]:
import torch
import torch.nn as nn
import torch.optim as optim

In [68]:
batch_size=1
image =torch.rand(batch_size,1,32,32)
image.shape 



torch.Size([1, 1, 32, 32])

In [69]:
conv=nn.Conv2d(1,6,5 ,  stride=1)

x=conv(image)

# ReLU 활성화 함수 추가
relu = nn.ReLU()
x = relu(x)  # [1, 6, 28, 28] (형상 유지)

x.shape





torch.Size([1, 6, 28, 28])

In [70]:
pool=nn.MaxPool2d(2, stride=2)
x=pool(x)
x.shape

torch.Size([1, 6, 14, 14])

In [71]:
conv2=nn.Conv2d(6,16,5 ,  stride=1)

x=conv2(x)

# ReLU 활성화 함수 추가
relu = nn.ReLU()
x = relu(x)  # [1, 6, 28, 28] (형상 유지)

x.shape

torch.Size([1, 16, 10, 10])

In [72]:
pool2=nn.MaxPool2d(2, stride=2)
x=pool2(x)
x.shape

torch.Size([1, 16, 5, 5])

In [73]:
# 텐서 평탄화
x_flattened = x.view(x.size(0), -1)  # [1, 16*5*5]

# 완전 연결층 정의
linear = nn.Linear(16*5*5, 120)

# 완전 연결층에 입력
output = linear(x_flattened)

# ReLU 활성화 함수 추가
relu = nn.ReLU()
output = relu(output)  # [1, 6, 28, 28] (형상 유지)

print(output.shape)  # 출력 형상 확인

torch.Size([1, 120])


In [74]:
# 완전 연결층 정의
linear2 = nn.Linear(120, 84)

# 완전 연결층에 입력
output = linear2(output)

# ReLU 활성화 함수 추가
relu = nn.ReLU()
output = relu(output)  # [1, 6, 28, 28] (형상 유지)

print(output.shape)  # 출력 형상 확인

torch.Size([1, 84])


In [75]:
# 세 번째 완전 연결층 정의
linear3 = nn.Linear(84, 2)  # 입력 차원 84, 출력 차원 2

# 세 번째 완전 연결층에 입력
output = linear3(output)

# 소프트맥스 활성화 함수 추가
softmax = nn.Softmax(dim=1)
output = softmax(output)  # [1, 2]

print(output.shape)  # 출력 형상 확인
print(output)  # 출력 값 확인

torch.Size([1, 2])
tensor([[0.4646, 0.5354]], grad_fn=<SoftmaxBackward0>)


### 모델의 네트워크 클래스

In [67]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.cnn1=nn.Conv2d(in_channels=1, out_channels=6 , kernel_size=5, stride=1, padding=0)
        self.relu1=nn.ReLU()
        self.maxpool1=nn.MaxPool2d(kernel_size=2,stride=2)
        self.cnn2=nn.Conv2d(in_channels=6, out_channels=16 , kernel_size=5, stride=1, padding=0)
        self.relu2=nn.ReLU()
        self.maxpool2=nn.MaxPool2d(kernel_size=2,stride=2)
      

# 자동으로 Linear 레이어의 입력 차원을 결정
        self._to_linear = None
        self.convs = nn.Sequential(
        self.cnn1,
        self.relu1,
        self.maxpool1,
        self.cnn2,
        self.relu2,
        self.maxpool2
        )
        
        self._get_conv_output((1, 32, 32))  # MNIST 이미지 크기 (32x32)로 조정

        self.fc1=nn.Linear(16*5*5, 120)
        self.relu3=nn.ReLU()
        self.fc2=nn.Linear(120, 84)
        self.relu4=nn.ReLU()
        self.fc3= nn.Linear(84, 2) 
        self.output=nn.Softmax(dim=1)

    def _get_conv_output(self, shape):
        with torch.no_grad():
            n = torch.zeros(1, *shape)
            n = self.convs(n)
            self._to_linear = int(torch.prod(torch.tensor(n.shape[1:])))
        return self._to_linear

    
    def forward(self, x):
        out=self.cnn1(x)
        out=self.relu1(out)
        out=self.maxpool1(out)
        out=self.cnn2(out)
        out=self.relu2(out)
        out=self.maxpool2(out)
        out=out.view(out.size(0), -1)
        out=self.fc1(out)
        out=self.fc2(out)
        out=self.fc3(out)
        out=self.output(out)
        return out

 
        
        

In [68]:
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # MNIST 이미지를 32x32로 조정
    transforms.Grayscale(),       # MNIST 데이터셋은 단일 채널 이미지를 사용하므로 이 변환을 적용
    transforms.ToTensor(),
])


### 모델객체 생성

In [69]:
model=LeNet()
print(model)

LeNet(
  (cnn1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (cnn2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convs): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (relu3): ReLU()
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (relu4): ReLU()
  (fc3): Linear(in_features=84, out_features=2, bias=True)
  (output): Softmax(dim=1)
)


In [70]:
image=torch.rand(64,1,32,32)

In [71]:

model(image)

tensor([[0.4458, 0.5542],
        [0.4472, 0.5528],
        [0.4456, 0.5544],
        [0.4471, 0.5529],
        [0.4481, 0.5519],
        [0.4465, 0.5535],
        [0.4456, 0.5544],
        [0.4451, 0.5549],
        [0.4481, 0.5519],
        [0.4489, 0.5511],
        [0.4452, 0.5548],
        [0.4473, 0.5527],
        [0.4503, 0.5497],
        [0.4451, 0.5549],
        [0.4449, 0.5551],
        [0.4488, 0.5512],
        [0.4496, 0.5504],
        [0.4445, 0.5555],
        [0.4458, 0.5542],
        [0.4472, 0.5528],
        [0.4488, 0.5512],
        [0.4451, 0.5549],
        [0.4477, 0.5523],
        [0.4464, 0.5536],
        [0.4453, 0.5547],
        [0.4448, 0.5552],
        [0.4479, 0.5521],
        [0.4460, 0.5540],
        [0.4489, 0.5511],
        [0.4484, 0.5516],
        [0.4440, 0.5560],
        [0.4460, 0.5540],
        [0.4475, 0.5525],
        [0.4450, 0.5550],
        [0.4474, 0.5526],
        [0.4451, 0.5549],
        [0.4467, 0.5533],
        [0.4465, 0.5535],
        [0.4

In [72]:
import torch.optim as optim
loss_fn=nn.CrossEntropyLoss()
optimizer= optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

### datasets 

In [73]:
from torchvision import datasets ,transforms
import torchvision.transforms.v2 as v2
dataset=datasets.MNIST('data', download=False, transform=v2.ToTensor()
                       )

In [74]:
type(dataset.data), dataset.data.shape

(torch.Tensor, torch.Size([60000, 28, 28]))

In [75]:
type(dataset.targets), dataset.targets.shape

(torch.Tensor, torch.Size([60000]))

In [76]:
from torch.utils.data import DataLoader

In [77]:
data_loader = DataLoader(dataset, batch_size=32)

In [78]:
for X_train , y_label in data_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([32, 1, 28, 28]) torch.Size([32])


In [79]:
60000/32

1875.0

In [80]:
epochs = 1

for _ in range(epochs):
    for X_train, y_label in data_loader:
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = loss_fn(outputs, y_label)
        loss.backward()
        optimizer.step()

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x256 and 400x120)