In [1]:
import torch.nn as nn
import torch

In [2]:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=0),
                                     nn.ReLU(),
                                     nn.MaxPool2d(2), #풀링을 통해서 원래 이미지 픽셀 반으로 줄게 됨 
                                     nn.Conv2d(16, 32, 5, 1, 0), #입력채널 16, 아웃채널 32, 커널 5
                                     nn.ReLU(),
                                     nn.MaxPool2d(2) #2차 풀링을 통해서 또 반으로 줄게 됨 근데 stride 때문에 정확히 반은 아님 
                                    )

        self.flatten = nn.Flatten() #선형 넣기전에 평탄화 작업
        
        self.classifier = nn.Sequential(nn.Linear(32*50*50, 512), #피쳐수(채널 수*높이*너비)는 2차 풀링 레이어를 통과한 후의 높이 너비가 50 50이라는거임. 
                                        nn.ReLU(),
                                        nn.Linear(512, 2), #내가 줬던 배치 크기=샘플 수=row, 출력되는 클래스는 2개로 각 2개에 속할 확률 
                                        nn.Softmax(dim=1) #Softmax(dim=1)을 통과하면, 샘플 당 피쳐 모양에서-> 각 클래스에 속할 확률로, 합 1로 변경되어서 나옴.
                                        )

    def forward(self, x):
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)
        return x
        
#      out = self.cnn1(x)
#        out = self.relu1(out)
#        out = self.maxpool1(out)
#        out = self.cnn2(out)
#        out = self.relu2(out)
#        out = self.maxpool2(out)
        
#        out = self.flatten(out)
#        out = self.fc1(out)
#        out = self.relu5(out)
#        out = self.fc2(out)
#        out = self.output(out)
#        return out

In [3]:
model = LeNet5()
print(model)

LeNet5(
  (features): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (classifier): Sequential(
    (0): Linear(in_features=80000, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=2, bias=True)
    (3): Softmax(dim=1)
  )
)


In [4]:
image = torch.rand(10, 3, 212, 212) #배치 크기(10)는 내가 알고자 하는 샘플의 갯수라고 생각하면 됨 
model(image)

tensor([[0.5114, 0.4886],
        [0.5057, 0.4943],
        [0.5095, 0.4905],
        [0.5115, 0.4885],
        [0.5066, 0.4934],
        [0.5105, 0.4895],
        [0.5072, 0.4928],
        [0.5073, 0.4927],
        [0.5123, 0.4877],
        [0.5067, 0.4933]], grad_fn=<SoftmaxBackward0>)

In [5]:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(nn.Conv2d(1, 6, 5, padding='same'),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2),
                                    nn.Conv2d(6, 16, 5, padding='same'),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2),
                                    nn.Conv2d(16, 126, 5, padding='same'),
                                    nn.ReLU(),
                                    nn.MaxPool2d(2)
                                     )
        self.flatten=nn.Flatten()

        self.classifier= nn.Sequential(nn.Linear(126*3*3, 128),
                                       nn.ReLU(),
                                       nn.Linear(128, 64),
                                       nn.ReLU(),
                                       nn.Linear(64, 10), #출력이 10이므로 softmax 통과하고 나면 각 10개의 클래스에 속할 확률이 모두합 1로 나오게 될 것임
                                       nn.Softmax(dim=1)
                                      )

    def forward(self, x):
        x = self.features(x)
        x = self.flatten(x)
        x = self.classifier(x)
        return x
                                    

In [6]:
from torchvision import datasets
from torchvision.datasets import MNIST
from torchvision.transforms import v2

In [7]:
dataset = datasets.MNIST('data', download=True, transform=v2.ToTensor()) #download default는 false이므로, 실행 후 data안에 MNIST 폴더가 생긴걸 볼 수 있음 



In [8]:
dir(datasets)

['CIFAR10',
 'CIFAR100',
 'CLEVRClassification',
 'CREStereo',
 'Caltech101',
 'Caltech256',
 'CarlaStereo',
 'CelebA',
 'Cityscapes',
 'CocoCaptions',
 'CocoDetection',
 'Country211',
 'DTD',
 'DatasetFolder',
 'EMNIST',
 'ETH3DStereo',
 'EuroSAT',
 'FER2013',
 'FGVCAircraft',
 'FakeData',
 'FallingThingsStereo',
 'FashionMNIST',
 'Flickr30k',
 'Flickr8k',
 'Flowers102',
 'FlyingChairs',
 'FlyingThings3D',
 'Food101',
 'GTSRB',
 'HD1K',
 'HMDB51',
 'INaturalist',
 'ImageFolder',
 'ImageNet',
 'Imagenette',
 'InStereo2k',
 'KMNIST',
 'Kinetics',
 'Kitti',
 'Kitti2012Stereo',
 'Kitti2015Stereo',
 'KittiFlow',
 'LFWPairs',
 'LFWPeople',
 'LSUN',
 'LSUNClass',
 'MNIST',
 'Middlebury2014Stereo',
 'MovingMNIST',
 'Omniglot',
 'OxfordIIITPet',
 'PCAM',
 'PhotoTour',
 'Places365',
 'QMNIST',
 'RenderedSST2',
 'SBDataset',
 'SBU',
 'SEMEION',
 'STL10',
 'SUN397',
 'SVHN',
 'SceneFlowStereo',
 'Sintel',
 'SintelStereo',
 'StanfordCars',
 'UCF101',
 'USPS',
 'VOCDetection',
 'VOCSegmentation',
 

In [9]:
type(dataset.targets), dataset.targets.shape

(torch.Tensor, torch.Size([60000]))

In [10]:
type(dataset.data), dataset.data.shape #배치 크기 60000개.샘플 60000장... 크기 28*28

(torch.Tensor, torch.Size([60000, 28, 28]))

In [11]:
from torch.utils.data import DataLoader
#60000장 배치를 한번에 할 수 없으니까 배치 사이즈를 32씩 쪼개서 할 수 있도록

In [12]:
data_loader=DataLoader(dataset, batch_size = 32) #데이터셋을 주면 loader가 알아서 32개씩 쪼개줌 

In [13]:
for X_train, y_label in data_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([32, 1, 28, 28]) torch.Size([32])


In [14]:
loss_fn = nn.CrossEntropyLoss()

In [15]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [16]:
epochs = 1

for _ in range(epochs):
    for X_train, y_label in data_loader:
        optimizer.zero_grad() #경사 초기화
        outputs = model(X_train) #모델 예측
        loss = loss_fn(outputs, y_label) #손실 계산
        loss.backward()  #경사 계산
        optimizer.step() #모델 파라미터 업데이트 

RuntimeError: Given groups=1, weight of size [16, 3, 5, 5], expected input[32, 1, 28, 28] to have 3 channels, but got 1 channels instead