In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import albumentations as A
from torch.utils.data import Dataset
from albumentations.pytorch import ToTensorV2

In [None]:
# Hyperparameters used by the training cells further down.
batch_size = 32
lr = 0.0001
num_epoch = 10

# Turn each image into a tensor, then shift/scale it with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST train / test splits; files are downloaded into ./data when missing.
trainset = torchvision.datasets.MNIST('./data', train=True, transform=transform, download=True)
testset = torchvision.datasets.MNIST('./data', train=False, transform=transform, download=True)

# Only the training split is shuffled; both loaders use two worker processes.
train_loader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=batch_size, shuffle=True, num_workers=2
)
test_loader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=batch_size, shuffle=False, num_workers=2
)

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv+max-pool stages, then two linear layers.

    The forward pass flattens to 7 * 7 * 50 features and produces 10 outputs
    per sample.

    NOTE(review): a later cell defines another class that is also named
    ``CNN``; once that cell has run, it replaces this definition.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=2)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(in_features=7 * 7 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)

    def forward(self, x):
        """Return the raw (un-softmaxed) outputs of the final linear layer for ``x``."""
        # Stage 1: 28x28x1 -> 28x28x20 (conv) -> 14x14x20 (pool)
        features = self.pool(F.relu(self.conv1(x)))
        # Stage 2: 14x14x20 -> 14x14x50 (conv) -> 7x7x50 (pool)
        features = self.pool(F.relu(self.conv2(features)))

        flat = features.view(-1, 7 * 7 * 50)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)



In [None]:
class CNN(nn.Module):
    """Convolutional classifier with selectable weight initialisation.

    Two 5x5 convolutions (max-pool after the first, adaptive average-pool to
    7x7 after the second) feed a 500-unit hidden linear layer with dropout and
    a 10-way output layer.
    """

    # Supported ``weight_init_method`` names and the torch.nn.init function each
    # one applies to Conv2d / Linear weights.
    _WEIGHT_INITIALIZERS = {
        'gaussian': nn.init.normal_,
        'xavier': nn.init.xavier_normal_,
        'kaiming': nn.init.kaiming_normal_,
        'zeros': nn.init.zeros_,
    }

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1, padding=2)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))
        self.dropout = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(7 * 7 * 50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return the outputs of the final linear layer (10 values per sample)."""
        x = F.relu(self.conv1(x))  # 28x28x1 -> 28x28x20
        x = self.pool(x)           # 28x28x20 -> 14x14x20
        x = F.relu(self.conv2(x))  # 14x14x20 -> 14x14x50
        x = self.adaptive_pool(x)  # 14x14x50 -> 7x7x50
        x = x.view(-1, 7 * 7 * 50)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

    def weight_initialization(self, weight_init_method='xavier'):
        """Re-initialise every Conv2d and Linear layer in place.

        Args:
            weight_init_method: one of ``'gaussian'``, ``'xavier'``,
                ``'kaiming'`` or ``'zeros'``; selects the initialiser applied
                to the layer weights. Biases are always set to zero.

        Raises:
            ValueError: if ``weight_init_method`` is not a supported name.
                Nothing is modified in that case (previously an unknown name
                silently kept the weights but still zeroed the biases).
        """
        # Validate before touching any parameter so a typo cannot leave the
        # model half-initialised.
        if weight_init_method not in self._WEIGHT_INITIALIZERS:
            supported = ', '.join(repr(key) for key in self._WEIGHT_INITIALIZERS)
            raise ValueError(
                f"unknown weight_init_method {weight_init_method!r}; expected one of {supported}"
            )
        init_weight = self._WEIGHT_INITIALIZERS[weight_init_method]

        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                init_weight(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)


In [None]:
# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the CNN on that device, then overwrite its default weights.
model = CNN()
model = model.to(device)
model.weight_initialization('gaussian')

# Cross-entropy objective optimised with Adam at the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [None]:
# Train the model for `num_epoch` passes over `train_loader`,
# printing the current batch loss every 100 iterations.
model.train()
for epoch in range(num_epoch):
    for index, batch in enumerate(train_loader):
        data, target = batch
        data = data.to(device)
        target = target.to(device)

        # Clear stale gradients, then forward / backward / update.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if not index % 100:
            print(f"loss of {epoch} epoch, {index} index : {loss.item()}")

In [None]:
# Evaluate the model on `test_loader` and report the per-sample average loss
# together with the classification accuracy.
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
  for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    # `criterion` was created with its default reduction, so it yields the
    # mean loss of this batch; weight it by the batch size so that dividing by
    # the dataset size below gives a true per-sample average (the last batch
    # may be smaller than the others).
    test_loss += criterion(output, target).item() * target.size(0)
    pred = output.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()

# Previously the summed batch losses were printed as the "average" loss.
test_loss /= len(test_loader.dataset)

print('\n 평균 loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


## AlbumentationsMNIST

https://albumentations-demo.herokuapp.com/

In [None]:
class AlbumentationsMNIST(Dataset):
    """Dataset wrapper that runs an Albumentations-style transform on each image.

    Args:
        dataset: indexable collection of ``(image, label)`` pairs.
        transform: optional callable invoked as ``transform(image=array)`` that
            returns a mapping with the result under the ``'image'`` key.
            When it is falsy, samples are returned as plain NumPy arrays.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    @staticmethod
    def _to_hwc_array(image):
        """Convert ``image`` to the channel-last NumPy layout Albumentations expects."""
        if torch.is_tensor(image):
            array = image.detach().cpu().numpy()
            if array.ndim == 3:
                # Assumes a 3-D tensor is channel-first (C, H, W), which is what
                # torchvision's ToTensor produces; move channels to the last axis.
                array = np.ascontiguousarray(array.transpose(1, 2, 0))
            return array
        return np.array(image)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, transformed when a transform is set."""
        image, label = self.dataset[idx]

        if not self.transform:
            # No transform: keep the sample's own layout, only convert to NumPy.
            return np.array(image), label

        # Without this conversion a (1, 28, 28) tensor would be read by the
        # transform as a 1-pixel-high image with 28 channels.
        image_np = self._to_hwc_array(image)
        transformed = self.transform(image=image_np)
        return transformed['image'], label

In [None]:
# Training-time augmentation: each transform is applied with its probability `p`,
# and the result is converted to a tensor at the end.
# NOTE(review): horizontal / vertical flips mirror the digits, which can yield
# glyphs that no longer match their label - confirm this is intended for MNIST.
aug_transform = A.Compose([
    A.Rotate(limit=20, p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    ToTensorV2()
])

# Evaluation data must not be randomly augmented, otherwise the reported
# loss / accuracy change from run to run and are measured on distorted images.
# The test split therefore only gets the tensor conversion.
eval_transform = A.Compose([
    ToTensorV2()
])

trainset_augmented = AlbumentationsMNIST(trainset, transform=aug_transform)
testset_augmented = AlbumentationsMNIST(testset, transform=eval_transform)

In [None]:
# Rebind both loaders to the Albumentations-wrapped datasets;
# only the training loader shuffles its samples.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=batch_size, shuffle=do_shuffle)
    for split, do_shuffle in ((trainset_augmented, True), (testset_augmented, False))
)

# RNN

In [None]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    """Sequence classifier holding both an LSTM and a GRU in front of one linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        num_classes: number of outputs of the final linear layer.
    """

    # Supported ``weight_init_method`` names and the torch.nn.init function each
    # one applies to weight matrices.
    _WEIGHT_INITIALIZERS = {
        'gaussian': nn.init.normal_,
        'xavier': nn.init.xavier_normal_,
        'kaiming': nn.init.kaiming_normal_,
        'zeros': nn.init.zeros_,
    }

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.gru = nn.GRU(input_size, self.hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, num_classes)

    def forward(self, x, rnn):
        """Classify ``x`` from the recurrent output at the last time step.

        Args:
            x: batch-first input, indexed as (batch, time, feature).
            rnn: ``'lstm'`` selects the LSTM; any other value selects the GRU.

        Returns:
            Tensor with one row of ``num_classes`` outputs per batch element.
        """
        # Zero initial states created with x's own device and dtype, so the
        # module also works for non-float32 inputs and avoids a CPU->device copy.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        if rnn == 'lstm':
            c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
            out, _ = self.lstm(x, (h0, c0))
        else:
            out, _ = self.gru(x, h0)

        # Only the last time step feeds the classifier head.
        out = self.fc(out[:, -1, :])
        return out

    def weight_initialization(self, weight_init_method='xavier'):
        """Re-initialise the linear head and both recurrent layers in place.

        Args:
            weight_init_method: one of ``'gaussian'``, ``'xavier'``,
                ``'kaiming'`` or ``'zeros'``; applied to the linear weight and
                to the recurrent ``weight_ih`` / ``weight_hh`` matrices.
                Biases are always set to zero.

        Raises:
            ValueError: if ``weight_init_method`` is not a supported name.
                Nothing is modified in that case (previously an unknown name
                silently kept the weights but still zeroed the biases).
        """
        # Validate before touching any parameter so a typo cannot leave the
        # model half-initialised.
        if weight_init_method not in self._WEIGHT_INITIALIZERS:
            supported = ', '.join(repr(key) for key in self._WEIGHT_INITIALIZERS)
            raise ValueError(
                f"unknown weight_init_method {weight_init_method!r}; expected one of {supported}"
            )
        init_weight = self._WEIGHT_INITIALIZERS[weight_init_method]

        for m in self.modules():
            if isinstance(m, nn.Linear):
                init_weight(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

            elif isinstance(m, (nn.LSTM, nn.GRU)):
                for name, param in m.named_parameters():
                    if 'weight_ih' in name or 'weight_hh' in name:
                        init_weight(param)
                    elif 'bias' in name:
                        nn.init.zeros_(param)


In [None]:
# Input layout for the RNN: the training cells reshape every batch to
# (-1, sequence_length, input_size).
sequence_length, input_size = 28, 28

# Recurrent network size and number of output classes.
hidden_size, num_layers = 128, 2
num_classes = 10

# Training hyperparameters declared for the RNN section.
# NOTE(review): the DataLoaders were built in an earlier cell, so rebinding
# `batch_size` here does not change their batch size - confirm intent.
batch_size, num_epochs, learning_rate = 100, 10, 0.01

model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)

In [None]:
# List the shape of every learnable tensor in the model, one per line.
for param in model.parameters():
    print(param.shape)

In [None]:
# Build a fresh RNN on the available device and overwrite its default weights.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
model.weight_initialization('gaussian')
criterion = torch.nn.CrossEntropyLoss()
# Use the learning rate declared with the RNN hyperparameters; the optimizer
# previously reused `lr` from the CNN section, leaving `learning_rate` unused.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train the RNN (LSTM branch), printing the current batch loss every 100 iterations.
model.train()
# Iterate over `num_epochs` from the RNN hyperparameters; the loop previously
# used `num_epoch` from the CNN section, leaving the RNN setting unused.
for epoch in range(num_epochs):
  for index, (data, target) in enumerate(train_loader):
      # Reshape each batch to (batch, sequence_length, input_size) so every
      # image row becomes one time step.
      data, target = data.reshape(-1, sequence_length, input_size).to(device), target.to(device)
      optimizer.zero_grad()
      output = model(data, 'lstm')
      loss = criterion(output, target)
      loss.backward()
      optimizer.step()

      if index % 100 == 0:
          print(f"loss of {epoch} epoch, {index} index : {loss.item()}")

In [None]:
# Peek at one training batch reshaped to (batch, sequence_length, input_size) ...
data, target = next(iter(train_loader))
data = data.reshape(-1, sequence_length, input_size).to(device)
target = target.to(device)
print(data, target)
print(data.shape)

# ... and at one batch in the layout the loader yields it.
data, target = next(iter(train_loader))
data = data.to(device)
target = target.to(device)
print(data, target)
print(data.shape)

In [None]:
# Evaluate the RNN (LSTM branch) on `test_loader` and report the per-sample
# average loss together with the classification accuracy.
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
  for data, target in test_loader:
    data, target = data.reshape(-1, sequence_length, input_size).to(device), target.to(device)
    output = model(data, 'lstm')
    # `criterion` was created with its default reduction, so it yields the
    # mean loss of this batch; weight it by the batch size so that dividing by
    # the dataset size below gives a true per-sample average (the last batch
    # may be smaller than the others).
    test_loss += criterion(output, target).item() * target.size(0)
    pred = output.argmax(dim=1, keepdim=True)
    correct += pred.eq(target.view_as(pred)).sum().item()

# Previously the summed batch losses were printed as the "average" loss.
test_loss /= len(test_loader.dataset)

print('\n 평균 loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
