# [BAT512] Advanced Data Mining with AI <br/><br/> 개인과제 2

*구글 드라이브 연결과 라이브러리 임포트는 실습자료 참고하여 본인 경로에 맞게 설정하여 사용

In [4]:
# 데이터 로드 함수
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [158]:
X_train_container, y_train_container = [], []
for i in range(1,6):
    curr_data = unpickle(f"data/cifar-10-batches-py/data_batch_{i}")
    X_train_container.append(curr_data[b'data'])
    y_train_container.append(curr_data[b'labels'])

X_train = torch.tensor(np.array(X_train_container), dtype=torch.float)
y_train = torch.tensor(np.array(y_train_container))

X_train = X_train.view(50000, 3072)
y_train = y_train.view(50000)

test_data = unpickle(f"data/cifar-10-batches-py/test_batch")
X_test = torch.tensor(np.array(test_data[b'data']), dtype=torch.float)
y_test = torch.tensor(np.array(test_data[b'labels']))

X_test = X_test.view(10000, 3072)
y_test = y_test.view(10000)

In [159]:
# sol
X_train = X_train.view(-1, 3, 32, 32)
# y_train = nn.functional.one_hot(y_train.view(50000))

X_test = X_test.view(-1, 3, 32, 32)
# y_test = nn.functional.one_hot(y_test)

In [50]:
import numpy as np

In [76]:
from torch.utils.data import TensorDataset, DataLoader

In [160]:
batch_size = 32

train_set = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, drop_last=True)
test_set = TensorDataset(X_test, y_test)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False, drop_last=False)

In [80]:
import torch.nn as nn

In [81]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        # 첫 번째 계층(Conv1+Pool1)
        # 입력 데이터: (?, 32, 32, 1)
        #    Conv1 -> (?, 32, 32, 32)
        #    MaxPool1 -> (?, 16, 16, 32)
        self.layer_1 = nn.Sequential(
                nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2))

        # 두 번째 계층(Conv2+Pool2)
        # 입력 데이터: (?, 16, 16, 32)
        #    Conv2 -> (?, 16, 16, 64)
        #    MaxPool2 -> (?, 8, 8, 64)
        self.layer_2 = nn.Sequential(
                nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2))

        n_flatten = 8 * 8 * 64 # 일렬로 펴져서 완전연결층에 전달되는 벡터의 크기

        # 세 번째 계층(fully-connected)
        # 입력 데이터: (?, 8*8*64)
        #     fc1 -> (?, 16)
        #     fc2 -> (?, 10)
        self.FC_layer = nn.Sequential(
                nn.Linear(n_flatten, 16),
                nn.Linear(16, 10))

    def forward(self, x):
        hidden = self.layer_1(x)
        hidden = self.layer_2(hidden)
        flattened = self.Flatten(hidden) # 일렬로 펴는 작업
        out = self.FC_layer(flattened)
        return out

    def Flatten(self, inputs):
        shapes = inputs.size(1) * inputs.size(2) * inputs.size(3)
        outputs = inputs.view(inputs.size(0), shapes)

        return outputs

In [163]:
model = CNN()

In [165]:
def run_epoch(model, data_loader, criterion, optimizer, train=False):
    loss_ep = 0
    if train:
        model.train()
    else:
        model.eval()

    for x, y in data_loader:
        if train:
            y_predicted = model(x)
            loss = criterion(y_predicted, y)
            loss_ep += loss.item()
            loss.backward()
            optimizer.step()
        else:
            with torch.autograd.no_grad():
                y_predicted = model(x)
                loss = criterion(y_predicted, y)
                loss_ep += loss.item()

        return loss_ep

In [166]:
learning_rate = 1e-5
max_epochs = 200

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [170]:
for epoch in range(1, max_epochs+1):
    train_loss = run_epoch(model, train_loader, criterion, optimizer, train=True)

    if epoch % 10 == 0:
        print("epoch:{}, train_loss: {:.4f}".format(epoch, train_loss))

epoch:10, train_loss: 14.8553
epoch:20, train_loss: 11.7579
epoch:30, train_loss: 13.7543
epoch:40, train_loss: 13.5583
epoch:50, train_loss: 23.8720
epoch:60, train_loss: 27.2049
epoch:70, train_loss: 18.5986
epoch:80, train_loss: 25.2318
epoch:90, train_loss: 26.0924
epoch:100, train_loss: 22.7545
epoch:110, train_loss: 15.0922
epoch:120, train_loss: 16.3726
epoch:130, train_loss: 16.4938
epoch:140, train_loss: 20.1297
epoch:150, train_loss: 15.2045
epoch:160, train_loss: 13.0863
epoch:170, train_loss: 12.5833
epoch:180, train_loss: 9.6743
epoch:190, train_loss: 10.8406
epoch:200, train_loss: 12.6670
