### 홀드아웃(Hold-out)
주어진 데이터를 랜덤하게 두 개의 데이터로 구분하여 훈련/평가로 분류한 뒤 사용하는 방식이다.
데이터가 충분한 경우, 훈련 데이터는 다시 훈련/검증용 데이터로 분류할 수 있다.

### 교차검증(K-fold Cross-Validation)
데이터를 무작위, 혹은 규칙을 설정하여 K개의 집단으로 구분한 뒤, 한 개의 데이터 집단을 검증용 데이터셋으로, 나머지 K-1개의 집단을 훈련용 데이터셋으로 활용한다. 검증용 집단을 바꿔 가며 이를 K번 반복하여 얻은 MSE 값의 평균을 최종 성과지표로 활용한다.

데이터셋을 K번 반복하여 학습하기 때문에 시간이 오래 걸리는 단점이 있지만, 다양한 학습 데이터를 사용하는 관점에서 성능이 좋다는 장점이 있다.


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# seed: fix every RNG this script relies on so that runs are repeatable
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)
if device == 'cuda':
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Seeding alone does not make GPU runs repeatable: with benchmark=True
    # cuDNN autotunes and may select different (non-deterministic) kernels
    # from run to run. Request deterministic kernels and turn autotuning off
    # so the seeds above actually take effect.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# hyperparameter
batch_size = 256      # samples per mini-batch handed to the DataLoaders
epochs = 20           # number of passes of the training loop
learning_rate = 4e-3  # step size given to the optimizer

# data and parameter

# MNIST training split stored under data/mnist (fetched when missing because
# download=True). `ds` is presumably torchvision.datasets -- confirm against
# the file's imports.
train = ds.MNIST(
    root='data/mnist',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# MNIST test split (train=False), kept apart from the hold-out split below.
test = ds.MNIST(
    root='data/mnist',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Hold-out: keep 20% of the training split aside for validation.
# random_state pins the partition to `seed`; without it the split depends on
# the current state of the global NumPy RNG, so re-running this cell alone
# would move samples between the train and validation sets.
train, valid = train_test_split(train, test_size=0.2, random_state=seed)

class MNISTDataset(Dataset):
    """Map-style dataset over an indexable collection of ``(X, y)`` pairs."""

    def __init__(self, data):
        """Store a reference to ``data``.

        Args:
            data: Any object supporting ``len()`` and integer indexing whose
                items unpack into exactly two values.
        """
        super().__init__()
        self.data = data

    def __len__(self):
        """Return the number of samples in the wrapped collection."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as an ``(X, y)`` tuple."""
        sample = self.data[idx]
        # Unpacking (rather than returning `sample` as-is) enforces that the
        # stored item is a pair and normalises the result to a tuple.
        features, label = sample
        return features, label
    


# Wrap each split so all three are served through the same Dataset class.
train_dataset = MNISTDataset(train)
valid_dataset = MNISTDataset(valid)
test_dataset = MNISTDataset(test)

# Training batches: reshuffled every epoch. The trailing partial batch is
# dropped so every optimisation step sees exactly `batch_size` samples.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

# Evaluation batches: fixed order, and drop_last=False so that no validation
# sample is silently left out of the reported metrics.
# NOTE: the final batch may therefore hold fewer than `batch_size` samples.
valid_dataloader = DataLoader(
    dataset=valid_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)

# Same policy for the test split: evaluate on every sample.
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
)

class Model(nn.Module):
    """Two-layer perceptron: ``Linear -> ReLU -> Linear``.

    ``forward`` returns the output of the last linear layer unchanged; no
    activation is applied to it.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the layers.

        Args:
            input_dim: Size of the last dimension of the input.
            hidden_dim: Width of the hidden layer.
            output_dim: Size of the last dimension of the result.
        """
        super().__init__()
        # Remember the layer sizes on the instance for later inspection.
        self.input_dim, self.hidden_dim, self.output_dim = (
            input_dim,
            hidden_dim,
            output_dim,
        )

        # Sub-modules, registered in the order they are applied in forward().
        self.linear = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map ``x`` (last dimension ``input_dim``) to ``output_dim`` scores."""
        hidden = self.relu(self.linear(x))
        return self.output(hidden)
    

# Classifier with 28*28 input features, 256 hidden units and 10 outputs,
# moved to the selected device.
model = Model(input_dim=28 * 28, hidden_dim=256, output_dim=10)
model = model.to(device)

# Cross-entropy loss, and an Adam optimizer over all model parameters.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)



# Per-epoch history of the mean loss / accuracy, stored as plain Python floats.
train_losses = []
train_accs = []
valid_losses = []
valid_accs = []

for epoch in tqdm(range(1, epochs + 1)):
    # ---- training pass ----
    total_train_loss = 0.0   # sum of per-sample losses seen this epoch
    total_train_correct = 0  # number of correctly classified samples
    total_train_seen = 0     # number of samples actually processed

    model.train()
    for X, y in train_dataloader:
        X = X.to(device).flatten(start_dim=1)
        y = y.to(device)

        optimizer.zero_grad()
        output = model(X)
        # nn.CrossEntropyLoss applies log-softmax itself, so it must be fed
        # the raw model output; applying softmax first squashes the scores
        # twice and flattens the gradients.
        train_loss = criterion(output, y)
        train_loss.backward()
        optimizer.step()

        # .item() detaches the values; accumulating the loss tensor itself
        # would keep every batch's autograd graph alive until the epoch ends.
        n_batch = y.size(0)
        total_train_loss += train_loss.item() * n_batch
        # argmax over raw scores equals argmax over softmax probabilities.
        total_train_correct += (output.argmax(dim=-1) == y).sum().item()
        total_train_seen += n_batch

    # Sample-weighted means: identical to averaging batch means when all
    # batches have the same size, and still correct when the last one is short.
    mean_train_loss = total_train_loss / total_train_seen
    mean_train_acc = total_train_correct / total_train_seen
    train_losses.append(mean_train_loss)
    train_accs.append(mean_train_acc)

    # ---- validation pass ----
    total_valid_loss = 0.0
    total_valid_correct = 0
    total_valid_seen = 0

    model.eval()
    with torch.no_grad():
        for X, y in valid_dataloader:
            X = X.to(device).flatten(start_dim=1)
            y = y.to(device)

            output = model(X)
            valid_loss = criterion(output, y)

            n_batch = y.size(0)
            total_valid_loss += valid_loss.item() * n_batch
            total_valid_correct += (output.argmax(dim=-1) == y).sum().item()
            total_valid_seen += n_batch

    mean_valid_loss = total_valid_loss / total_valid_seen
    mean_valid_acc = total_valid_correct / total_valid_seen
    valid_losses.append(mean_valid_loss)
    valid_accs.append(mean_valid_acc)

    print(f'Epoch: {epoch} | train_loss: {mean_train_loss: .4f} | train_acc: {mean_train_acc*100: .2f}% | valid_loss: {mean_valid_loss: .4f} | valid_acc: {mean_valid_acc*100: .2f}%'  )



