# [요구사항 1] titanic 딥러닝 모델 기본 훈련

In [29]:
# --- JUPYTER NOTEBOOK용 설정 ---
import os, sys
from types import SimpleNamespace
from pathlib import Path

# Locate the repository root automatically: walk from the current working
# directory up through its parents and take the first directory that
# contains a "_01_code" folder.
repo_root = None
cur = Path.cwd()
for p in [cur, *cur.parents]:
    if (p / "_01_code").exists():
        repo_root = p
        break
if repo_root is None:
    # No fallback path is configured; set repo_root by hand here if the
    # notebook is started from outside the repository tree.
    raise RuntimeError("repo_root 탐색 실패함")

# Expose the repository root as an absolute path string and make it
# importable so that project packages can be imported from the notebook.
BASE_PATH = str(repo_root.resolve())
print("BASE_PATH:", BASE_PATH)
if BASE_PATH not in sys.path:
    sys.path.append(BASE_PATH)

# -------------------------------------------------------

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from datetime import datetime
import wandb

# Titanic 데이터셋 
from _03_homeworks.homework_2.titanic_dataset import get_preprocessed_dataset


# Stand-in for argparse in the Jupyter Notebook environment: the functions
# below read their hyperparameters from this module-level namespace.
args = SimpleNamespace(
    wandb=True,           # Turn WandB logging on/off (True/False)
    batch_size=128,       # Batch size; varied between runs (16, 32, 64, 128)
    epochs=300,           # Number of training epochs
    learning_rate=1e-3,   # Learning rate passed to the optimizer
    n_hidden_unit_list=[64, 32],  # Widths of the two hidden layers
    activation="leakyrelu"     # Activation function; varied between runs (relu, elu, leakyrelu, sigmoid)
)

# 활성화 함수 미리 정의
def get_activation(name: str) -> nn.Module:
    """Return a new activation module selected by name.

    The lookup is case-insensitive. Supported names are "relu", "elu",
    "leakyrelu" and "sigmoid".

    Args:
        name: Name of the activation function.

    Returns:
        A freshly constructed ``nn.Module`` for the requested activation.

    Raises:
        ValueError: If ``name`` is not one of the supported names.
    """
    name = name.lower()
    if name == "relu":
        return nn.ReLU()
    if name == "elu":
        return nn.ELU(alpha=1.0)
    # Compare for equality: ``name in "leakyrelu"`` would be a substring test
    # and would silently accept "", "leaky", "lu", ... as LeakyReLU.
    if name == "leakyrelu":
        return nn.LeakyReLU(negative_slope=0.01)
    if name == "sigmoid":
        return nn.Sigmoid()
    raise ValueError(f"Unsupported activation: {name}")

def get_data():
    """Build the training and validation DataLoaders.

    The datasets come from get_preprocessed_dataset() (titanic_dataset.py);
    the third item it returns is not used here.

    Returns:
        A (train_data_loader, validation_data_loader) tuple. The training
        loader shuffles and uses args.batch_size; the validation loader
        serves the whole validation set as a single batch.
    """
    train_split, validation_split, _unused = get_preprocessed_dataset()

    # Shuffled mini-batches for training.
    train_loader = DataLoader(
        dataset=train_split,
        shuffle=True,
        batch_size=args.batch_size,
    )

    # One full-size batch for validation.
    whole_validation_set = len(validation_split)
    validation_loader = DataLoader(
        dataset=validation_split,
        batch_size=whole_validation_set,
    )

    return train_loader, validation_loader

class MyModel(nn.Module):
    """Fully connected network with two hidden layers.

    The hidden widths are read from args.n_hidden_unit_list (which must hold
    exactly two values) and the activation type from args.activation.
    """

    def __init__(self, n_input, n_output):
        super().__init__()
        first_width, second_width = args.n_hidden_unit_list

        # One separate activation instance per hidden layer, both of the
        # type selected in args.activation.
        first_activation = get_activation(args.activation)
        second_activation = get_activation(args.activation)

        layers = [
            nn.Linear(n_input, first_width),
            first_activation,
            nn.Linear(first_width, second_width),
            second_activation,
            nn.Linear(second_width, n_output),  # raw logits, one per class
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run x through the layer stack and return the logits."""
        logits = self.model(x)
        return logits

def get_model_and_optimizer(input_dim):
    """Create the classifier and the SGD optimizer that trains it.

    Args:
        input_dim: Number of input features per sample (the original author
            notes 10 for the Titanic data).

    Returns:
        A (model, optimizer) tuple. The model emits 2 outputs (binary
        classification); the optimizer is SGD with lr=args.learning_rate.
    """
    network = MyModel(n_input=input_dim, n_output=2)
    sgd = optim.SGD(network.parameters(), lr=args.learning_rate)
    return network, sgd

def training_loop(model, optimizer, train_data_loader, validation_data_loader, device):
    """Train the model for args.epochs epochs with cross-entropy loss.

    After every epoch the model is evaluated on the validation loader. The
    per-batch mean training and validation losses are logged to wandb when
    args.wandb is set, and printed at epoch 1 and every 50th epoch.

    Args:
        model: Network to train; it is moved to ``device``.
        optimizer: Optimizer that updates the model parameters.
        train_data_loader: Yields dict batches with 'input' and 'target'.
        validation_data_loader: Same batch format, used for evaluation only.
        device: Device on which batches and the model are placed.
    """
    criterion = nn.CrossEntropyLoss()
    total_epochs = args.epochs
    print_every = 50

    model.to(device)

    for epoch in range(1, total_epochs + 1):
        # ----- Training pass -----
        model.train()
        running_train, train_batches = 0.0, 0

        for batch in train_data_loader:
            # Batches are dicts: {'input': Tensor, 'target': Tensor}
            features = batch['input'].to(device)
            labels = batch['target'].to(device)

            batch_loss = criterion(model(features), labels)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            running_train += batch_loss.item()
            train_batches += 1

        # ----- Validation pass (no gradients) -----
        model.eval()
        running_val, val_batches = 0.0, 0
        with torch.no_grad():
            for batch in validation_data_loader:
                features = batch['input'].to(device)
                labels = batch['target'].to(device)

                running_val += criterion(model(features), labels).item()
                val_batches += 1

        # Mean loss per batch; max(1, ...) guards against an empty loader.
        mean_train = running_train / max(1, train_batches)
        mean_val = running_val / max(1, val_batches)

        # wandb logging
        if args.wandb:
            metrics = {
                "Epoch": epoch,
                "Training loss": mean_train,
                "Validation loss": mean_val,
            }
            wandb.log(metrics)

        if epoch == 1 or epoch % print_every == 0:
            print(f"[Epoch {epoch:>4}] T_loss: {mean_train:.4f} | V_loss: {mean_val:.4f}")

# Run: executed top to bottom as a notebook cell.
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Start a WandB run named after the current local timestamp and record the
# hyperparameters taken from args.
if args.wandb:
    wandb.init(
        mode="online",
        project="titanic_training",
        notes="Titanic binary classification with FCN (Notebook)",
        tags=["titanic", "binary_classification", "fcn", args.activation, f"bs{args.batch_size}"],
        name=datetime.now().astimezone().strftime('%Y-%m-%d_%H-%M-%S'),
        config=dict(
            epochs=args.epochs,
            batch_size=args.batch_size,
            learning_rate=args.learning_rate,
            n_hidden_unit_list=args.n_hidden_unit_list,
            activation=args.activation
        )
    )

train_data_loader, validation_data_loader = get_data()

# Infer the input dimension automatically by pulling one batch and reading
# the size of its second axis.
sample_batch = next(iter(train_data_loader))
input_dim = sample_batch['input'].shape[1]
print("Input dim:", input_dim)

model, optimizer = get_model_and_optimizer(input_dim)

# Visual separator in the cell output.
print("#" * 50, 1)

training_loop(
    model=model,
    optimizer=optimizer,
    train_data_loader=train_data_loader,
    validation_data_loader=validation_data_loader,
    device=device
)

# Print the run URL before closing the WandB run.
if args.wandb:
    print("WandB run URL:", wandb.run.url)
    wandb.finish()


BASE_PATH: C:\Users\hanle\git\link_dl
Device: cpu


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["alone"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["Embarked"].fillna("missing", inplace=True)


Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked', 'title', 'family_num', 'alone'],
      dtype='object')
   Survived  Pclass  Sex   Age  SibSp  Parch     Fare  Embarked  title  \
0       0.0       3    1  22.0      1      0   7.2500         2      2   
1       1.0       1    0  38.0      1      0  71.2833         0      3   
2       1.0       3    0  26.0      0      0   7.9250         2      1   
3       1.0       1    0  35.0      1      0  53.1000         2      3   
4       0.0       3    1  35.0      0      0   8.0500         2      2   
5       0.0       3    1  29.0      0      0   8.4583         1      2   
6       0.0       1    1  54.0      0      0  51.8625         2      2   
7       0.0       3    1   2.0      3      1  21.0750         2      0   
8       1.0       3    0  27.0      0      2  11.1333         2      3   
9       1.0       2    0  14.0      1      0  30.0708         0      3   

   family_num  alone  
0           1    0.

0,1
Epoch,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇█
Training loss,██▇▇▆▇▇▆▇▅▄▅▅▆▅▃▃▄▃▅▄▄▄▃▄▂▁▂▂▂▃▁▃▂▂▂▃▂▁▁
Validation loss,▇▇▇███▇▇▇▇▆▇▆▇▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▄▂▂▃▂▁▁

0,1
Epoch,300.0
Training loss,0.55258
Validation loss,0.61259






# 
# 
# [요구사항 2] Activation Function과 Batch Size 변경 및 선택하기

# ----- 학습 결과 [Activation Function - Batch Size] -----
### Epoch: 300으로 통일

[Sigmoid-16]
[Epoch    1] T_loss: 0.7580 | V_loss: 0.7615
[Epoch   50] T_loss: 0.6609 | V_loss: 0.6392
[Epoch  100] T_loss: 0.6463 | V_loss: 0.6271
[Epoch  150] T_loss: 0.6322 | V_loss: 0.6163
[Epoch  200] T_loss: 0.6161 | V_loss: 0.6072
[Epoch  250] T_loss: 0.6095 | V_loss: 0.6028
[Epoch  300] T_loss: 0.6033 | V_loss: 0.6009

Run summary:
Epoch	300
Training loss	0.60332
Validation loss	0.60094




[Sigmoid-32]
[Epoch    1] T_loss: 0.7196 | V_loss: 0.7194
[Epoch   50] T_loss: 0.6621 | V_loss: 0.6485
[Epoch  100] T_loss: 0.6548 | V_loss: 0.6429
[Epoch  150] T_loss: 0.6531 | V_loss: 0.6373
[Epoch  200] T_loss: 0.6460 | V_loss: 0.6315
[Epoch  250] T_loss: 0.6338 | V_loss: 0.6256
[Epoch  300] T_loss: 0.6311 | V_loss: 0.6200

Run summary:
Epoch	300
Training loss	0.63114
Validation loss	0.62003



[Sigmoid-64]
[Epoch    1] T_loss: 0.6734 | V_loss: 0.6489
[Epoch   50] T_loss: 0.6643 | V_loss: 0.6491
[Epoch  100] T_loss: 0.6658 | V_loss: 0.6474
[Epoch  150] T_loss: 0.6599 | V_loss: 0.6452
[Epoch  200] T_loss: 0.6624 | V_loss: 0.6427
[Epoch  250] T_loss: 0.6613 | V_loss: 0.6412
[Epoch  300] T_loss: 0.6547 | V_loss: 0.6381

Run summary:
Epoch	300
Training loss	0.65468
Validation loss	0.63808



[Sigmoid-128]
[Epoch    1] T_loss: 0.7839 | V_loss: 0.8069
[Epoch   50] T_loss: 0.6806 | V_loss: 0.6742
[Epoch  100] T_loss: 0.6740 | V_loss: 0.6578
[Epoch  150] T_loss: 0.6721 | V_loss: 0.6532
[Epoch  200] T_loss: 0.6690 | V_loss: 0.6505
[Epoch  250] T_loss: 0.6698 | V_loss: 0.6485
[Epoch  300] T_loss: 0.6680 | V_loss: 0.6465

Run summary:
Epoch	300
Training loss	0.66796
Validation loss	0.64655



[ReLu-16]
[Epoch    1] T_loss: 0.6445 | V_loss: 0.6536
[Epoch   50] T_loss: 0.5833 | V_loss: 0.5949
[Epoch  100] T_loss: 0.5718 | V_loss: 0.5982
[Epoch  150] T_loss: 0.5552 | V_loss: 0.6144
[Epoch  200] T_loss: 0.5562 | V_loss: 0.5813
[Epoch  250] T_loss: 0.5224 | V_loss: 0.6294
[Epoch  300] T_loss: 0.5034 | V_loss: 0.5736

Run summary:
Epoch	300
Training loss	0.50345
Validation loss	0.5736



[ReLu-32]
[Epoch    1] T_loss: 0.7099 | V_loss: 0.6487
[Epoch   50] T_loss: 0.5776 | V_loss: 0.5949
[Epoch  100] T_loss: 0.5745 | V_loss: 0.6041
[Epoch  150] T_loss: 0.5678 | V_loss: 0.5765
[Epoch  200] T_loss: 0.5626 | V_loss: 0.6033
[Epoch  250] T_loss: 0.5547 | V_loss: 0.5981
[Epoch  300] T_loss: 0.5368 | V_loss: 0.5751

Run summary:
Epoch	300
Training loss	0.53681
Validation loss	0.57507



[ReLu-64]
[Epoch    1] T_loss: 0.6898 | V_loss: 0.6486
[Epoch   50] T_loss: 0.5876 | V_loss: 0.6235
[Epoch  100] T_loss: 0.5795 | V_loss: 0.6249
[Epoch  150] T_loss: 0.5459 | V_loss: 0.6226
[Epoch  200] T_loss: 0.5426 | V_loss: 0.6350
[Epoch  250] T_loss: 0.5469 | V_loss: 0.6585
[Epoch  300] T_loss: 0.5487 | V_loss: 0.6117

Run summary:
Epoch	300
Training loss	0.54869
Validation loss	0.61165



[ReLu-128]
[Epoch    1] T_loss: 0.6663 | V_loss: 0.6368
[Epoch   50] T_loss: 0.6018 | V_loss: 0.5987
[Epoch  100] T_loss: 0.5934 | V_loss: 0.5978
[Epoch  150] T_loss: 0.5863 | V_loss: 0.5896
[Epoch  200] T_loss: 0.5846 | V_loss: 0.5903
[Epoch  250] T_loss: 0.5778 | V_loss: 0.5825
[Epoch  300] T_loss: 0.5860 | V_loss: 0.5822

Run summary:
Epoch	300
Training loss	0.58595
Validation loss	0.58221



[ELU-16]
[Epoch    1] T_loss: 0.7640 | V_loss: 0.6499
[Epoch   50] T_loss: 0.5656 | V_loss: 0.6225
[Epoch  100] T_loss: 0.5388 | V_loss: 0.5857
[Epoch  150] T_loss: 0.5156 | V_loss: 0.5381
[Epoch  200] T_loss: 0.4975 | V_loss: 0.5414
[Epoch  250] T_loss: 0.4666 | V_loss: 0.4924
[Epoch  300] T_loss: 0.4728 | V_loss: 0.5361

Run summary:
Epoch	300
Training loss	0.47278
Validation loss	0.53614


[ELU-32]
[Epoch    1] T_loss: 0.6666 | V_loss: 0.6611
[Epoch   50] T_loss: 0.5651 | V_loss: 0.6164
[Epoch  100] T_loss: 0.5557 | V_loss: 0.6267
[Epoch  150] T_loss: 0.5421 | V_loss: 0.6424
[Epoch  200] T_loss: 0.5424 | V_loss: 0.6004
[Epoch  250] T_loss: 0.5182 | V_loss: 0.6156
[Epoch  300] T_loss: 0.5171 | V_loss: 0.5810

Run summary:
Epoch	300
Training loss	0.51711
Validation loss	0.58097


[ELU-64]
[Epoch    1] T_loss: 0.7334 | V_loss: 0.6572
[Epoch   50] T_loss: 0.5987 | V_loss: 0.5751
[Epoch  100] T_loss: 0.5734 | V_loss: 0.5602
[Epoch  150] T_loss: 0.5843 | V_loss: 0.5510
[Epoch  200] T_loss: 0.5513 | V_loss: 0.5422
[Epoch  250] T_loss: 0.5459 | V_loss: 0.5337
[Epoch  300] T_loss: 0.5627 | V_loss: 0.5317

Run summary:
Epoch	300
Training loss	0.56273
Validation loss	0.53165


[ELU-128]
[Epoch    1] T_loss: 0.7228 | V_loss: 0.6568
[Epoch   50] T_loss: 0.6003 | V_loss: 0.6150
[Epoch  100] T_loss: 0.5967 | V_loss: 0.6157
[Epoch  150] T_loss: 0.5919 | V_loss: 0.6130
[Epoch  200] T_loss: 0.5847 | V_loss: 0.6103
[Epoch  250] T_loss: 0.5882 | V_loss: 0.6058
[Epoch  300] T_loss: 0.5795 | V_loss: 0.6011

Run summary:
Epoch	300
Training loss	0.57952
Validation loss	0.60114



[LeakyReLU-16]
[Epoch    1] T_loss: 0.6221 | V_loss: 0.5429
[Epoch   50] T_loss: 0.5857 | V_loss: 0.5268
[Epoch  100] T_loss: 0.5728 | V_loss: 0.5194
[Epoch  150] T_loss: 0.5555 | V_loss: 0.4965
[Epoch  200] T_loss: 0.5354 | V_loss: 0.4887
[Epoch  250] T_loss: 0.5259 | V_loss: 0.4804
[Epoch  300] T_loss: 0.5011 | V_loss: 0.4949

Run summary:
Epoch	300
Training loss	0.50107
Validation loss	0.49494



[LeakyReLU-32]
[Epoch    1] T_loss: 0.7664 | V_loss: 0.6549
[Epoch   50] T_loss: 0.5957 | V_loss: 0.5654
[Epoch  100] T_loss: 0.5793 | V_loss: 0.5505
[Epoch  150] T_loss: 0.5789 | V_loss: 0.5397
[Epoch  200] T_loss: 0.5642 | V_loss: 0.5274
[Epoch  250] T_loss: 0.5557 | V_loss: 0.5145
[Epoch  300] T_loss: 0.5448 | V_loss: 0.5078

Run summary:
Epoch	300
Training loss	0.54477
Validation loss	0.50777


[LeakyReLU-64]
[Epoch    1] T_loss: 0.7183 | V_loss: 0.6379
[Epoch   50] T_loss: 0.5979 | V_loss: 0.5763
[Epoch  100] T_loss: 0.5943 | V_loss: 0.5859
[Epoch  150] T_loss: 0.5818 | V_loss: 0.5736
[Epoch  200] T_loss: 0.5684 | V_loss: 0.5744
[Epoch  250] T_loss: 0.5576 | V_loss: 0.5667
[Epoch  300] T_loss: 0.5524 | V_loss: 0.5630

Run summary:
Epoch	300
Training loss	0.55242
Validation loss	0.56301


[LeakyReLU-128]
[Epoch    1] T_loss: 0.9574 | V_loss: 0.7444
[Epoch   50] T_loss: 0.5689 | V_loss: 0.6235
[Epoch  100] T_loss: 0.5706 | V_loss: 0.6212
[Epoch  150] T_loss: 0.5678 | V_loss: 0.6181
[Epoch  200] T_loss: 0.5608 | V_loss: 0.6161
[Epoch  250] T_loss: 0.5585 | V_loss: 0.6143
[Epoch  300] T_loss: 0.5526 | V_loss: 0.6126

Run summary:
Epoch	300
Training loss	0.55258
Validation loss	0.61259


## 결론
활성화 함수별 성능 순위:
ELU >= LeakyReLU > ReLU > Sigmoid

Batch Size별 경향:
- 배치 사이즈 16, 32는 빠르게 학습하고 Validation loss가 낮음
- 배치 사이즈 64, 128은 안정적이지만 수렴 속도가 느리고 loss가 약간 높음

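최종적으로 선택한 조합:
- ELU + Batch Size 64 (Validation loss = 0.5317)
-> 수렴 안정, ELU 조합 중 손실 가장 낮음, Validation loss가 Training loss(0.5627)보다 낮아 과적합 징후 없음
- 참고: 최종 Validation loss만 보면 LeakyReLU + Batch Size 16(0.4949), LeakyReLU + Batch Size 32(0.5078)가 더 낮음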

#
#
#
# [요구사항 3] 테스트 및 submission.csv 생성

In [35]:
# 결론으로 이어지는 간소화 버전 training_loop: best_state/best_epoch/best_val 반환
import copy
import torch
from torch import nn

def training_loop(model, optimizer, train_data_loader, validation_data_loader, device):
    """Train the model and keep the weights of the best validation epoch.

    Runs args.epochs epochs of cross-entropy training. After each epoch the
    per-batch mean validation loss is compared with the best value so far;
    on improvement a deep copy of the model's state_dict is stored. Metrics
    go to wandb when args.wandb is set and are printed at epoch 1 and every
    50th epoch.

    Args:
        model: Network to train; it is moved to ``device``.
        optimizer: Optimizer that updates the model parameters.
        train_data_loader: Yields dict batches with "input" and "target".
        validation_data_loader: Same batch format, used for evaluation only.
        device: Device on which batches and the model are placed.

    Returns:
        (best_state, best_epoch, best_val): the copied state_dict, the epoch
        number and the validation loss of the best epoch. All three are None
        when no epoch was run.
    """
    criterion = nn.CrossEntropyLoss()
    total_epochs = args.epochs
    print_every = 50

    model.to(device)

    best_state, best_epoch, best_val = None, None, None

    for epoch in range(1, total_epochs + 1):
        # ---- Training pass ----
        model.train()
        running_train = 0.0
        train_batches = 0
        for batch in train_data_loader:
            features = batch["input"].to(device)
            labels = batch["target"].to(device)

            batch_loss = criterion(model(features), labels)

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            running_train += batch_loss.item()
            train_batches += 1

        # ---- Validation pass (no gradients) ----
        model.eval()
        running_val = 0.0
        val_batches = 0
        with torch.no_grad():
            for batch in validation_data_loader:
                features = batch["input"].to(device)
                labels = batch["target"].to(device)
                running_val += criterion(model(features), labels).item()
                val_batches += 1

        # Mean loss per batch; max(1, ...) guards against an empty loader.
        mean_train = running_train / max(1, train_batches)
        mean_val = running_val / max(1, val_batches)

        # Snapshot the weights whenever the validation loss improves.
        improved = best_val is None or mean_val < best_val - 1e-12
        if improved:
            best_state = copy.deepcopy(model.state_dict())
            best_epoch = epoch
            best_val = mean_val

        if args.wandb:
            metrics = {
                "Epoch": epoch,
                "Training loss": mean_train,
                "Validation loss": mean_val,
                "Best val loss": best_val,
                "Best val epoch": best_epoch,
            }
            wandb.log(metrics)

        if epoch == 1 or epoch % print_every == 0:
            print(f"[Epoch {epoch:>4}] T_loss: {mean_train:.4f} | V_loss: {mean_val:.4f} | best@{best_epoch}={best_val:.4f}")

    return best_state, best_epoch, best_val


In [36]:
import pandas as pd
import copy

# Fix the experiment configuration (reflects the Requirement 2 results).
args.activation = "elu"
args.batch_size = 64

# Prepare the data and the model the same way as before.
train_data_loader, validation_data_loader = get_data()
sample_batch = next(iter(train_data_loader))
input_dim = sample_batch['input'].shape[1]
model, optimizer = get_model_and_optimizer(input_dim)

if args.wandb:
    wandb.init(
        mode="online",
        project="titanic_training",
        notes="Req3: Best-epoch checkpointing + submission",
        tags=["titanic", "ELU", "bs64", "submission"],
        name=datetime.now().astimezone().strftime('%Y-%m-%d_%H-%M-%S'),
        config=dict(
            epochs=args.epochs,
            batch_size=args.batch_size,
            learning_rate=args.learning_rate,
            n_hidden_unit_list=args.n_hidden_unit_list,
            activation=args.activation
        )
    )

# Train and obtain the best checkpoint. This needs the training_loop variant
# that returns (best_state, best_epoch, best_val).
best_state, best_epoch, best_val = training_loop(
    model=model,
    optimizer=optimizer,
    train_data_loader=train_data_loader,
    validation_data_loader=validation_data_loader,
    device=device
)
print(f"[Best] epoch={best_epoch}, val_loss={best_val:.4f}")

# Restore the best weights before predicting.
if best_state is not None:
    model.load_state_dict(best_state)

# Load the test dataset (third item returned by get_preprocessed_dataset).
_, _, test_dataset = get_preprocessed_dataset()
test_loader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset))

# Predict (logits -> argmax -> 0/1). The loader serves the whole test set as
# one batch, so only the first batch is consumed.
model.eval()
with torch.no_grad():
    for batch in test_loader:
        logits = model(batch['input'].to(device))
        preds = torch.argmax(logits, dim=1).cpu().numpy()  # 0/1
        break

# Read PassengerId from test.csv and build submission.csv.
# NOTE(review): assumes test_dataset keeps the row order of test.csv - confirm.
test_csv_path = Path(BASE_PATH) / "_03_homeworks" / "homework_2" / "test.csv"
passenger_ids = pd.read_csv(test_csv_path)["PassengerId"].values

submission = pd.DataFrame({
    "PassengerId": passenger_ids,
    "Survived": preds.astype(int)
})

save_path = Path(BASE_PATH) / "_03_homeworks" / "homework_2" / "submission.csv"
submission.to_csv(save_path, index=False)
print(f"Saved submission: {save_path}")
# Print the run URL before closing the WandB run.
if args.wandb:
    print("WandB run URL:", wandb.run.url)
    wandb.finish()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["alone"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["Embarked"].fillna("missing", inplace=True)


Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked', 'title', 'family_num', 'alone'],
      dtype='object')
   Survived  Pclass  Sex   Age  SibSp  Parch     Fare  Embarked  title  \
0       0.0       3    1  22.0      1      0   7.2500         2      2   
1       1.0       1    0  38.0      1      0  71.2833         0      3   
2       1.0       3    0  26.0      0      0   7.9250         2      1   
3       1.0       1    0  35.0      1      0  53.1000         2      3   
4       0.0       3    1  35.0      0      0   8.0500         2      2   
5       0.0       3    1  29.0      0      0   8.4583         1      2   
6       0.0       1    1  54.0      0      0  51.8625         2      2   
7       0.0       3    1   2.0      3      1  21.0750         2      0   
8       1.0       3    0  27.0      0      2  11.1333         2      3   
9       1.0       2    0  14.0      1      0  30.0708         0      3   

   family_num  alone  
0           1    0.

[Epoch    1] T_loss: 0.7429 | V_loss: 0.6679 | best@1=0.6679
[Epoch   50] T_loss: 0.5739 | V_loss: 0.5878 | best@49=0.5870
[Epoch  100] T_loss: 0.5622 | V_loss: 0.5865 | best@99=0.5777
[Epoch  150] T_loss: 0.5619 | V_loss: 0.5708 | best@150=0.5708
[Epoch  200] T_loss: 0.5507 | V_loss: 0.5667 | best@198=0.5642
[Epoch  250] T_loss: 0.5436 | V_loss: 0.5586 | best@250=0.5586
[Epoch  300] T_loss: 0.5221 | V_loss: 0.5543 | best@299=0.5522
[Best] epoch=299, val_loss=0.5522


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["alone"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  all_df["Embarked"].fillna("missing", inplace=True)


Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked', 'title', 'family_num', 'alone'],
      dtype='object')
   Survived  Pclass  Sex   Age  SibSp  Parch     Fare  Embarked  title  \
0       0.0       3    1  22.0      1      0   7.2500         2      2   
1       1.0       1    0  38.0      1      0  71.2833         0      3   
2       1.0       3    0  26.0      0      0   7.9250         2      1   
3       1.0       1    0  35.0      1      0  53.1000         2      3   
4       0.0       3    1  35.0      0      0   8.0500         2      2   
5       0.0       3    1  29.0      0      0   8.4583         1      2   
6       0.0       1    1  54.0      0      0  51.8625         2      2   
7       0.0       3    1   2.0      3      1  21.0750         2      0   
8       1.0       3    0  27.0      0      2  11.1333         2      3   
9       1.0       2    0  14.0      1      0  30.0708         0      3   

   family_num  alone  
0           1    0.

0,1
Best val epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇█
Best val loss,█▆▆▅▅▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁
Epoch,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████
Training loss,█▇███▇▆▆▅▆▆▃▄▅▅▅█▆▄▃▁▇▅▅▄▄▃▃▅▃▅▄▄▃▃▁▂▁▄▁
Validation loss,▆▆▆▅▅▅▅▆▅▄▄▄▄▅▄▃█▄▃▃▃▂▃▃▄▃▃▂▂▅▂▂▂▂▃▁▂▁▆▁

0,1
Best val epoch,299.0
Best val loss,0.55224
Epoch,300.0
Training loss,0.52214
Validation loss,0.55431


### 고찰
단순히 마지막 Epoch이 아니라,
Validation loss가 최소가 되는 Epoch의 모델이 점수가 잘 나올 가능성이 크다.
그래서 학습 중에 최저 Validation loss를 계속 추적하도록 코딩했고, 갱신될 때마다 그때의 가중치(state_dict)를 deepcopy로 저장하도록 training_loop를 수정하였다.
학습이 끝나면 (best_state, best_epoch, best_val)을 반환해서 베스트 Epoch 가중치로 예측을 진행하였다.


#
#
#
# [요구사항 4] submission.csv 제출 및 등수확인

![Kaggle Leaderboard](04871484-ce74-4e12-a93a-f62199efd576.png)