In [3]:
pip install wandb

Collecting wandb
  Obtaining dependency information for wandb from https://files.pythonhosted.org/packages/1c/5e/0362fa88679852c7fd3ac85ee5bd949426c4a51a61379010d4089be6d7ac/wandb-0.15.12-py3-none-any.whl.metadata
  Using cached wandb-0.15.12-py3-none-any.whl.metadata (9.8 kB)
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Obtaining dependency information for GitPython!=3.1.29,>=1.0.0 from https://files.pythonhosted.org/packages/8a/7e/20f7e45878b5aed34320fbeeae8f78acc806e7bd708d00b1c6e64b016f5b/GitPython-3.1.37-py3-none-any.whl.metadata
  Using cached GitPython-3.1.37-py3-none-any.whl.metadata (12 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Obtaining dependency information for sentry-sdk>=1.0.0 from https://files.pythonhosted.org/packages/63/25/d22e1e152e4eac10d39d9132d7b5f1ea4bdfa0b9a1d65fc606a7b90aeefb/sentry_sdk-1.32.0-py2.py3-none-any.whl.metadata
  Using cached sentry_sdk-1.32.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Using c

In [1]:
import torch
from torch import nn, optim
from torch.utils.data import random_split, DataLoader
from datetime import datetime
import wandb #  기계 학습 프로젝트 및 실험을 관리하고 모니터링하기 위한 온라인 플랫폼 및 라이브러리
import argparse #  명령줄 인터페이스(CLI)를 만들고 명령줄에서 스크립트를 실행할 때 사용자로부터 인수를 처리하는데 도움을 주는 라이브러리

from pathlib import Path
# Repository root; it is appended to sys.path below so that the project
# package imported right after this block can be resolved.
# NOTE(review): this is a hard-coded absolute path for one machine
# (C:/Users/joowo/git/link_dl) -- adjust it when running elsewhere.
# Script-style alternative kept for reference (presumably disabled because
# __file__ is not defined inside a notebook -- confirm):
#BASE_PATH = str(Path(__file__).resolve().parent.parent.parent) # BASE_PATH: /Users/yhhan/git/link_dl
BASE_PATH = 'C:/Users/joowo/git/link_dl'

import sys
sys.path.append(BASE_PATH)

from _01_code._03_real_world_data_to_tensors.k_california_housing_dataset_dataloader \
    import CaliforniaHousingDataset


def get_data():
    """Create the training and validation data loaders.

    A CaliforniaHousingDataset is instantiated and randomly split 80/20.
    The dataset itself and the two split sizes are printed along the way.

    Returns:
        tuple: (train_data_loader, validation_data_loader). The training
        loader shuffles and uses ``wandb.config.batch_size``; the validation
        loader yields the whole validation split as a single batch.
    """
    full_dataset = CaliforniaHousingDataset()
    print(full_dataset)

    train_split, validation_split = random_split(full_dataset, [0.8, 0.2])
    print(len(train_split), len(validation_split))

    loaders = (
        # Mini-batches for training, reshuffled by the loader.
        DataLoader(dataset=train_split, batch_size=wandb.config.batch_size, shuffle=True),
        # One batch that contains every validation sample.
        DataLoader(dataset=validation_split, batch_size=len(validation_split)),
    )
    return loaders

In [2]:
class MyModel(nn.Module):
    """Fully connected network: one Linear + ReLU pair per hidden layer, then a Linear output.

    The hidden layer widths are read from ``wandb.config.n_hidden_unit_list``
    when the model is constructed, so ``wandb.init`` must have populated the
    config beforehand. Any number of hidden layers is supported; with two
    entries the layer layout (and therefore the ``state_dict`` keys) is
    Linear, ReLU, Linear, ReLU, Linear.

    Args:
        n_input: Number of input features.
        n_output: Number of output values.
    """

    def __init__(self, n_input, n_output):
        super().__init__()

        hidden_units = list(wandb.config.n_hidden_unit_list)

        layers = []
        in_features = n_input
        for out_features in hidden_units:
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            in_features = out_features
        # Output layer has no activation; it maps the last width to n_output.
        layers.append(nn.Linear(in_features, n_output))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.model(x)

In [3]:
def get_model_and_optimizer():
    """Build the model together with the optimizer that updates it.

    Returns:
        tuple: (model, optimizer) where the model is a ``MyModel`` with
        8 inputs and 1 output, and the optimizer is plain SGD over the
        model's parameters using ``wandb.config.learning_rate``.
    """
    network = MyModel(n_input=8, n_output=1)
    learning_rate = wandb.config.learning_rate
    sgd = optim.SGD(network.parameters(), lr=learning_rate)
    return network, sgd

In [4]:
def training_loop(model, optimizer, train_data_loader, validation_data_loader):
    """Train ``model`` and evaluate it on the validation data after every epoch.

    The number of epochs is read from ``wandb.config.epochs``. In each epoch
    the model is optimized with MSE loss over all training batches, then the
    validation loss is computed without gradient tracking. Both losses are
    sent to wandb every epoch and printed every 100 epochs.

    Args:
        model: Module called on ``batch['input']``.
        optimizer: Optimizer that updates the model's parameters.
        train_data_loader: Iterable of dict batches with 'input' and 'target'.
        validation_data_loader: Iterable of dict batches with 'input' and 'target'.

    Raises:
        ZeroDivisionError: If either loader yields no batches.
    """
    n_epochs = wandb.config.epochs
    loss_fn = nn.MSELoss()  # Use a built-in loss function
    next_print_epoch = 100

    for epoch in range(1, n_epochs + 1):
        # ---- training pass ----
        loss_train = 0.0
        num_trains = 0
        for train_batch in train_data_loader:
            output_train = model(train_batch['input'])
            loss = loss_fn(output_train, train_batch['target'])
            loss_train += loss.item()
            num_trains += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # ---- validation pass (must run once per epoch, inside the loop) ----
        loss_validation = 0.0
        num_validations = 0
        with torch.no_grad():
            for validation_batch in validation_data_loader:
                output_validation = model(validation_batch['input'])
                loss = loss_fn(output_validation, validation_batch['target'])
                loss_validation += loss.item()
                num_validations += 1

        # Per-epoch averages over the number of batches.
        mean_loss_train = loss_train / num_trains
        mean_loss_validation = loss_validation / num_validations

        # The measured values are sent to wandb.
        wandb.log({
          "Epoch": epoch,
          "Training loss": mean_loss_train,
          "Validation loss": mean_loss_validation
        })

        if epoch >= next_print_epoch:
            print(
                f"Epoch {epoch}, "
                f"Training loss {mean_loss_train:.4f}, "
                f"Validation loss {mean_loss_validation:.4f}"
            )
            next_print_epoch += 100

In [5]:
def main(args):
    """Run one training experiment, tracked with wandb.

    Starts a wandb run (online when ``args.wandb`` is truthy, otherwise
    disabled), builds the data loaders, model and optimizer, trains the
    model, and always closes the wandb run, even when a step fails.

    Args:
        args: Parsed command-line namespace providing ``wandb``,
            ``batch_size`` and ``epochs``.
    """
    # Format the current local time as a string; it is used as the run name.
    current_time_str = datetime.now().astimezone().strftime('%Y-%m-%d_%H-%M-%S')

    config = {
        'epochs': args.epochs,
        'batch_size': args.batch_size,
        'learning_rate': 1e-3,
        'n_hidden_unit_list': [20, 20],
    }

    wandb.init(
        mode="online" if args.wandb else "disabled",
        project="my_model_training",
        notes="My first wandb experiment",
        tags=["my_model", "california_housing"],
        name=current_time_str,
        config=config
    )

    # From here on the run is open: guarantee wandb.finish() runs so a failure
    # does not leave a dangling run in the (long-lived) notebook kernel.
    try:
        print(args)
        print(wandb.config)

        train_data_loader, validation_data_loader = get_data()

        model, optimizer = get_model_and_optimizer()

        wandb.watch(model)

        print("#" * 50, 1)

        training_loop(
            model=model,
            optimizer=optimizer,
            train_data_loader=train_data_loader,
            validation_data_loader=validation_data_loader
        )
    finally:
        wandb.finish()

In [8]:
# https://docs.wandb.ai/guides/track/config
if __name__ == "__main__":
    # Build the command-line parser for the training run.
    parser = argparse.ArgumentParser()

    # Boolean switch (--wandb / --no-wandb); main() maps it to the wandb
    # mode "online" or "disabled".
    parser.add_argument(
        "--wandb",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="True or False",
    )

    # Mini-batch size used by the training data loader.
    parser.add_argument(
        "-b",
        "--batch_size",
        type=int,
        default=512,
        help="Batch size (int, default: 512)",
    )

    # Number of passes over the training data.
    parser.add_argument(
        "-e",
        "--epochs",
        type=int,
        default=1_000,
        help="Number of training epochs (int, default:1_000)",
    )

    # A bare parser.parse_args() raised an error in this notebook, so an
    # explicit empty argument list is parsed instead. As a result every option
    # takes its default: wandb=False, batch_size=512, epochs=1000.
    args = parser.parse_args([])

    main(args)

Namespace(wandb=False, batch_size=512, epochs=1000)
{'epochs': 1000, 'batch_size': 512, 'learning_rate': 0.001, 'n_hidden_unit_list': [20, 20]}
Data Size: 20640, Input Shape: torch.Size([20640, 8]), Target Shape: torch.Size([20640, 1])
16512 4128
################################################## 1
Epoch 100, Training loss 0.6105, Validation loss 0.6310
Epoch 200, Training loss 0.5118, Validation loss 0.5293
Epoch 300, Training loss 0.4724, Validation loss 0.4837
Epoch 400, Training loss 0.4495, Validation loss 0.4617
Epoch 500, Training loss 0.4361, Validation loss 0.4465
Epoch 600, Training loss 0.4246, Validation loss 0.4333
Epoch 700, Training loss 0.4127, Validation loss 0.4211
Epoch 800, Training loss 0.4049, Validation loss 0.4102
Epoch 900, Training loss 0.3949, Validation loss 0.4007
Epoch 1000, Training loss 0.3832, Validation loss 0.3932


#### Parsing(파싱)
- 텍스트나 데이터를 분석하고 원하는 정보를 추출하는 과정

#### epoch(에포크)
- 전체 학습 데이터 세트를 모델에 한 번 모두 통과시키며 순전파와 역전파를 수행하는 한 주기를 의미함.
- 한 <b>epoch</b> 동안 배치마다 모델의 가중치(매개변수)가 업데이트되며, epoch를 반복할수록 학습 데이터에 대한 오차(손실)가 점차 줄어듦