In [1]:
import torch
import pytorch_lightning as pl
from torch import Tensor, nn
from sklearn.datasets import load_boston
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from sklearn.model_selection import train_test_split
import numpy as np
import warnings

warnings.filterwarnings('ignore')

#Boston 집값 데이터를 읽어온다.
X, y = load_boston(return_X_y=True)
train_x, val_x, train_y, val_y = train_test_split(X, y, test_size=0.3)

class SklearnDataset(Dataset):
    def __init__(self, X: np.ndarray, y: np.ndarray):
        super().__init__()
        scaler = MinMaxScaler() 

        scaler.fit(X) 
        self.X = scaler.transform(X)
        self.Y = y

    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        x = self.X[idx].astype(np.float32)
        y = self.Y[idx].astype(np.float32)
        return x, y

train_Dataset = SklearnDataset(train_x, train_y)
valid_Dataset = SklearnDataset(val_x, val_y)

In [3]:
model = nn.Linear(13, 1, bias=True)
model(torch.Tensor(train_Dataset[0][0]))

tensor([0.1068], grad_fn=<AddBackward0>)

In [8]:
train_loader = DataLoader(train_Dataset, batch_size=32, shuffle=True, drop_last=True)
valid_loader = DataLoader(valid_Dataset, batch_size=32, shuffle=True, drop_last=True)


class LinRegModel(pl.LightningModule):
    def __init__(self, input_dim: int):
        super().__init__()
        self.linear = nn.Linear(in_features=13, out_features=1, bias=True)
        self.batch_size = 32
    
    def forward(self, x):
        y_hat = self.linear(x)
        return y_hat

    def training_step(self, batch, batch_idx):
        x, y = batch
        # flatten any input
        x = x.view(x.size(0), -1)
        y_hat = self(x)
        loss = F.mse_loss(y_hat, y, reduction="sum")
        return loss

    def validation_step(self, batch, batch_idx):
        
        x, y = batch
        x = x.view(x.size(0), -1)
        logits = self(x)
        mse = F.mse_loss(logits, y)
        metrics = {'val_mse': mse}
        self.log_dict(metrics)

    def configure_optimizers(self):

        optimizer = torch.optim.AdamW(self.parameters(), lr=1e-3, weight_decay=0.01)
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            max_lr=1e-3,
            epochs=10,
            optimizer=optimizer,
            steps_per_epoch=int(len(train_Dataset) / self.batch_size),
            pct_start=0.1,
            div_factor=10,
            final_div_factor=100,
            base_momentum=0.90,
            max_momentum=0.95,
        )
        return [optimizer], [scheduler]

from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import os

checkpoint_callback = ModelCheckpoint(
    dirpath='checkpoints',
    verbose=True,
    save_last=True,
    save_top_k= 3,
    monitor='val_mse',
    mode='min'
)

early_stopping = EarlyStopping(
    monitor='val_mse',
    patience=3,
    verbose=True,
    mode='min'
)

# training
trainer_args = {
    'callbacks': [checkpoint_callback, early_stopping],
    'max_epochs' : 30
}

trainer = pl.Trainer(**trainer_args)
model = LinRegModel(13)
trainer.fit(model, train_loader, valid_loader)






GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name   | Type   | Params
----------------------------------
0 | linear | Linear | 14    
----------------------------------
14        Trainable params
0         Non-trainable params
14        Total params
0.000     Total estimated model params size (MB)


Epoch 0: 100%|██████████| 15/15 [00:00<00:00, 140.19it/s, loss=5.87e+05, v_num=6]

Metric val_mse improved. New best score: 598.572


Epoch 0: 100%|██████████| 15/15 [00:00<00:00, 130.44it/s, loss=5.87e+05, v_num=6]

Epoch 0, global step 10: val_mse reached 598.57214 (best 598.57214), saving model to "c:\Users\chkim\OneDrive\바탕 화면\Study\checkpoints\epoch=0-step=10-v1.ckpt" as top 3


Epoch 1: 100%|██████████| 15/15 [00:00<00:00, 137.79it/s, loss=5.92e+05, v_num=6]

Epoch 1, global step 21: val_mse reached 611.67932 (best 598.57214), saving model to "c:\Users\chkim\OneDrive\바탕 화면\Study\checkpoints\epoch=1-step=21.ckpt" as top 3


Epoch 2: 100%|██████████| 15/15 [00:00<00:00, 120.00it/s, loss=5.81e+05, v_num=6]

Epoch 2, global step 32: val_mse reached 623.51428 (best 598.57214), saving model to "c:\Users\chkim\OneDrive\바탕 화면\Study\checkpoints\epoch=2-step=32-v1.ckpt" as top 3


Epoch 3: 100%|██████████| 15/15 [00:00<00:00, 159.57it/s, loss=5.85e+05, v_num=6]

Metric val_mse improved by 5.581 >= min_delta = 0.0. New best score: 592.991


Epoch 3: 100%|██████████| 15/15 [00:00<00:00, 145.63it/s, loss=5.85e+05, v_num=6]

Epoch 3, global step 43: val_mse reached 592.99084 (best 592.99084), saving model to "c:\Users\chkim\OneDrive\바탕 화면\Study\checkpoints\epoch=3-step=43-v1.ckpt" as top 3


Epoch 4: 100%|██████████| 15/15 [00:00<00:00, 161.39it/s, loss=5.75e+05, v_num=6]

Metric val_mse improved by 5.055 >= min_delta = 0.0. New best score: 587.935


Epoch 4: 100%|██████████| 15/15 [00:00<00:00, 147.14it/s, loss=5.75e+05, v_num=6]

Epoch 4, global step 54: val_mse reached 587.93536 (best 587.93536), saving model to "c:\Users\chkim\OneDrive\바탕 화면\Study\checkpoints\epoch=4-step=54.ckpt" as top 3


Epoch 5: 100%|██████████| 15/15 [00:00<00:00, 183.02it/s, loss=5.78e+05, v_num=6]

Metric val_mse improved by 0.820 >= min_delta = 0.0. New best score: 587.115


Epoch 5: 100%|██████████| 15/15 [00:00<00:00, 170.49it/s, loss=5.78e+05, v_num=6]

Epoch 5, global step 65: val_mse reached 587.11493 (best 587.11493), saving model to "c:\Users\chkim\OneDrive\바탕 화면\Study\checkpoints\epoch=5-step=65.ckpt" as top 3


Epoch 6: 100%|██████████| 15/15 [00:00<00:00, 142.20it/s, loss=5.8e+05, v_num=6] 

Epoch 6, global step 76: val_mse was not in top 3


Epoch 7: 100%|██████████| 15/15 [00:00<00:00, 139.56it/s, loss=5.79e+05, v_num=6]

Epoch 7, global step 87: val_mse was not in top 3


Epoch 8: 100%|██████████| 15/15 [00:00<00:00, 140.20it/s, loss=5.68e+05, v_num=6]

Monitored metric val_mse did not improve in the last 3 records. Best score: 587.115. Signaling Trainer to stop.


Epoch 8: 100%|██████████| 15/15 [00:00<00:00, 132.75it/s, loss=5.68e+05, v_num=6]

Epoch 8, global step 98: val_mse was not in top 3


Epoch 8: 100%|██████████| 15/15 [00:00<00:00, 126.91it/s, loss=5.68e+05, v_num=6]

Saving latest checkpoint...





In [None]:
trainer = pl.Trainer(
              precision=16,                    # Double precision (64), full precision (32) or half precision (16)
                               # Distributed_backend (dp, ddp, etc ...)
              #gpus=4,                          # GPU 개수
              accumulate_grad_batches=1,       # Gradient를 몇 개의 배치동안 누적해서 계산할 것인지
              #amp_backend="apex",              # mixed precision backend to use (“native” or “apex”)
              auto_select_gpus=True,           # 사용가능한 GPU를 알아서 잡아준다.
              check_val_every_n_epoch=1,       # 몇 개의 epoch마다 validation 할 것 인지
              gradient_clip_val=5.0,           # Gradient clipping을 얼마로 할 것인지
               # 로그 선택
              auto_scale_batch_size="binsearch",                    # 메모리에 적합한 가장 큰 배치 사이즈를 찾아준다.
              max_epochs=20,                                        # 최대 epoch 수
)

model = LinRegModel(input_dim=13)
trainer.fit(model, train_loader, valid_loader)