use tqdm (#13)
ver217 committed Jul 8, 2022
1 parent 4db931d commit 354c9df
Showing 2 changed files with 19 additions and 11 deletions.
2 changes: 1 addition & 1 deletion colo_nvme/offload.py
@@ -6,7 +6,7 @@


 class DiskOffloader(Offloader):
-    def __init__(self, dir_name: str, n_entries: int = 128, backend: str = 'uring') -> None:
+    def __init__(self, dir_name: str, n_entries: int = 16, backend: str = 'uring') -> None:
         assert backend in ('uring', 'aio')
         if not os.path.exists(dir_name):
             os.mkdir(dir_name)
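Note: the only API this file's change touches is the constructor, whose default n_entries drops from 128 to 16. A minimal construction sketch for context (the directory name and values below are illustrative, not part of the commit):

    from colo_nvme import DiskOffloader

    # backend must be 'uring' (io_uring) or 'aio' (libaio), per the assert above;
    # the target directory is created if it does not already exist.
    offloader = DiskOffloader('./offload_dir', n_entries=16, backend='uring')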
28 changes: 18 additions & 10 deletions tests/test_adam.py
@@ -3,12 +3,12 @@
 import torch.nn as nn
 from colo_nvme import DiskOffloader
 from transformers import GPT2Config, GPT2LMHeadModel
-from time import time
 from typing import Optional
+from tqdm import tqdm


-N_WARMUP = 5
-N_ACTIVATE = 10
+N_WARMUP = 2
+N_ACTIVATE = 3


 class GPTLMModel(nn.Module):
@@ -33,6 +33,14 @@ def gpt2_xl(checkpoint=False):
     return GPTLMModel(hidden_size=1600, num_layers=48, num_attention_heads=16, checkpoint=checkpoint)


+def gpt2_8b(checkpoint=False):
+    return GPTLMModel(hidden_size=4096, num_layers=90, num_attention_heads=16, checkpoint=checkpoint)
+
+
+def gpt2_20b(checkpoint=False):
+    return GPTLMModel(hidden_size=8192, num_layers=25, num_attention_heads=16, checkpoint=checkpoint)
+
+
 def adam(step, lr, param, grad, exp_avg, exp_avg_sq, beta1=0.9, beta2=0.999, eps=1e-12):
     exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
     exp_avg_sq.mul_(beta2).addcmul_(grad, grad.conj(), value=1 - beta2)
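The diff view truncates adam() after the two moment updates. A sketch of how such a step conventionally finishes, modeled on torch.optim.Adam's bias-corrected update (the elided lines are not shown in this commit, so this continuation is an assumption):

    import torch

    def adam_step(step, lr, param, grad, exp_avg, exp_avg_sq,
                  beta1=0.9, beta2=0.999, eps=1e-12):
        # Moment updates, as in the hunk above.
        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad, grad.conj(), value=1 - beta2)
        # Assumed continuation: bias-corrected in-place parameter update.
        bias_correction1 = 1 - beta1 ** step
        bias_correction2 = 1 - beta2 ** step
        denom = (exp_avg_sq.sqrt() / bias_correction2 ** 0.5).add_(eps)
        param.addcdiv_(exp_avg, denom, value=-lr / bias_correction1)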
@@ -142,20 +150,20 @@ def _post_step(self, idx, params):
 def run_adam(model: torch.nn.Module, nvme_offload: bool, backend: str, prefetch: int, vecio: bool):
     offloader = None
     if nvme_offload:
-        offloader = DiskOffloader('.', backend=backend)
+        offloader = DiskOffloader('.', 8, backend=backend)
     optimizer = Adam(model.parameters(), 1e-3, offloader=offloader, prefetch=prefetch, vecio=vecio)
     for p in model.parameters():
         p.grad = torch.rand_like(p)
     for _ in range(N_WARMUP):
         optimizer.step()
-    start = time()
-    for _ in range(N_ACTIVATE):
-        optimizer.step()
-    dur = time() - start
     if not nvme_offload:
-        print(f'CPU: time={dur/N_ACTIVATE:.3f}')
+        desc = 'CPU'
+        postfix = None
     else:
-        print(f'NVME offload: backend={backend}, prefetch={prefetch}, vecio={vecio}, time={dur/N_ACTIVATE:.3f}')
+        desc = 'NVME'
+        postfix = {'backend': backend, 'prefetch': prefetch, 'vecio': vecio}
+    for _ in tqdm(range(N_ACTIVATE), desc=desc, postfix=postfix):
+        optimizer.step()


 if __name__ == '__main__':
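The point of the commit: hand-rolled wall-clock prints are replaced by tqdm progress bars, which report elapsed time and iterations per second on their own. A standalone sketch of how the new desc/postfix pair renders (values illustrative; time.sleep stands in for optimizer.step()):

    import time
    from tqdm import tqdm

    postfix = {'backend': 'uring', 'prefetch': 2, 'vecio': True}
    for _ in tqdm(range(3), desc='NVME', postfix=postfix):
        time.sleep(0.1)  # stand-in for optimizer.step()
    # Renders roughly as:
    # NVME: 100%|##########| 3/3 [00:00<00:00, 9.8it/s, backend=uring, prefetch=2, vecio=True]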
