From a609778d86c5560d9b9fd91752a50ec94e0c3414 Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Thu, 16 Feb 2023 10:01:54 +0800
Subject: [PATCH 1/9] [chatgpt]fix train_rm bug with lora

---
 applications/ChatGPT/chatgpt/trainer/rm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/applications/ChatGPT/chatgpt/trainer/rm.py b/applications/ChatGPT/chatgpt/trainer/rm.py
index c24289502830..b76ae537306c 100644
--- a/applications/ChatGPT/chatgpt/trainer/rm.py
+++ b/applications/ChatGPT/chatgpt/trainer/rm.py
@@ -43,7 +43,7 @@ def fit(self, use_lora):
             # train
             if use_lora > 0:
                 print("Using Lora")
-                lora.mark_only_lora_as_trainable(self.model)
+                lora.mark_only_lora_as_trainable(self.model.model)
             else:
                 self.model.train()
             for chosen_ids, c_mask, reject_ids, r_mask in self.train_dataloader:

From f6bc949ac0ff3e1dbd961bcb63d1547bf0d01812 Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Thu, 16 Feb 2023 11:00:12 +0800
Subject: [PATCH 2/9] [chatgpt]support colossalai strategy to train rm

---
 applications/ChatGPT/chatgpt/trainer/rm.py   | 30 +++++++++-----
 applications/ChatGPT/examples/train_dummy.sh |  4 +-
 .../ChatGPT/examples/train_prompts.sh        |  4 +-
 .../ChatGPT/examples/train_reward_model.py   | 41 +++++++++++++++----
 applications/ChatGPT/examples/train_rm.sh    |  4 +-
 5 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/applications/ChatGPT/chatgpt/trainer/rm.py b/applications/ChatGPT/chatgpt/trainer/rm.py
index b76ae537306c..5a5b52153e85 100644
--- a/applications/ChatGPT/chatgpt/trainer/rm.py
+++ b/applications/ChatGPT/chatgpt/trainer/rm.py
@@ -1,11 +1,13 @@
 from abc import ABC
-
 import loralib as lora
 from chatgpt.dataset import RewardDataset
 from chatgpt.nn import PairWiseLoss
 from torch.optim import Adam
 from torch.utils.data import DataLoader
 from tqdm import tqdm
+from .strategies import Strategy
+from torch.optim import Optimizer
+from .utils import is_rank_0
 
 
 class RewardModelTrainer(ABC):
@@ -14,32 +16,38 @@ class RewardModelTrainer(ABC):
     Args:
         model (torch.nn.Module): the model to train
+        strategy (Strategy): the strategy to use for training
+        optim(Optimizer): the optimizer to use for training
         train_dataset (RewardDataset): the dataset to use for training
         eval_dataset (RewardDataset): the dataset to use for evaluation
         batch_size (int, defaults to 1): the batch size while training
-        num_epochs (int, defaults to 2): the number of epochs to train
+        max_epochs (int, defaults to 2): the number of epochs to train
         optim_kwargs (dict, defaults to {'lr':1e-4}): the kwargs to use while initializing optimizer
     """
 
     def __init__(self,
                  model,
+                 strategy: Strategy,
+                 optim : Optimizer,
                  train_dataset: RewardDataset,
                  eval_dataset: RewardDataset,
                  batch_size: int = 1,
-                 num_epochs: int = 2,
-                 optim_kwargs: dict = {'lr': 1e-4}) -> None:
+                 max_epochs: int = 2,) -> None:
         super().__init__()
-        self.model = model
+        self.strategy = strategy
+        self.epochs = max_epochs
         self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
         self.eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size)
+
+        self.model = strategy.setup_model(model)
         self.loss_fn = PairWiseLoss()
-        self.optimizer = Adam(self.model.parameters(), **optim_kwargs)
-        self.epochs = num_epochs
+        self.optimizer = strategy.setup_optimizer(optim, self.model)
+
     def fit(self, use_lora):
-        epoch_bar = tqdm(range(self.epochs), desc='Train epoch')
+        epoch_bar = tqdm(range(self.epochs), desc='Train epoch', disable=not is_rank_0())
         for epoch in range(self.epochs):
-            step_bar = tqdm(range(self.train_dataloader.__len__()), desc='Train step of epoch %d' % epoch)
+            step_bar = tqdm(range(self.train_dataloader.__len__()), desc='Train step of epoch %d' % epoch, disable=not is_rank_0())
             # train
             if use_lora > 0:
                 print("Using Lora")
@@ -54,8 +62,8 @@ def fit(self, use_lora):
                 chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
                 reject_reward = self.model(reject_ids, attention_mask=r_mask)
                 loss = self.loss_fn(chosen_reward, reject_reward)
-                loss.backward()
-                self.optimizer.step()
+                self.strategy.backward(loss, self.model, self.optimizer)
+                self.strategy.optimizer_step(self.optimizer)
                 self.optimizer.zero_grad()
                 step_bar.update()
                 step_bar.set_postfix({'loss': loss.item()})
diff --git a/applications/ChatGPT/examples/train_dummy.sh b/applications/ChatGPT/examples/train_dummy.sh
index 559d338ee021..562b8265398b 100755
--- a/applications/ChatGPT/examples/train_dummy.sh
+++ b/applications/ChatGPT/examples/train_dummy.sh
@@ -13,6 +13,6 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
     echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
 }
 
-set_n_least_used_CUDA_VISIBLE_DEVICES 1
+set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-python train_dummy.py --model bloom --pretrain '/data2/users/lczht/bloom-560m' --lora_rank 16
+torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2
\ No newline at end of file
diff --git a/applications/ChatGPT/examples/train_prompts.sh b/applications/ChatGPT/examples/train_prompts.sh
index 0b82d3f1cd5e..db73ac8e8e85 100755
--- a/applications/ChatGPT/examples/train_prompts.sh
+++ b/applications/ChatGPT/examples/train_prompts.sh
@@ -13,6 +13,6 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
     echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
 }
 
-set_n_least_used_CUDA_VISIBLE_DEVICES 1
+set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-python train_prompts.py prompts.csv --pretrain '/data2/users/lczht/bloom-560m' --lora_rank 16
+torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/ChatGPT/examples/train_reward_model.py
index fd78a2ac6325..1455cbc3f662 100644
--- a/applications/ChatGPT/examples/train_reward_model.py
+++ b/applications/ChatGPT/examples/train_reward_model.py
@@ -2,36 +2,58 @@
 
 import loralib as lora
 import torch
+from torch.optim import Adam
 from chatgpt.dataset import RewardDataset
 from chatgpt.nn import BLOOMRM
 from chatgpt.trainer import RewardModelTrainer
 from datasets import load_dataset
 from transformers import BloomTokenizerFast
 
+from colossalai.nn.optimizer import HybridAdam
+from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
+
 
 def train(args):
+    # configure strategy
+    if args.strategy == 'naive':
+        strategy = NaiveStrategy()
+    elif args.strategy == 'ddp':
+        strategy = DDPStrategy()
+    elif args.strategy == 'colossalai_gemini':
+        strategy = ColossalAIStrategy(stage=3, placement_policy='cuda')
+    elif args.strategy == 'colossalai_zero2':
+        strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
+    else:
+        raise ValueError(f'Unsupported strategy "{args.strategy}"')
+
+    # configure model
     tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
     tokenizer.pad_token = tokenizer.eos_token
-    model = BLOOMRM(pretrained=args.pretrain)
-
-    model.cuda()
-
+    model = BLOOMRM(pretrained=args.pretrain).cuda()
     max_len = 1024
-
+
+    # configure optimizer
+    if args.strategy.startswith('colossalai'):
+        optim = HybridAdam(model.parameters(), lr=5e-5)
+    else:
+        optim = Adam(model.parameters(), lr=5e-5)
+
+    # prepare for data and dataset
     data = load_dataset(args.dataset)
-    train_data = data["train"]
-    eval_data = data['test']
+    train_data = data["train"].select(range(100))
+    eval_data = data['test'].select(range(5))
     train_dataset = RewardDataset(train_data, tokenizer, max_len)
     eval_dataset = RewardDataset(eval_data, tokenizer, max_len)
 
     # batch_size here is expected to be C(k,2), k means # response of each prompt
     # be limited with the format of dataset 'Dahoas/rm-static', we'd better use batch_size as 1
     trainer = RewardModelTrainer(model=model,
+                                 strategy=strategy,
+                                 optim=optim,
                                  train_dataset=train_dataset,
                                  eval_dataset=eval_dataset,
                                  batch_size=args.batch_size,
-                                 num_epochs=args.max_epochs)
+                                 max_epochs=args.max_epochs)
 
     trainer.fit(use_lora=args.lora_rank)
@@ -43,6 +65,9 @@ def train(args):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
+    parser.add_argument('--strategy',
+                        choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
+                        default='naive')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--dataset', type=str, default='Dahoas/rm-static')
     parser.add_argument('--save_path', type=str, default='rm_ckpt.pth')
diff --git a/applications/ChatGPT/examples/train_rm.sh b/applications/ChatGPT/examples/train_rm.sh
index bf46d7e43ff2..ed91deee2c59 100755
--- a/applications/ChatGPT/examples/train_rm.sh
+++ b/applications/ChatGPT/examples/train_rm.sh
@@ -13,6 +13,6 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
     echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
 }
 
-set_n_least_used_CUDA_VISIBLE_DEVICES 1
+set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-python train_reward_model.py --pretrain '/data2/users/lczht/bloom-560m' --lora_rank 16
+torchrun --standalone --nproc_per_node=2 train_reward_model.py --pretrain '/data2/users/lczht/bloom-560m' --strategy colossalai_zero2

From 350dfa399d144d8b20e5cf6acdc9ecbf65b89efa Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Thu, 16 Feb 2023 11:11:51 +0800
Subject: [PATCH 3/9] fix pre-commit

---
 applications/ChatGPT/chatgpt/trainer/rm.py | 30 ++++++++++++----------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/applications/ChatGPT/chatgpt/trainer/rm.py b/applications/ChatGPT/chatgpt/trainer/rm.py
index 5a5b52153e85..f6639edcbbb4 100644
--- a/applications/ChatGPT/chatgpt/trainer/rm.py
+++ b/applications/ChatGPT/chatgpt/trainer/rm.py
@@ -1,12 +1,13 @@
 from abc import ABC
+
 import loralib as lora
 from chatgpt.dataset import RewardDataset
 from chatgpt.nn import PairWiseLoss
-from torch.optim import Adam
+from torch.optim import Adam, Optimizer
 from torch.utils.data import DataLoader
 from tqdm import tqdm
+
 from .strategies import Strategy
-from torch.optim import Optimizer
 from .utils import is_rank_0
 
 
@@ -25,29 +26,32 @@ class RewardModelTrainer(ABC):
         optim_kwargs (dict, defaults to {'lr':1e-4}): the kwargs to use while initializing optimizer
     """
 
-    def __init__(self,
-                 model,
-                 strategy: Strategy,
-                 optim : Optimizer,
-                 train_dataset: RewardDataset,
-                 eval_dataset: RewardDataset,
-                 batch_size: int = 1,
-                 max_epochs: int = 2,) -> None:
+    def __init__(
+        self,
+        model,
+        strategy: Strategy,
+        optim: Optimizer,
+        train_dataset: RewardDataset,
+        eval_dataset: RewardDataset,
+        batch_size: int = 1,
+        max_epochs: int = 2,
+    ) -> None:
         super().__init__()
         self.strategy = strategy
         self.epochs = max_epochs
         self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
         self.eval_dataloader = DataLoader(eval_dataset, batch_size=batch_size)
-
+
         self.model = strategy.setup_model(model)
         self.loss_fn = PairWiseLoss()
         self.optimizer = strategy.setup_optimizer(optim, self.model)
-
     def fit(self, use_lora):
         epoch_bar = tqdm(range(self.epochs), desc='Train epoch', disable=not is_rank_0())
         for epoch in range(self.epochs):
-            step_bar = tqdm(range(self.train_dataloader.__len__()), desc='Train step of epoch %d' % epoch, disable=not is_rank_0())
+            step_bar = tqdm(range(self.train_dataloader.__len__()),
+                            desc='Train step of epoch %d' % epoch,
+                            disable=not is_rank_0())
             # train
             if use_lora > 0:
                 print("Using Lora")

From 501d18d5c20d2e9133c58dbc4d19fb42d5b22f6f Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Thu, 16 Feb 2023 11:17:15 +0800
Subject: [PATCH 4/9] fix pre-commit 2

---
 applications/ChatGPT/examples/train_dummy.sh        |  2 +-
 applications/ChatGPT/examples/train_reward_model.py | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/applications/ChatGPT/examples/train_dummy.sh b/applications/ChatGPT/examples/train_dummy.sh
index 562b8265398b..595da573e2b1 100755
--- a/applications/ChatGPT/examples/train_dummy.sh
+++ b/applications/ChatGPT/examples/train_dummy.sh
@@ -15,4 +15,4 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
 
 set_n_least_used_CUDA_VISIBLE_DEVICES 2
 
-torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2
\ No newline at end of file
+torchrun --standalone --nproc_per_node=2 train_dummy.py --strategy colossalai_zero2
diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/ChatGPT/examples/train_reward_model.py
index 1455cbc3f662..47688325ed7a 100644
--- a/applications/ChatGPT/examples/train_reward_model.py
+++ b/applications/ChatGPT/examples/train_reward_model.py
@@ -2,15 +2,15 @@
 
 import loralib as lora
 import torch
-from torch.optim import Adam
 from chatgpt.dataset import RewardDataset
 from chatgpt.nn import BLOOMRM
 from chatgpt.trainer import RewardModelTrainer
+from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
 from datasets import load_dataset
+from torch.optim import Adam
 from transformers import BloomTokenizerFast
 
 from colossalai.nn.optimizer import HybridAdam
-from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
 
 
 def train(args):
@@ -25,19 +25,19 @@ def train(args):
         strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')
     else:
         raise ValueError(f'Unsupported strategy "{args.strategy}"')
-
+
     # configure model
     tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
     tokenizer.pad_token = tokenizer.eos_token
     model = BLOOMRM(pretrained=args.pretrain).cuda()
     max_len = 1024
-
+
     # configure optimizer
     if args.strategy.startswith('colossalai'):
         optim = HybridAdam(model.parameters(), lr=5e-5)
     else:
         optim = Adam(model.parameters(), lr=5e-5)
-
+
     # prepare for data and dataset
     data = load_dataset(args.dataset)
     train_data = data["train"].select(range(100))

From b387b3218bd2d362b5d944892191d01e665a463b Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Fri, 17 Feb 2023 20:08:40 +0800
Subject: [PATCH 5/9] [chatgpt]fix rm eval typo

---
 applications/ChatGPT/chatgpt/dataset/__init__.py       | 3 ++-
 applications/ChatGPT/chatgpt/dataset/reward_dataset.py | 4 +++-
 applications/ChatGPT/chatgpt/dataset/utils.py          | 5 +++++
 applications/ChatGPT/chatgpt/nn/reward_model.py        | 4 ++--
 applications/ChatGPT/chatgpt/trainer/rm.py             | 4 ++--
 5 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 applications/ChatGPT/chatgpt/dataset/utils.py

diff --git a/applications/ChatGPT/chatgpt/dataset/__init__.py b/applications/ChatGPT/chatgpt/dataset/__init__.py
index 2f330ee67afe..b4599c82ba75 100644
--- a/applications/ChatGPT/chatgpt/dataset/__init__.py
+++ b/applications/ChatGPT/chatgpt/dataset/__init__.py
@@ -1,3 +1,4 @@
 from .reward_dataset import RewardDataset
+from .utils import is_rank_0
 
-__all__ = ['RewardDataset']
+__all__ = ['RewardDataset', 'is_rank_0']
diff --git a/applications/ChatGPT/chatgpt/dataset/reward_dataset.py b/applications/ChatGPT/chatgpt/dataset/reward_dataset.py
index 14edcce30d19..8bc850f2d52d 100644
--- a/applications/ChatGPT/chatgpt/dataset/reward_dataset.py
+++ b/applications/ChatGPT/chatgpt/dataset/reward_dataset.py
@@ -3,6 +3,8 @@
 from torch.utils.data import Dataset
 from tqdm import tqdm
 
+from .utils import is_rank_0
+
 
 class RewardDataset(Dataset):
     """
@@ -18,7 +20,7 @@ def __init__(self, dataset, tokenizer: Callable, max_length: int) -> None:
         super().__init__()
         self.chosen = []
         self.reject = []
-        for data in tqdm(dataset):
+        for data in tqdm(dataset, disable=not is_rank_0()):
             prompt = data['prompt']
 
             chosen = prompt + data['chosen'] + "<|endoftext|>"
diff --git a/applications/ChatGPT/chatgpt/dataset/utils.py b/applications/ChatGPT/chatgpt/dataset/utils.py
new file mode 100644
index 000000000000..6c9f7f085f8c
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/dataset/utils.py
@@ -0,0 +1,5 @@
+import torch.distributed as dist
+
+
+def is_rank_0() -> bool:
+    return not dist.is_initialized() or dist.get_rank() == 0
diff --git a/applications/ChatGPT/chatgpt/nn/reward_model.py b/applications/ChatGPT/chatgpt/nn/reward_model.py
index 5108f61a6186..baaa8b768766 100644
--- a/applications/ChatGPT/chatgpt/nn/reward_model.py
+++ b/applications/ChatGPT/chatgpt/nn/reward_model.py
@@ -23,7 +23,7 @@ def __init__(self,
                  lora_rank: int = 0,
                  lora_train_bias: str = 'none') -> None:
         super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
-        self.model = model
+        self.body = model
         if value_head is not None:
             if value_head.out_features != 1:
                 raise ValueError("The value head of reward model's output dim should be 1!")
@@ -34,7 +34,7 @@ def __init__(self,
             self.convert_to_lora()
 
     def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        outputs = self.model(sequences, attention_mask=attention_mask)
+        outputs = self.body(sequences, attention_mask=attention_mask)
         last_hidden_states = outputs['last_hidden_state']
         values = self.value_head(last_hidden_states)[:, :-1]
         value = values.mean(dim=1).squeeze(1)    # ensure shape is (B)
diff --git a/applications/ChatGPT/chatgpt/trainer/rm.py b/applications/ChatGPT/chatgpt/trainer/rm.py
index f6639edcbbb4..78f8c64ebe99 100644
--- a/applications/ChatGPT/chatgpt/trainer/rm.py
+++ b/applications/ChatGPT/chatgpt/trainer/rm.py
@@ -55,7 +55,7 @@ def fit(self, use_lora):
             # train
             if use_lora > 0:
                 print("Using Lora")
-                lora.mark_only_lora_as_trainable(self.model.model)
+                lora.mark_only_lora_as_trainable(self.model.body)
             else:
                 self.model.train()
             for chosen_ids, c_mask, reject_ids, r_mask in self.train_dataloader:
@@ -74,8 +74,8 @@ def fit(self, use_lora):
 
             # eval
             self.model.eval()
+            dist = 0
             for chosen_ids, c_mask, reject_ids, r_mask in self.eval_dataloader:
-                dist = 0
                 chosen_ids = chosen_ids.squeeze(1).cuda()
                 c_mask = c_mask.squeeze(1).cuda()
                 reject_ids = reject_ids.squeeze(1).cuda()
                 r_mask = r_mask.squeeze(1).cuda()

From a6ed4998158efdbadd2f60b7244b24f9fa814e77 Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Mon, 20 Feb 2023 10:41:34 +0800
Subject: [PATCH 6/9] fix rm eval

---
 applications/ChatGPT/chatgpt/trainer/rm.py | 28 +++++++++++--------
 .../ChatGPT/examples/train_reward_model.py | 11 ++++----
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/applications/ChatGPT/chatgpt/trainer/rm.py b/applications/ChatGPT/chatgpt/trainer/rm.py
index 78f8c64ebe99..fdc3c6e0bccd 100644
--- a/applications/ChatGPT/chatgpt/trainer/rm.py
+++ b/applications/ChatGPT/chatgpt/trainer/rm.py
@@ -1,6 +1,7 @@
 from abc import ABC
 
 import loralib as lora
+import torch
 from chatgpt.dataset import RewardDataset
 from chatgpt.nn import PairWiseLoss
 from torch.optim import Adam, Optimizer
@@ -74,16 +75,21 @@ def fit(self, use_lora):
 
             # eval
             self.model.eval()
-            dist = 0
-            for chosen_ids, c_mask, reject_ids, r_mask in self.eval_dataloader:
-                chosen_ids = chosen_ids.squeeze(1).cuda()
-                c_mask = c_mask.squeeze(1).cuda()
-                reject_ids = reject_ids.squeeze(1).cuda()
-                r_mask = r_mask.squeeze(1).cuda()
-                chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
-                reject_reward = self.model(reject_ids, attention_mask=r_mask)
-                dist += (chosen_reward - reject_reward)
-            dist_mean = dist / self.eval_dataloader.__len__()
+            with torch.no_grad():
+                dist = 0
+                loss_sum = 0
+                for chosen_ids, c_mask, reject_ids, r_mask in self.eval_dataloader:
+                    chosen_ids = chosen_ids.squeeze(1).cuda()
+                    c_mask = c_mask.squeeze(1).cuda()
+                    reject_ids = reject_ids.squeeze(1).cuda()
+                    r_mask = r_mask.squeeze(1).cuda()
+                    chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
+                    reject_reward = self.model(reject_ids, attention_mask=r_mask)
+                    dist += (chosen_reward - reject_reward).mean().item()
+                    loss = self.loss_fn(chosen_reward, reject_reward)
+                    loss_sum += loss.item()
+                dist_mean = dist / self.eval_dataloader.__len__()
+                loss_mean = loss_sum / self.eval_dataloader.__len__()
             epoch_bar.update()
-            step_bar.set_postfix({'loss': loss.item(), 'dist_mean': dist_mean.item()})
+            step_bar.set_postfix({'loss': loss_mean, 'dist_mean': dist_mean})
             step_bar.close()
diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/ChatGPT/examples/train_reward_model.py
index 47688325ed7a..6673bdc34592 100644
--- a/applications/ChatGPT/examples/train_reward_model.py
+++ b/applications/ChatGPT/examples/train_reward_model.py
@@ -29,7 +29,8 @@ def train(args):
     # configure model
     tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
     tokenizer.pad_token = tokenizer.eos_token
-    model = BLOOMRM(pretrained=args.pretrain).cuda()
+    with strategy.model_init_context():
+        model = BLOOMRM(pretrained=args.pretrain).cuda()
     max_len = 1024
 
     # configure optimizer
@@ -40,8 +41,8 @@ def train(args):
 
     # prepare for data and dataset
     data = load_dataset(args.dataset)
-    train_data = data["train"].select(range(100))
-    eval_data = data['test'].select(range(5))
+    train_data = data["train"]
+    eval_data = data['test']
     train_dataset = RewardDataset(train_data, tokenizer, max_len)
     eval_dataset = RewardDataset(eval_data, tokenizer, max_len)
 
@@ -71,8 +72,8 @@ def train(args):
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--dataset', type=str, default='Dahoas/rm-static')
    parser.add_argument('--save_path', type=str, default='rm_ckpt.pth')
-    parser.add_argument('--max_epochs', type=int, default=2)
-    parser.add_argument('--batch_size', type=int, default=1)
+    parser.add_argument('--max_epochs', type=int, default=10)
+    parser.add_argument('--batch_size', type=int, default=4)
     parser.add_argument('--lora_rank', type=int, default=0, help="low-rank adaptation matrices rank")
     args = parser.parse_args()
     train(args)

From a45bf85f7843f3bc7df04c7cfdada86f117dd660 Mon Sep 17 00:00:00 2001
From: BlueRum <70618399+ht-zhou@users.noreply.github.com>
Date: Mon, 20 Feb 2023 10:50:06 +0800
Subject: [PATCH 7/9] fix pre commit

---
 applications/ChatGPT/examples/train_reward_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/ChatGPT/examples/train_reward_model.py
index 1f5839f844af..57d47b6959a1 100644
--- a/applications/ChatGPT/examples/train_reward_model.py
+++ b/applications/ChatGPT/examples/train_reward_model.py
@@ -29,7 +29,6 @@ def train(args):
     # configure model
     tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
     tokenizer.pad_token = tokenizer.eos_token
-    with strategy.model_init_context():
     model = BLOOMRM(pretrained=args.pretrain).cuda()
     max_len = 1024
 

From 08e39c180d0e405bd6a600d468524ee46e7f0a8a Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Tue, 21 Feb 2023 16:34:41 +0800
Subject: [PATCH 8/9] add support of saving ckpt in examples

---
 applications/ChatGPT/examples/train_dummy.py   | 6 ++++++
 applications/ChatGPT/examples/train_prompts.py | 5 +++++
 2 files changed, 11 insertions(+)

diff --git a/applications/ChatGPT/examples/train_dummy.py b/applications/ChatGPT/examples/train_dummy.py
index f98b4792d978..6e676720b875 100644
--- a/applications/ChatGPT/examples/train_dummy.py
+++ b/applications/ChatGPT/examples/train_dummy.py
@@ -2,6 +2,7 @@
 from copy import deepcopy
 
 import torch
+import torch.distributed as dist
 from chatgpt.nn import BLOOMActor, BLOOMCritic, GPTActor, GPTCritic, OPTActor, OPTCritic, RewardModel
 from chatgpt.trainer import PPOTrainer
 from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
@@ -97,6 +98,11 @@ def main(args):
                 max_timesteps=args.max_timesteps,
                 update_timesteps=args.update_timesteps)
 
+    # save model checkpoint after fitting on only rank0
+    strategy.save_model(actor, 'actor_checkpoint_dummy.pt', only_rank0=True)
+    # save optimizer checkpoint on all ranks
+    strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint_dummy_%d.pt' % (dist.get_rank()), only_rank0=False)
+
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
diff --git a/applications/ChatGPT/examples/train_prompts.py b/applications/ChatGPT/examples/train_prompts.py
index e79b2acf11b1..f6f9cc28a738 100644
--- a/applications/ChatGPT/examples/train_prompts.py
+++ b/applications/ChatGPT/examples/train_prompts.py
@@ -2,6 +2,7 @@
 from copy import deepcopy
 
 import pandas as pd
+import torch.distributed as dist
 from chatgpt.nn import BLOOMActor, BLOOMCritic, GPTActor, GPTCritic, OPTActor, OPTCritic, RewardModel
 from chatgpt.trainer import PPOTrainer
 from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
@@ -95,6 +96,10 @@ def tokenize_fn(texts):
                 num_episodes=args.num_episodes,
                 max_timesteps=args.max_timesteps,
                 update_timesteps=args.update_timesteps)
+    # save model checkpoint after fitting on only rank0
+    strategy.save_model(actor, 'actor_checkpoint_prompts.pt', only_rank0=True)
+    # save optimizer checkpoint on all ranks
+    strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint_prompts_%d.pt' % (dist.get_rank()), only_rank0=False)
 
 
 if __name__ == '__main__':

From b0867c75ad30083bdc6d553fdbe0462ad86bf5e5 Mon Sep 17 00:00:00 2001
From: zhouht00
Date: Tue, 21 Feb 2023 17:13:04 +0800
Subject: [PATCH 9/9] fix single-gpu save

---
 applications/ChatGPT/examples/train_dummy.py   | 5 +++--
 applications/ChatGPT/examples/train_prompts.py | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/applications/ChatGPT/examples/train_dummy.py b/applications/ChatGPT/examples/train_dummy.py
index 6e676720b875..a27d77a50fdf 100644
--- a/applications/ChatGPT/examples/train_dummy.py
+++ b/applications/ChatGPT/examples/train_dummy.py
@@ -2,7 +2,6 @@
 from copy import deepcopy
 
 import torch
-import torch.distributed as dist
 from chatgpt.nn import BLOOMActor, BLOOMCritic, GPTActor, GPTCritic, OPTActor, OPTCritic, RewardModel
 from chatgpt.trainer import PPOTrainer
 from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
@@ -101,7 +100,9 @@ def main(args):
     # save model checkpoint after fitting on only rank0
     strategy.save_model(actor, 'actor_checkpoint_dummy.pt', only_rank0=True)
     # save optimizer checkpoint on all ranks
-    strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint_dummy_%d.pt' % (dist.get_rank()), only_rank0=False)
+    strategy.save_optimizer(actor_optim,
+                            'actor_optim_checkpoint_dummy_%d.pt' % (torch.cuda.current_device()),
+                            only_rank0=False)
 
 
 if __name__ == '__main__':
diff --git a/applications/ChatGPT/examples/train_prompts.py b/applications/ChatGPT/examples/train_prompts.py
index f6f9cc28a738..53aa150a06fd 100644
--- a/applications/ChatGPT/examples/train_prompts.py
+++ b/applications/ChatGPT/examples/train_prompts.py
@@ -2,7 +2,7 @@
 from copy import deepcopy
 
 import pandas as pd
-import torch.distributed as dist
+import torch
 from chatgpt.nn import BLOOMActor, BLOOMCritic, GPTActor, GPTCritic, OPTActor, OPTCritic, RewardModel
 from chatgpt.trainer import PPOTrainer
 from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
@@ -99,7 +99,9 @@ def tokenize_fn(texts):
     # save model checkpoint after fitting on only rank0
     strategy.save_model(actor, 'actor_checkpoint_prompts.pt', only_rank0=True)
     # save optimizer checkpoint on all ranks
-    strategy.save_optimizer(actor_optim, 'actor_optim_checkpoint_prompts_%d.pt' % (dist.get_rank()), only_rank0=False)
+    strategy.save_optimizer(actor_optim,
+                            'actor_optim_checkpoint_prompts_%d.pt' % (torch.cuda.current_device()),
+                            only_rank0=False)
 
 
 if __name__ == '__main__':
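
Reviewer note (not part of the patch series): the sketch below shows how the pieces introduced above fit together once the series is applied, namely choosing a strategy, building the optimizer, and driving RewardModelTrainer. Class names, constructor arguments and hyperparameter values are taken from the diffs; the file name minimal_rm_sketch.py, the checkpoint paths, and the use of strategy.save_model for the reward model (the series only adds checkpoint saving to the PPO examples) are assumptions for illustration, not code from the PR.

# minimal_rm_sketch.py -- assumed usage of the APIs shown in the patches above
from chatgpt.dataset import RewardDataset
from chatgpt.nn import BLOOMRM
from chatgpt.trainer import RewardModelTrainer
from chatgpt.trainer.strategies import ColossalAIStrategy
from colossalai.nn.optimizer import HybridAdam
from datasets import load_dataset
from transformers import BloomTokenizerFast

# ZeRO-2 strategy, matching the updated shell scripts; NaiveStrategy() would do for a single GPU.
strategy = ColossalAIStrategy(stage=2, placement_policy='cuda')

# '/path/to/bloom-560m' is a placeholder for a local or hub BLOOM checkpoint.
tokenizer = BloomTokenizerFast.from_pretrained('/path/to/bloom-560m')
tokenizer.pad_token = tokenizer.eos_token
model = BLOOMRM(pretrained='/path/to/bloom-560m').cuda()

# HybridAdam goes with the colossalai strategies, plain torch.optim.Adam otherwise,
# mirroring the branch in train_reward_model.py.
optim = HybridAdam(model.parameters(), lr=5e-5)

# Dahoas/rm-static provides (prompt, chosen, rejected) pairs for the pairwise loss.
data = load_dataset('Dahoas/rm-static')
train_dataset = RewardDataset(data['train'], tokenizer, max_length=1024)
eval_dataset = RewardDataset(data['test'], tokenizer, max_length=1024)

trainer = RewardModelTrainer(model=model,
                             strategy=strategy,
                             optim=optim,
                             train_dataset=train_dataset,
                             eval_dataset=eval_dataset,
                             batch_size=4,
                             max_epochs=10)
trainer.fit(use_lora=0)    # 0 disables LoRA; a positive rank trains only the LoRA weights

# Saving through the strategy follows the pattern patches 8-9 add to the PPO examples;
# applying it to the reward model here is an assumption.
strategy.save_model(model, 'rm_ckpt.pt', only_rank0=True)

Under the colossalai_zero2 strategy such a script would be launched with torchrun, e.g. torchrun --standalone --nproc_per_node=2 minimal_rm_sketch.py, as in the updated train_rm.sh.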