diff --git a/applications/Colossal-LLaMA-2/colossal_llama2/dataset/loader.py b/applications/Colossal-LLaMA-2/colossal_llama2/dataset/loader.py index 327651f4e645..abe0fd51a4af 100644 --- a/applications/Colossal-LLaMA-2/colossal_llama2/dataset/loader.py +++ b/applications/Colossal-LLaMA-2/colossal_llama2/dataset/loader.py @@ -80,15 +80,19 @@ def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[str, torch # `List[torch.Tensor]` batch_input_ids = [ - torch.LongTensor(instance["input_ids"][: self.max_length]) - if len(instance["input_ids"]) > self.max_length - else torch.LongTensor(instance["input_ids"]) + ( + torch.LongTensor(instance["input_ids"][: self.max_length]) + if len(instance["input_ids"]) > self.max_length + else torch.LongTensor(instance["input_ids"]) + ) for instance in instances ] batch_labels = [ - torch.LongTensor(instance["labels"][: self.max_length]) - if len(instance["labels"]) > self.max_length - else torch.LongTensor(instance["labels"]) + ( + torch.LongTensor(instance["labels"][: self.max_length]) + if len(instance["labels"]) > self.max_length + else torch.LongTensor(instance["labels"]) + ) for instance in instances ] diff --git a/applications/Colossal-LLaMA-2/train.py b/applications/Colossal-LLaMA-2/train.py index d97da61e4dc8..7e11eba37dbc 100644 --- a/applications/Colossal-LLaMA-2/train.py +++ b/applications/Colossal-LLaMA-2/train.py @@ -253,9 +253,11 @@ def main() -> None: coordinator.print_on_master(f"Model params: {format_numel_str(model_numel)}") optimizer = HybridAdam( - model_params=filter(lambda p: p.requires_grad, model.parameters()) - if args.freeze_non_embeds_params - else model.parameters(), + model_params=( + filter(lambda p: p.requires_grad, model.parameters()) + if args.freeze_non_embeds_params + else model.parameters() + ), lr=args.lr, betas=(0.9, 0.95), weight_decay=args.weight_decay, diff --git a/applications/ColossalChat/coati/dataset/loader.py b/applications/ColossalChat/coati/dataset/loader.py index 93cc1dab8d21..9dd9ed8e5df3 100755 --- a/applications/ColossalChat/coati/dataset/loader.py +++ b/applications/ColossalChat/coati/dataset/loader.py @@ -89,15 +89,19 @@ def __call__(self, instances: Sequence[Dict[str, List[int]]]) -> Dict[str, torch # `List[torch.Tensor]` batch_input_ids = [ - torch.LongTensor(instance["input_ids"][: self.max_length]) - if len(instance["input_ids"]) > self.max_length - else torch.LongTensor(instance["input_ids"]) + ( + torch.LongTensor(instance["input_ids"][: self.max_length]) + if len(instance["input_ids"]) > self.max_length + else torch.LongTensor(instance["input_ids"]) + ) for instance in instances ] batch_labels = [ - torch.LongTensor(instance["labels"][: self.max_length]) - if len(instance["labels"]) > self.max_length - else torch.LongTensor(instance["labels"]) + ( + torch.LongTensor(instance["labels"][: self.max_length]) + if len(instance["labels"]) > self.max_length + else torch.LongTensor(instance["labels"]) + ) for instance in instances ] if self.tokenizer.padding_side == "right": diff --git a/applications/ColossalChat/coati/models/loss.py b/applications/ColossalChat/coati/models/loss.py index aaef447a4383..e411dded148c 100755 --- a/applications/ColossalChat/coati/models/loss.py +++ b/applications/ColossalChat/coati/models/loss.py @@ -1,6 +1,7 @@ """ loss functions """ + from typing import Optional, Tuple import torch diff --git a/applications/ColossalChat/coati/models/reward_model.py b/applications/ColossalChat/coati/models/reward_model.py index 18c5eca41a71..f5712312ada2 100755 --- 
a/applications/ColossalChat/coati/models/reward_model.py +++ b/applications/ColossalChat/coati/models/reward_model.py @@ -1,6 +1,7 @@ """ reward model """ + from typing import Optional import torch diff --git a/applications/ColossalChat/coati/trainer/utils.py b/applications/ColossalChat/coati/trainer/utils.py index 5ce1e9ef009c..3c836b4b4db1 100755 --- a/applications/ColossalChat/coati/trainer/utils.py +++ b/applications/ColossalChat/coati/trainer/utils.py @@ -1,6 +1,7 @@ """ Training utilities for Coati. """ + from typing import Any import torch diff --git a/applications/ColossalEval/colossal_eval/dataset/agieval.py b/applications/ColossalEval/colossal_eval/dataset/agieval.py index 32f8544e93df..d5f2302494e8 100644 --- a/applications/ColossalEval/colossal_eval/dataset/agieval.py +++ b/applications/ColossalEval/colossal_eval/dataset/agieval.py @@ -78,7 +78,9 @@ def get_prompt(line: Dict, dataset_name: str, logger: DistributedLogger) -> Dict option_string = "ABCDEFG" count = len(line["options"]) - input = "问题:" + line["question"] + " " + "从以下选项中选择:" + " ".join(line["options"]) + "\n" + "答案:" + input = ( + "问题:" + line["question"] + " " + "从以下选项中选择:" + " ".join(line["options"]) + "\n" + "答案:" + ) all_classes = list(option_string[0:count]) @@ -150,7 +152,15 @@ def combine_prompt(prompt_path, dataset_name, load_explanation=True, chat_mode=F ) elif dataset_name in chinese_qa_datasets: question_input = ( - "问题:" + passage + " " + question + "\n" + "从以下选项中选择:" + " ".join(options) + "\n" + "答案:{}".format(label) + "问题:" + + passage + + " " + + question + + "\n" + + "从以下选项中选择:" + + " ".join(options) + + "\n" + + "答案:{}".format(label) ) elif dataset_name in english_cloze_datasets: question_input = "Question: ".format(idx + 1) + question + "\n" + "Answer: {}".format(answer) diff --git a/applications/ColossalEval/colossal_eval/dataset/ceval.py b/applications/ColossalEval/colossal_eval/dataset/ceval.py index 2cf09ec4dc2f..915f4d9b0850 100644 --- a/applications/ColossalEval/colossal_eval/dataset/ceval.py +++ b/applications/ColossalEval/colossal_eval/dataset/ceval.py @@ -57,7 +57,11 @@ "urban_and_rural_planner": ["Urban and Rural Planner", "注册城乡规划师", "Other"], "accountant": ["Accountant", "注册会计师", "Other"], "fire_engineer": ["Fire Engineer", "注册消防工程师", "Other"], - "environmental_impact_assessment_engineer": ["Environmental Impact Assessment Engineer", "环境影响评价工程师", "Other"], + "environmental_impact_assessment_engineer": [ + "Environmental Impact Assessment Engineer", + "环境影响评价工程师", + "Other", + ], "tax_accountant": ["Tax Accountant", "税务师", "Other"], "physician": ["Physician", "医师资格", "Other"], } diff --git a/applications/ColossalEval/colossal_eval/dataset/mtbench.py b/applications/ColossalEval/colossal_eval/dataset/mtbench.py index 9e74a4d826e3..03141556788f 100644 --- a/applications/ColossalEval/colossal_eval/dataset/mtbench.py +++ b/applications/ColossalEval/colossal_eval/dataset/mtbench.py @@ -56,9 +56,11 @@ def load(path: str, logger: DistributedLogger, few_shot: bool) -> List[Dict]: "instruction": question["turns"], "input": "", "output": [], - "target": [""] * turn_number - if question["question_id"] not in reference - else reference[question["question_id"]], + "target": ( + [""] * turn_number + if question["question_id"] not in reference + else reference[question["question_id"]] + ), } if category in dataset["test"]: diff --git a/applications/ColossalEval/colossal_eval/models/huggingface.py b/applications/ColossalEval/colossal_eval/models/huggingface.py index fff697e21e34..23c399ccedbd 100644 --- 
a/applications/ColossalEval/colossal_eval/models/huggingface.py +++ b/applications/ColossalEval/colossal_eval/models/huggingface.py @@ -77,7 +77,9 @@ def _get_choices_indices(self, language: str): self.indices_for_choices[0].append( self.tokenizer(f"Answer: {choice}", add_special_tokens=False).input_ids[-1] ) - self.indices_for_choices[1].append(self.tokenizer(f"答案:{choice}", add_special_tokens=False).input_ids[-1]) + self.indices_for_choices[1].append( + self.tokenizer(f"答案:{choice}", add_special_tokens=False).input_ids[-1] + ) def _load_tokenizer(self, path: str, tokenizer_path: Optional[str], tokenizer_kwargs: dict): """ diff --git a/applications/ColossalMoE/infer.py b/applications/ColossalMoE/infer.py index c175fe9e3f3f..68ba61e3ea6f 100644 --- a/applications/ColossalMoE/infer.py +++ b/applications/ColossalMoE/infer.py @@ -96,7 +96,11 @@ def main(): if coordinator.rank == 0: text = ["Hello my name is"] else: - text = ["What's the largest country in the world?", "How many people live in China?", "帮我续写这首诗:离离原上草"] + text = [ + "What's the largest country in the world?", + "How many people live in China?", + "帮我续写这首诗:离离原上草", + ] tokenizer.pad_token = tokenizer.unk_token inputs = tokenizer(text, return_tensors="pt", padding=True).to(torch.cuda.current_device()) diff --git a/applications/ColossalMoE/train.py b/applications/ColossalMoE/train.py index 850236726a27..749db8b5158b 100644 --- a/applications/ColossalMoE/train.py +++ b/applications/ColossalMoE/train.py @@ -195,9 +195,9 @@ def main(): lr_scheduler = CosineAnnealingWarmupLR( optimizer=optimizer, total_steps=args.num_epochs * len(dataloader), - warmup_steps=args.warmup_steps - if args.warmup_steps is not None - else int(args.num_epochs * len(dataloader) * 0.025), + warmup_steps=( + args.warmup_steps if args.warmup_steps is not None else int(args.num_epochs * len(dataloader) * 0.025) + ), eta_min=0.1 * args.lr, ) diff --git a/applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py b/applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py index 80dbf47def2b..2f9750de33fd 100644 --- a/applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py +++ b/applications/ColossalQA/colossalqa/chain/retrieval_qa/base.py @@ -7,6 +7,7 @@ https://github.com/langchain-ai/langchain The original code is licensed under the MIT license. """ + from __future__ import annotations import copy diff --git a/applications/ColossalQA/colossalqa/chain/retrieval_qa/load_chain.py b/applications/ColossalQA/colossalqa/chain/retrieval_qa/load_chain.py index a2b1f81e34b9..8cb8ef536b20 100644 --- a/applications/ColossalQA/colossalqa/chain/retrieval_qa/load_chain.py +++ b/applications/ColossalQA/colossalqa/chain/retrieval_qa/load_chain.py @@ -8,6 +8,7 @@ https://github.com/langchain-ai/langchain The original code is licensed under the MIT license. """ + import copy from typing import Any, Mapping, Optional, Protocol diff --git a/applications/ColossalQA/colossalqa/chain/retrieval_qa/stuff.py b/applications/ColossalQA/colossalqa/chain/retrieval_qa/stuff.py index bf7ad0ffce28..64e476438576 100644 --- a/applications/ColossalQA/colossalqa/chain/retrieval_qa/stuff.py +++ b/applications/ColossalQA/colossalqa/chain/retrieval_qa/stuff.py @@ -7,6 +7,7 @@ https://github.com/langchain-ai/langchain The original code is licensed under the MIT license. 
""" + import copy from typing import Any, List diff --git a/applications/ColossalQA/colossalqa/data_loader/table_dataloader.py b/applications/ColossalQA/colossalqa/data_loader/table_dataloader.py index 29542466fa8f..0ad66f0ad999 100644 --- a/applications/ColossalQA/colossalqa/data_loader/table_dataloader.py +++ b/applications/ColossalQA/colossalqa/data_loader/table_dataloader.py @@ -2,7 +2,6 @@ Class for loading table type data. please refer to Pandas-Input/Output for file format details. """ - import glob import os diff --git a/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py b/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py index 3629778698fb..ca8d64f2293f 100644 --- a/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py +++ b/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py @@ -20,6 +20,7 @@ print(resp) # super-heavyweight awesome-natured yawning Australian creature! """ + import json from typing import Any, Mapping diff --git a/applications/ColossalQA/colossalqa/local/llm.py b/applications/ColossalQA/colossalqa/local/llm.py index 30a456c3d9c7..58a4811d9fdc 100644 --- a/applications/ColossalQA/colossalqa/local/llm.py +++ b/applications/ColossalQA/colossalqa/local/llm.py @@ -12,6 +12,7 @@ logger.info(llm(TEST_PROMPT_CHATGLM, max_new_tokens=100), verbose=True) """ + from typing import Any, List, Mapping, Optional import torch diff --git a/applications/ColossalQA/colossalqa/local/utils.py b/applications/ColossalQA/colossalqa/local/utils.py index ed90264cad8d..2cbd474bdbd2 100644 --- a/applications/ColossalQA/colossalqa/local/utils.py +++ b/applications/ColossalQA/colossalqa/local/utils.py @@ -1,6 +1,7 @@ """ Generation utilities """ + import json from typing import List diff --git a/applications/ColossalQA/colossalqa/memory.py b/applications/ColossalQA/colossalqa/memory.py index 7a5512281035..d8de544a59e6 100644 --- a/applications/ColossalQA/colossalqa/memory.py +++ b/applications/ColossalQA/colossalqa/memory.py @@ -2,6 +2,7 @@ Implement a memory class for storing conversation history Support long term and short term memory """ + from typing import Any, Dict, List from colossalqa.chain.memory.summary import ConversationSummaryMemory diff --git a/applications/ColossalQA/colossalqa/mylogging.py b/applications/ColossalQA/colossalqa/mylogging.py index 574c33b41685..67e2a83ed141 100644 --- a/applications/ColossalQA/colossalqa/mylogging.py +++ b/applications/ColossalQA/colossalqa/mylogging.py @@ -1,6 +1,7 @@ """ Class for logging with extra control for debugging """ + import logging diff --git a/applications/ColossalQA/colossalqa/retrieval_conversation_en.py b/applications/ColossalQA/colossalqa/retrieval_conversation_en.py index 96bce82b9ee0..cab16807579e 100644 --- a/applications/ColossalQA/colossalqa/retrieval_conversation_en.py +++ b/applications/ColossalQA/colossalqa/retrieval_conversation_en.py @@ -1,6 +1,7 @@ """ Script for Chinese retrieval based conversation system backed by ChatGLM """ + from typing import Tuple from colossalqa.chain.retrieval_qa.base import RetrievalQA diff --git a/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py b/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py index 6e77bb2aee17..a991b202e8ee 100644 --- a/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py +++ b/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py @@ -1,6 +1,7 @@ """ Multilingual retrieval based conversation system """ + from typing import List from 
colossalqa.data_loader.document_loader import DocumentLoader diff --git a/applications/ColossalQA/colossalqa/retrieval_conversation_zh.py b/applications/ColossalQA/colossalqa/retrieval_conversation_zh.py index 4eef41947d11..6c9b69117f8a 100644 --- a/applications/ColossalQA/colossalqa/retrieval_conversation_zh.py +++ b/applications/ColossalQA/colossalqa/retrieval_conversation_zh.py @@ -1,6 +1,7 @@ """ Script for Chinese retrieval based conversation system backed by ChatGLM """ + from typing import Tuple from colossalqa.chain.retrieval_qa.base import RetrievalQA diff --git a/applications/ColossalQA/colossalqa/retriever.py b/applications/ColossalQA/colossalqa/retriever.py index 6a0c69859ac7..ec4941ddd0a7 100644 --- a/applications/ColossalQA/colossalqa/retriever.py +++ b/applications/ColossalQA/colossalqa/retriever.py @@ -1,6 +1,7 @@ """ Code for custom retriver with incremental update """ + import copy import hashlib import os diff --git a/applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py b/applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py index 3815f5ed2621..697af484b3fc 100644 --- a/applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py +++ b/applications/ColossalQA/colossalqa/text_splitter/chinese_text_splitter.py @@ -1,6 +1,7 @@ """ Code for Chinese text splitter """ + from typing import Any, List, Optional from colossalqa.text_splitter.utils import get_cleaned_paragraph diff --git a/applications/ColossalQA/examples/retrieval_conversation_en.py b/applications/ColossalQA/examples/retrieval_conversation_en.py index fe2b9b4db3c2..b7339de933bb 100644 --- a/applications/ColossalQA/examples/retrieval_conversation_en.py +++ b/applications/ColossalQA/examples/retrieval_conversation_en.py @@ -1,6 +1,7 @@ """ Script for English retrieval based conversation system backed by LLaMa2 """ + import argparse import os diff --git a/applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py b/applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py index d4ba73b9468c..a0c90e7c5d8f 100644 --- a/applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py +++ b/applications/ColossalQA/examples/retrieval_conversation_en_customer_service.py @@ -1,6 +1,7 @@ """ Script for English retrieval based conversation system backed by LLaMa2 """ + import argparse import json import os diff --git a/applications/ColossalQA/examples/retrieval_conversation_zh.py b/applications/ColossalQA/examples/retrieval_conversation_zh.py index b143b9baacc1..96641edf5290 100644 --- a/applications/ColossalQA/examples/retrieval_conversation_zh.py +++ b/applications/ColossalQA/examples/retrieval_conversation_zh.py @@ -1,6 +1,7 @@ """ Script for Chinese retrieval based conversation system backed by ChatGLM """ + import argparse import os diff --git a/applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py b/applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py index adb6544941f0..865ade5bb2d2 100644 --- a/applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py +++ b/applications/ColossalQA/examples/retrieval_intent_classification_zh_customer_service.py @@ -1,6 +1,7 @@ """ Script for English retrieval based conversation system backed by LLaMa2 """ + import argparse import os diff --git a/colossalai/auto_parallel/meta_profiler/meta_registry/conv.py b/colossalai/auto_parallel/meta_profiler/meta_registry/conv.py index 
2f630995cdbc..b1e32e885783 100644 --- a/colossalai/auto_parallel/meta_profiler/meta_registry/conv.py +++ b/colossalai/auto_parallel/meta_profiler/meta_registry/conv.py @@ -107,20 +107,22 @@ def convnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, L # NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward fwd_memory_cost = MemoryCost( activation=compute_size_in_bytes([input_tensor, output_tensor]), - parameter=compute_size_in_bytes([weight_tensor, bias_tensor]) - if has_bias - else compute_size_in_bytes(weight_tensor), + parameter=( + compute_size_in_bytes([weight_tensor, bias_tensor]) if has_bias else compute_size_in_bytes(weight_tensor) + ), temp=0, buffer=0, ) bwd_memory_cost = MemoryCost( - activation=compute_size_in_bytes([input_tensor, weight_tensor, bias_tensor]) - if has_bias - else compute_size_in_bytes([input_tensor, weight_tensor]), - parameter=compute_size_in_bytes([weight_tensor, bias_tensor]) - if has_bias - else compute_size_in_bytes(weight_tensor), + activation=( + compute_size_in_bytes([input_tensor, weight_tensor, bias_tensor]) + if has_bias + else compute_size_in_bytes([input_tensor, weight_tensor]) + ), + parameter=( + compute_size_in_bytes([weight_tensor, bias_tensor]) if has_bias else compute_size_in_bytes(weight_tensor) + ), temp=0, buffer=0, ) diff --git a/colossalai/inference/engine/modeling/_utils.py b/colossalai/inference/engine/modeling/_utils.py index 068b64b4f829..46d4222c4ac2 100644 --- a/colossalai/inference/engine/modeling/_utils.py +++ b/colossalai/inference/engine/modeling/_utils.py @@ -1,6 +1,7 @@ """ Utils for model inference """ + import os import torch diff --git a/colossalai/inference/kv_cache/batch_infer_state.py b/colossalai/inference/kv_cache/batch_infer_state.py index f707a86df37e..b72610899abc 100644 --- a/colossalai/inference/kv_cache/batch_infer_state.py +++ b/colossalai/inference/kv_cache/batch_infer_state.py @@ -14,6 +14,7 @@ class BatchInferState: Information to be passed and used for a batch of inputs during a single model forward """ + batch_size: int max_len_in_batch: int diff --git a/colossalai/inference/kv_cache/kvcache_manager.py b/colossalai/inference/kv_cache/kvcache_manager.py index dda46a756cc3..467f8d441aa2 100644 --- a/colossalai/inference/kv_cache/kvcache_manager.py +++ b/colossalai/inference/kv_cache/kvcache_manager.py @@ -4,6 +4,7 @@ https://github.com/ModelTC/lightllm/blob/050af3ce65edca617e2f30ec2479397d5bb248c9/lightllm/common/mem_manager.py we slightly changed it to make it suitable for our colossal-ai shardformer TP-engine design. 
""" + import torch from transformers.utils import logging diff --git a/colossalai/legacy/context/process_group_initializer/initializer_2d.py b/colossalai/legacy/context/process_group_initializer/initializer_2d.py index 1c08d4d4296a..fc51844b661f 100644 --- a/colossalai/legacy/context/process_group_initializer/initializer_2d.py +++ b/colossalai/legacy/context/process_group_initializer/initializer_2d.py @@ -138,9 +138,7 @@ def __init__(self, *args, **kwargs): self.num_group = self.world_size // self.tensor_parallel_size self.summa_dim = int(math.sqrt(self.tensor_parallel_size)) - assert ( - self.tensor_parallel_size == self.summa_dim**2 - ), "2D summa dim should equal to tensor parallel size ^ 0.5" + assert self.tensor_parallel_size == self.summa_dim**2, "2D summa dim should equal to tensor parallel size ^ 0.5" _check_summa_env_var(self.summa_dim) self.col_initializer = Initializer_2D_Col(self.num_group, self.summa_dim, *args, **kwargs) diff --git a/colossalai/legacy/inference/async_engine.py b/colossalai/legacy/inference/async_engine.py index d0890ba3e9fc..b4c523669af2 100644 --- a/colossalai/legacy/inference/async_engine.py +++ b/colossalai/legacy/inference/async_engine.py @@ -54,7 +54,6 @@ async def __anext__(self) -> RequestOutput: class Async_Engine: - """ Use an engine to launch RAY Driver --> RAY Worker --> Async_Manager Background loop: inference reqs in waiting list (Listen) diff --git a/colossalai/legacy/inference/dynamic_batching/io_struct.py b/colossalai/legacy/inference/dynamic_batching/io_struct.py index fc5ecfe5796b..abc41cc8e909 100644 --- a/colossalai/legacy/inference/dynamic_batching/io_struct.py +++ b/colossalai/legacy/inference/dynamic_batching/io_struct.py @@ -118,16 +118,16 @@ def __len__(self): class BatchTokenIdOut: def __init__(self): - self.reqs_infs: List[ - Tuple[str, int, Dict, bool, bool] - ] = [] # [req_id, new_token_id, gen_metadata, finished_state, abort_state] + self.reqs_infs: List[Tuple[str, int, Dict, bool, bool]] = ( + [] + ) # [req_id, new_token_id, gen_metadata, finished_state, abort_state] class BatchStrOut: def __init__(self): - self.reqs_infs: List[ - Tuple[str, str, Dict, bool, bool] - ] = [] # [req_id, token_str, gen_metadata, finished_state, abort_state] + self.reqs_infs: List[Tuple[str, str, Dict, bool, bool]] = ( + [] + ) # [req_id, token_str, gen_metadata, finished_state, abort_state] class AbortReq: diff --git a/colossalai/legacy/inference/hybridengine/modeling/_utils.py b/colossalai/legacy/inference/hybridengine/modeling/_utils.py index 068b64b4f829..46d4222c4ac2 100644 --- a/colossalai/legacy/inference/hybridengine/modeling/_utils.py +++ b/colossalai/legacy/inference/hybridengine/modeling/_utils.py @@ -1,6 +1,7 @@ """ Utils for model inference """ + import os import torch diff --git a/colossalai/legacy/inference/tensor_parallel/batch_infer_state.py b/colossalai/legacy/inference/tensor_parallel/batch_infer_state.py index f707a86df37e..b72610899abc 100644 --- a/colossalai/legacy/inference/tensor_parallel/batch_infer_state.py +++ b/colossalai/legacy/inference/tensor_parallel/batch_infer_state.py @@ -14,6 +14,7 @@ class BatchInferState: Information to be passed and used for a batch of inputs during a single model forward """ + batch_size: int max_len_in_batch: int diff --git a/colossalai/legacy/inference/tensor_parallel/kvcache_manager.py b/colossalai/legacy/inference/tensor_parallel/kvcache_manager.py index 91bb96a1f1f0..8c54fda2602a 100644 --- a/colossalai/legacy/inference/tensor_parallel/kvcache_manager.py +++ 
b/colossalai/legacy/inference/tensor_parallel/kvcache_manager.py @@ -4,6 +4,7 @@ https://github.com/ModelTC/lightllm/blob/050af3ce65edca617e2f30ec2479397d5bb248c9/lightllm/common/mem_manager.py we slightly changed it to make it suitable for our colossal-ai shardformer TP-engine design. """ + import torch from transformers.utils import logging diff --git a/colossalai/legacy/inference/tensor_parallel/modeling/_utils.py b/colossalai/legacy/inference/tensor_parallel/modeling/_utils.py index 068b64b4f829..46d4222c4ac2 100644 --- a/colossalai/legacy/inference/tensor_parallel/modeling/_utils.py +++ b/colossalai/legacy/inference/tensor_parallel/modeling/_utils.py @@ -1,6 +1,7 @@ """ Utils for model inference """ + import os import torch diff --git a/colossalai/shardformer/examples/performance_benchmark.py b/colossalai/shardformer/examples/performance_benchmark.py index 81215dcdf5d4..981525359c91 100644 --- a/colossalai/shardformer/examples/performance_benchmark.py +++ b/colossalai/shardformer/examples/performance_benchmark.py @@ -1,6 +1,7 @@ """ Shardformer Benchmark """ + import torch import torch.distributed as dist import transformers diff --git a/colossalai/shardformer/modeling/chatglm2_6b/modeling_chatglm.py b/colossalai/shardformer/modeling/chatglm2_6b/modeling_chatglm.py index bf581300a7b1..6ae4b06e517a 100644 --- a/colossalai/shardformer/modeling/chatglm2_6b/modeling_chatglm.py +++ b/colossalai/shardformer/modeling/chatglm2_6b/modeling_chatglm.py @@ -33,6 +33,7 @@ Note that the license is subject to update to a more comprehensive version. For any questions related to the license and copyright, please contact us at glm-130b@googlegroups.com. """ + """ PyTorch ChatGLM model. """ import copy diff --git a/colossalai/shardformer/policies/base_policy.py b/colossalai/shardformer/policies/base_policy.py index d67ab0a3c6bb..626dea86b66d 100644 --- a/colossalai/shardformer/policies/base_policy.py +++ b/colossalai/shardformer/policies/base_policy.py @@ -28,6 +28,7 @@ class SubModuleReplacementDescription: kwargs (Dict[str, Any]): the dictionary used to pass extra arguments to the `ParallelModule.from_native_module` method. ignore_if_not_exist (bool): if the submodule does not exist, ignore it or raise an exception """ + suffix: str target_module: Union[ParallelModule, BaseLayerNorm] kwargs: Dict[str, Any] = None @@ -54,6 +55,7 @@ def example_replace_weight(module: torch.nn.Module): object which specifies the module to be replaced and the target module used to replacement. method_replace (Dict[str, Callable]): key is the method name, value is the method for replacement """ + attribute_replacement: Dict[str, Any] = None param_replacement: List[Callable] = None sub_module_replacement: List[SubModuleReplacementDescription] = None diff --git a/colossalai/shardformer/shard/grad_ckpt_config.py b/colossalai/shardformer/shard/grad_ckpt_config.py index 9c6c2b54ea39..f2d18f3af8d2 100644 --- a/colossalai/shardformer/shard/grad_ckpt_config.py +++ b/colossalai/shardformer/shard/grad_ckpt_config.py @@ -22,6 +22,7 @@ class PipelineGradientCheckpointConfig(GradientCheckpointConfig): 2. Customize # ckpt layers assigned to each stage. This takes precedence over `gradient_checkpointing_ratio`. """ + """ Args: gradient_checkpointing_ratio (Optional[float]): The ratio of gradient checkpointing. It can only be used in pipeline parallelism. Defaults to None. 
diff --git a/colossalai/shardformer/shard/shard_config.py b/colossalai/shardformer/shard/shard_config.py index 7489873c2ed6..c0f28c947f8b 100644 --- a/colossalai/shardformer/shard/shard_config.py +++ b/colossalai/shardformer/shard/shard_config.py @@ -30,6 +30,7 @@ class ShardConfig: gradient_checkpoint_config (Optional[GradientCheckpointConfig]): The gradient checkpoint config. Defaults to None. enable_all_optimization (bool): Whether to turn on all optimization tools including 'fused normalization', 'flash attention', 'JIT fused operators', 'sequence parallelism' and 'sequence overlap'. Defaults to False. """ + tensor_parallel_process_group: Optional[ProcessGroup] = None sequence_parallel_process_group: Optional[ProcessGroup] = None pipeline_stage_manager: Optional[PipelineStageManager] = None diff --git a/examples/community/roberta/pretraining/nvidia_bert_dataset_provider.py b/examples/community/roberta/pretraining/nvidia_bert_dataset_provider.py index 09677a6195cb..4d08d9941133 100644 --- a/examples/community/roberta/pretraining/nvidia_bert_dataset_provider.py +++ b/examples/community/roberta/pretraining/nvidia_bert_dataset_provider.py @@ -52,9 +52,11 @@ def __len__(self): def __getitem__(self, index): [input_ids, input_mask, segment_ids, masked_lm_labels] = [ - torch.from_numpy(input[index].astype(np.int64)) - if indice < 5 - else torch.from_numpy(np.asarray(input[index].astype(np.int64))) + ( + torch.from_numpy(input[index].astype(np.int64)) + if indice < 5 + else torch.from_numpy(np.asarray(input[index].astype(np.int64))) + ) for indice, input in enumerate(self.inputs) ] diff --git a/examples/images/diffusion/ldm/models/diffusion/ddpm.py b/examples/images/diffusion/ldm/models/diffusion/ddpm.py index 20e26256e18e..3cf6aceb5197 100644 --- a/examples/images/diffusion/ldm/models/diffusion/ddpm.py +++ b/examples/images/diffusion/ldm/models/diffusion/ddpm.py @@ -229,9 +229,7 @@ def register_schedule( ) if self.parameterization == "eps": - lvlb_weights = self.betas**2 / ( - 2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod) - ) + lvlb_weights = self.betas**2 / (2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)) elif self.parameterization == "x0": lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2.0 * 1 - torch.Tensor(alphas_cumprod)) elif self.parameterization == "v": @@ -1186,9 +1184,11 @@ def progressive_denoising( if cond is not None: if isinstance(cond, dict): cond = { - key: cond[key][:batch_size] - if not isinstance(cond[key], list) - else list(map(lambda x: x[:batch_size], cond[key])) + key: ( + cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + ) for key in cond } else: @@ -1321,9 +1321,11 @@ def sample( if cond is not None: if isinstance(cond, dict): cond = { - key: cond[key][:batch_size] - if not isinstance(cond[key], list) - else list(map(lambda x: x[:batch_size], cond[key])) + key: ( + cond[key][:batch_size] + if not isinstance(cond[key], list) + else list(map(lambda x: x[:batch_size], cond[key])) + ) for key in cond } else: diff --git a/examples/images/diffusion/ldm/models/diffusion/dpm_solver/sampler.py b/examples/images/diffusion/ldm/models/diffusion/dpm_solver/sampler.py index 55dac8555e5f..4104fe3b0df4 100644 --- a/examples/images/diffusion/ldm/models/diffusion/dpm_solver/sampler.py +++ b/examples/images/diffusion/ldm/models/diffusion/dpm_solver/sampler.py @@ -1,4 +1,5 @@ """SAMPLING ONLY.""" + import torch from .dpm_solver import DPM_Solver, 
NoiseScheduleVP, model_wrapper diff --git a/examples/images/diffusion/ldm/modules/diffusionmodules/openaimodel.py b/examples/images/diffusion/ldm/modules/diffusionmodules/openaimodel.py index 6c80f3229ce3..afde5dfd4430 100644 --- a/examples/images/diffusion/ldm/modules/diffusionmodules/openaimodel.py +++ b/examples/images/diffusion/ldm/modules/diffusionmodules/openaimodel.py @@ -640,23 +640,25 @@ def __init__( use_checkpoint=use_checkpoint, use_scale_shift_norm=use_scale_shift_norm, ), - AttentionBlock( - ch, - use_checkpoint=use_checkpoint, - num_heads=num_heads, - num_head_channels=dim_head, - use_new_attention_order=use_new_attention_order, - ) - if not use_spatial_transformer - else SpatialTransformer( # always uses a self-attn - ch, - num_heads, - dim_head, - depth=transformer_depth, - context_dim=context_dim, - disable_self_attn=disable_middle_self_attn, - use_linear=use_linear_in_transformer, - use_checkpoint=use_checkpoint, + ( + AttentionBlock( + ch, + use_checkpoint=use_checkpoint, + num_heads=num_heads, + num_head_channels=dim_head, + use_new_attention_order=use_new_attention_order, + ) + if not use_spatial_transformer + else SpatialTransformer( # always uses a self-attn + ch, + num_heads, + dim_head, + depth=transformer_depth, + context_dim=context_dim, + disable_self_attn=disable_middle_self_attn, + use_linear=use_linear_in_transformer, + use_checkpoint=use_checkpoint, + ) ), ResBlock( ch, diff --git a/examples/images/diffusion/ldm/modules/midas/midas/midas_net.py b/examples/images/diffusion/ldm/modules/midas/midas/midas_net.py index 0dd87b59619c..8c13f39ff48f 100644 --- a/examples/images/diffusion/ldm/modules/midas/midas/midas_net.py +++ b/examples/images/diffusion/ldm/modules/midas/midas/midas_net.py @@ -2,6 +2,7 @@ This file contains code that is adapted from https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py """ + import torch import torch.nn as nn diff --git a/examples/images/diffusion/ldm/modules/midas/midas/midas_net_custom.py b/examples/images/diffusion/ldm/modules/midas/midas/midas_net_custom.py index 4d30744c46d3..c79581afcd2d 100644 --- a/examples/images/diffusion/ldm/modules/midas/midas/midas_net_custom.py +++ b/examples/images/diffusion/ldm/modules/midas/midas/midas_net_custom.py @@ -2,6 +2,7 @@ This file contains code that is adapted from https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py """ + import torch import torch.nn as nn diff --git a/examples/images/diffusion/ldm/modules/midas/utils.py b/examples/images/diffusion/ldm/modules/midas/utils.py index 1428d42b2445..f7fc7dcc98a4 100644 --- a/examples/images/diffusion/ldm/modules/midas/utils.py +++ b/examples/images/diffusion/ldm/modules/midas/utils.py @@ -1,4 +1,5 @@ """Utils for monoDepth.""" + import re import sys diff --git a/examples/tutorial/sequence_parallel/data/datasets/helpers.cpp b/examples/tutorial/sequence_parallel/data/datasets/helpers.cpp index 52977e63181f..fe9968177fb1 100644 --- a/examples/tutorial/sequence_parallel/data/datasets/helpers.cpp +++ b/examples/tutorial/sequence_parallel/data/datasets/helpers.cpp @@ -369,9 +369,9 @@ py::array build_mapping_impl(const py::array_t& docs_, } } // for (auto sent_index=sent_index_first; ... 
- } // if (num_remain_sent > 1) { - } // for (int doc=0; doc < num_docs; ++doc) { - } // for (int epoch=0; epoch < num_epochs; ++epoch) { + } // if (num_remain_sent > 1) { + } // for (int doc=0; doc < num_docs; ++doc) { + } // for (int epoch=0; epoch < num_epochs; ++epoch) { if (!second) { if (verbose) { @@ -606,9 +606,9 @@ py::array build_blocks_mapping_impl( num_sent = 0; } } // for (auto sent_index=sent_index_first; ... - } // if (num_remain_sent > 1) { - } // for (int doc=0; doc < num_docs; ++doc) { - } // for (int epoch=0; epoch < num_epochs; ++epoch) { + } // if (num_remain_sent > 1) { + } // for (int doc=0; doc < num_docs; ++doc) { + } // for (int epoch=0; epoch < num_epochs; ++epoch) { if (!second) { if (verbose) { diff --git a/extensions/csrc/arm/cpu_adam_arm.h b/extensions/csrc/arm/cpu_adam_arm.h index c731850edc31..d48968e21682 100644 --- a/extensions/csrc/arm/cpu_adam_arm.h +++ b/extensions/csrc/arm/cpu_adam_arm.h @@ -4,7 +4,7 @@ #include -#define ROUND_DOWN(size, step) ((size) & ~((step)-1)) +#define ROUND_DOWN(size, step) ((size) & ~((step) - 1)) #define TILE (128 * 1024 * 1024) #if defined(__aarch64__) diff --git a/extensions/csrc/cuda/cpu_adam.h b/extensions/csrc/cuda/cpu_adam.h index db1f26d5f6da..45e1dde6242d 100644 --- a/extensions/csrc/cuda/cpu_adam.h +++ b/extensions/csrc/cuda/cpu_adam.h @@ -32,7 +32,7 @@ SOFTWARE #include #endif -#define ROUND_DOWN(size, step) ((size) & ~((step)-1)) +#define ROUND_DOWN(size, step) ((size) & ~((step) - 1)) #define TILE (128 * 1024 * 1024) #if defined(__AVX512__) or defined(__AVX256__) or defined(__AVX2__) diff --git a/tests/kit/model_zoo/torchvision/torchvision.py b/tests/kit/model_zoo/torchvision/torchvision.py index 57b633e9d676..c0524d089cfe 100644 --- a/tests/kit/model_zoo/torchvision/torchvision.py +++ b/tests/kit/model_zoo/torchvision/torchvision.py @@ -34,14 +34,14 @@ def swin_s(): # special output transform fn -google_net_output_transform_fn = ( - lambda x: dict(output=sum(x)) if isinstance(x, torchvision.models.GoogLeNetOutputs) else dict(output=x) +google_net_output_transform_fn = lambda x: ( + dict(output=sum(x)) if isinstance(x, torchvision.models.GoogLeNetOutputs) else dict(output=x) ) -swin_s_output_output_transform_fn = ( - lambda x: {f"output{idx}": val for idx, val in enumerate(x)} if isinstance(x, tuple) else dict(output=x) +swin_s_output_output_transform_fn = lambda x: ( + {f"output{idx}": val for idx, val in enumerate(x)} if isinstance(x, tuple) else dict(output=x) ) -inception_v3_output_transform_fn = ( - lambda x: dict(output=sum(x)) if isinstance(x, torchvision.models.InceptionOutputs) else dict(output=x) +inception_v3_output_transform_fn = lambda x: ( + dict(output=sum(x)) if isinstance(x, torchvision.models.InceptionOutputs) else dict(output=x) ) model_zoo.register( diff --git a/tests/test_lazy/test_models.py b/tests/test_lazy/test_models.py index d0c4cd0a7c48..d678f44089a8 100644 --- a/tests/test_lazy/test_models.py +++ b/tests/test_lazy/test_models.py @@ -7,9 +7,11 @@ @pytest.mark.skipif(not SUPPORT_LAZY, reason="requires torch >= 1.12.0") @pytest.mark.parametrize( "subset", - [COMMON_MODELS] - if IS_FAST_TEST - else ["torchvision", "diffusers", "timm", "transformers", "torchaudio", "deepfm", "dlrm"], + ( + [COMMON_MODELS] + if IS_FAST_TEST + else ["torchvision", "diffusers", "timm", "transformers", "torchaudio", "deepfm", "dlrm"] + ), ) @pytest.mark.parametrize("default_device", ["cpu", "cuda"]) def test_torchvision_models_lazy_init(subset, default_device):
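The hunks above are formatting-only: no behavior changes, just a newer code style that wraps long conditional expressions in their own parentheses, adds a blank line after module-level docstrings, realigns trailing C++ comments, and spaces out macro arithmetic. As a rough illustration of the dominant Python pattern, here is a minimal sketch; the collate function name and signature are hypothetical and not taken from the patch, and attributing the style to a formatter upgrade (the Python changes match Black's 2024 stable style, the C++ realignment looks like clang-format) is my inference rather than something the diff states.

# Minimal sketch of the restyling applied throughout this patch.
# The function below is hypothetical; only the wrapping pattern mirrors the hunks.
from typing import Dict, List, Sequence

import torch


def collate(instances: Sequence[Dict[str, List[int]]], max_length: int) -> List[torch.Tensor]:
    # Pre-patch layout: the long conditional expression was broken bare across lines.
    # Post-patch layout: the same expression is enclosed in its own parentheses.
    return [
        (
            torch.LongTensor(instance["input_ids"][:max_length])
            if len(instance["input_ids"]) > max_length
            else torch.LongTensor(instance["input_ids"])
        )
        for instance in instances
    ]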