diff --git a/src/emd/models/llms/deepseek.py b/src/emd/models/llms/deepseek.py index d76edc1e..dc202151 100644 --- a/src/emd/models/llms/deepseek.py +++ b/src/emd/models/llms/deepseek.py @@ -55,7 +55,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", - # modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -85,7 +85,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -116,7 +116,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -147,7 +147,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", @@ -244,7 +244,7 @@ ], allow_china_region=True, huggingface_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", - # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct", + modelscope_model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-8B", require_huggingface_token=False, application_scenario="Agent, tool use, translation, summary", description="The latest series of DeepSeek LLMs for reasoning", diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py index 2236c4ff..153bf738 100644 --- a/src/emd/models/llms/qwen.py +++ b/src/emd/models/llms/qwen.py @@ -78,8 +78,7 @@ model_id = "Qwen2.5-72B-Instruct-AWQ", supported_engines=[ vllm_qwen2d5_engine064, - tgi_qwen2d5_72b_engine064, - tgi_qwen2d5_72b_on_inf2 + tgi_qwen2d5_72b_engine064 ], supported_instances=[ g5d12xlarge_instance, @@ -108,35 +107,35 @@ ) ) -Model.register( - dict( - model_id = "Qwen2.5-72B-Instruct-AWQ-inf2", - supported_engines=[ - tgi_qwen2d5_72b_on_inf2 - ], - supported_instances=[ - inf2d24xlarge_instance, - local_instance - ], - supported_services=[ - sagemaker_service, - sagemaker_async_service, - ecs_service, - local_service - ], - supported_frameworks=[ - fastapi_framework - ], - allow_china_region=True, - huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", - modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", - require_huggingface_token=False, - application_scenario="Agent, tool use, translation, summary", - description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", - model_type=ModelType.LLM, - model_series=QWEN2D5_SERIES - ) -) +# Model.register( +# dict( +# model_id = "Qwen2.5-72B-Instruct-AWQ-inf2", +# supported_engines=[ +# tgi_qwen2d5_72b_on_inf2 +# ], +# supported_instances=[ +# inf2d24xlarge_instance, +# local_instance +# ], +# supported_services=[ +# sagemaker_service, +# sagemaker_async_service, +# ecs_service, +# local_service +# ], +# supported_frameworks=[ +# fastapi_framework +# ], +# allow_china_region=True, +# huggingface_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", +# modelscope_model_id="Qwen/Qwen2.5-72B-Instruct-AWQ", +# require_huggingface_token=False, +# application_scenario="Agent, tool use, translation, summary", +# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", +# model_type=ModelType.LLM, +# model_series=QWEN2D5_SERIES +# ) +# ) Model.register( @@ -230,33 +229,33 @@ ) ) -Model.register( - dict( - model_id = "Qwen2.5-32B-Instruct-inf2", - supported_engines=[tgi_qwen2d5_72b_on_inf2], - supported_instances=[ - inf2d24xlarge_instance, - local_instance - ], - supported_services=[ - sagemaker_service, - sagemaker_async_service, - ecs_service, - local_service - ], - supported_frameworks=[ - fastapi_framework - ], - allow_china_region=True, - huggingface_model_id="Qwen/Qwen2.5-32B-Instruct", - modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", - require_huggingface_token=False, - application_scenario="Agent, tool use, translation, summary", - description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", - model_type=ModelType.LLM, - model_series=QWEN2D5_SERIES - ) -) +# Model.register( +# dict( +# model_id = "Qwen2.5-32B-Instruct-inf2", +# supported_engines=[tgi_qwen2d5_72b_on_inf2], +# supported_instances=[ +# inf2d24xlarge_instance, +# local_instance +# ], +# supported_services=[ +# sagemaker_service, +# sagemaker_async_service, +# ecs_service, +# local_service +# ], +# supported_frameworks=[ +# fastapi_framework +# ], +# allow_china_region=True, +# huggingface_model_id="Qwen/Qwen2.5-32B-Instruct", +# modelscope_model_id="Qwen/Qwen2.5-32B-Instruct", +# require_huggingface_token=False, +# application_scenario="Agent, tool use, translation, summary", +# description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.", +# model_type=ModelType.LLM, +# model_series=QWEN2D5_SERIES +# ) +# ) Model.register( dict( diff --git a/src/emd/models/model.py b/src/emd/models/model.py index 43fde8c7..ac29d05a 100644 --- a/src/emd/models/model.py +++ b/src/emd/models/model.py @@ -10,6 +10,7 @@ FrameworkType, ModelType, ModelSeriesType, + ModelFilesDownloadSource # ModelPrepareMethod ) import boto3 @@ -183,6 +184,7 @@ class Model(ModelBase,Generic[T]): # download model files directly from s3 model_files_s3_path: Union[str,None] = None model_files_local_path: Union[str,None] = None + model_files_download_source: ModelFilesDownloadSource = ModelFilesDownloadSource.AUTO model_series: ModelSeries executable_config: Union[ExecutableConfig,None] = None diff --git a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py index 8427f7c7..5c50f522 100644 --- a/src/emd/models/utils/constants.py +++ b/src/emd/models/utils/constants.py @@ -131,13 +131,10 @@ class ServiceCode(ConstantBase): SAGEMAKER = "sagemaker" -# class ModelPrepareMethod(ConstantBase): -# UPLOAD_TO_S3 = "upload to s3" -# DOANLOWD_FROM_S3 = "download from s3" -# IGNORE = "ignore" - - - +class ModelFilesDownloadSource(ConstantBase): + HUGGINGFACE = "huggingface" + MODELSCOPE= "modelscope" + AUTO = "auto" class ServiceQuotaCode(ConstantBase): G5dXLARGE_ENDPOINT = "L-1928E07B" diff --git a/src/pipeline/deploy/prepare_model.py b/src/pipeline/deploy/prepare_model.py index f93f99b7..e6333943 100644 --- a/src/pipeline/deploy/prepare_model.py +++ b/src/pipeline/deploy/prepare_model.py @@ -5,7 +5,7 @@ from huggingface_hub import snapshot_download as hf_snapshot_download from modelscope import snapshot_download as ms_snapshot_download from emd.models import Model -from emd.models.utils.constants import ServiceType,EngineType +from emd.models.utils.constants import ServiceType,EngineType,ModelFilesDownloadSource from emd.utils.aws_service_utils import check_cn_region from emd.utils.logger_utils import get_logger from utils.common import upload_dir_to_s3_by_s5cmd,download_dir_from_s3_by_s5cmd @@ -110,15 +110,23 @@ def download_model_files(model:Model,model_dir=None): if engine_type == EngineType.COMFYUI: download_comfyui_model(model,model_dir=model_dir) else: - if check_cn_region(region): - try: - download_modelscope_model(model,model_dir=model_dir) - except Exception as e: - logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}") - logger.info("download from huggingface...") - download_huggingface_model(model, model_dir=model_dir) + if model.model_files_download_source == ModelFilesDownloadSource.AUTO: + if check_cn_region(region): + try: + download_modelscope_model(model,model_dir=model_dir) + except Exception as e: + logger.error(f"Error downloading {model.model_id} model from modelscope, error: {e}") + logger.info("download from huggingface...") + download_huggingface_model(model, model_dir=model_dir) + else: + download_huggingface_model(model,model_dir=model_dir) else: - download_huggingface_model(model,model_dir=model_dir) + if model.model_files_download_source == ModelFilesDownloadSource.HUGGINGFACE: + download_huggingface_model(model, model_dir=model_dir) + elif model.model_files_download_source == ModelFilesDownloadSource.MODELSCOPE: + download_modelscope_model(model, model_dir=model_dir) + else: + raise ValueError(f"Invalid model_files_download_source: {model.model_files_download_source}") def run(model:Model):#, model_s3_bucket, backend_type, service_type, region,args): diff --git a/src/pipeline/pipeline.py b/src/pipeline/pipeline.py index 39a05aa2..cd10c5f5 100644 --- a/src/pipeline/pipeline.py +++ b/src/pipeline/pipeline.py @@ -6,6 +6,7 @@ import json import logging from concurrent.futures import as_completed,ProcessPoolExecutor + from emd.models import Model from emd.constants import MODEL_DEFAULT_TAG,LOCAL_REGION from emd.models.utils.constants import FrameworkType,ServiceType,InstanceType