In [1]:
from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.preprocessors import TokenClassificationTransformersPreprocessor

# Chinese word segmentation using the model id below.
model_id = 'iic/nlp_structbert_word-segmentation_chinese-base'

# Load the model first: the preprocessor is constructed from the model's directory.
model = Model.from_pretrained(model_id)
tokenizer = TokenClassificationTransformersPreprocessor(model.model_dir)

# Build the pipeline from the explicit model/preprocessor pair, then segment one sentence.
pipeline_ins = pipeline(
    task=Tasks.word_segmentation,
    model=model,
    preprocessor=tokenizer,
)
result = pipeline_ins(input="今天天气不错，适合出去游玩")
print(result)
# Expected output: {'output': '今天 天气 不错 ， 适合 出去 游玩'}

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'addict'

In [None]:
import os.path as osp
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.msdatasets import MsDataset
from modelscope.utils.constant import DownloadMode

# Step 1: prepare the datasets. This uses a dataset already hosted on ModelScope;
# a locally built COCO dataset can be used instead.
train_dataset = MsDataset.load(
    'person_detection_for_train',
    namespace="modelscope",
    split='train',
    download_mode=DownloadMode.FORCE_REDOWNLOAD,
)
val_dataset = MsDataset.load(
    'person_detection_for_train',
    namespace="modelscope",
    split='validation',
    download_mode=DownloadMode.FORCE_REDOWNLOAD,
)

# Step 2: configure the training parameters.
# Root directory of each split, as recorded in the dataset's split config.
train_root_dir = train_dataset.config_kwargs['split_config']['train']
val_root_dir = val_dataset.config_kwargs['split_config']['validation']

# Image folder and annotation file for the training split.
train_img_dir = osp.join(train_root_dir, 'images')
train_anno_path = osp.join(train_root_dir, 'train.json')

# Image folder and annotation file for the validation split.
val_img_dir = osp.join(val_root_dir, 'images')
val_anno_path = osp.join(val_root_dir, 'val.json')

kwargs = {
    'model': 'iic/cv_tinynas_human-detection_damoyolo',  # use the DAMO-YOLO-S model
    'gpu_ids': [0],  # GPUs used for training
    'batch_size': 2,  # images per GPU = batch_size // len(gpu_ids)
    'max_epochs': 3,  # total number of training epochs
    'num_classes': 1,  # number of classes in the custom data
    'load_pretrain': True,  # load pretrained weights; if False, train from scratch
    'base_lr_per_img': 0.001,  # per-image learning rate; lr = base_lr_per_img * batch_size
    'train_image_dir': train_img_dir,  # training image directory
    'val_image_dir': val_img_dir,  # validation image directory
    'train_ann': train_anno_path,  # training annotation file
    'val_ann': val_anno_path,  # validation annotation file
}

# Step 3: build the trainer and start training.
trainer = build_trainer(name=Trainers.tinynas_damoyolo, default_args=kwargs)
trainer.train()

# 电商分词器 (e-commerce word segmenter)

In [None]:
from modelscope.models import Model
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
# For ModelScope versions below 1.1, use TokenClassificationPreprocessor instead.
from modelscope.preprocessors import TokenClassificationTransformersPreprocessor

# Chinese word segmentation using the e-commerce model id below.
model_id = 'iic/nlp_structbert_word-segmentation_chinese-base-ecommerce'

# Load the model first: the preprocessor is constructed from the model's directory.
model = Model.from_pretrained(model_id)
tokenizer = TokenClassificationTransformersPreprocessor(model.model_dir)

# Build the pipeline from the explicit model/preprocessor pair, then segment a product title.
pipeline_ins = pipeline(
    task=Tasks.word_segmentation,
    model=model,
    preprocessor=tokenizer,
)
result = pipeline_ins(input="收腰显瘦黑裙长裙")
print(result)
# Expected output: {'output': '收腰 显瘦 黑裙 长裙'}

# 越南语 (Vietnamese)

In [None]:
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Vietnamese word segmentation: the pipeline is built from the task, the model id,
# and an explicitly pinned model revision.
word_segmentation_pipeline = pipeline(
    Tasks.word_segmentation,
    'iic/nlp_xlmr_word-segmentation_viet',
    model_revision='v1.0.0',
)
result = word_segmentation_pipeline(
    'Nền kinh tế lúc ấy đang đứng trước nghịch lý : giá hàng tăng , sản xuất đình trệ , tiền khan hiếm ...'
)

print(result)
# Expected output:
# {'output': ['Nền', 'kinh tế', 'lúc', 'ấy', 'đang', 'đứng', 'trước', 'nghịch lý', ':', 'giá', 'hàng', 'tăng', ',', 'sản xuất', 'đình trệ', ',', 'tiền', 'khan hiếm', '...'], 'labels': []}