In [None]:
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Build the 16 kHz Mandarin ASR pipeline. Exactly one `model=` line must be
# active; the commented lines are alternative recognizers that can be swapped in.
inference_16k_pipline = pipeline(
    task=Tasks.auto_speech_recognition,
    # model='damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline',  # UniASR offline
    # model='damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online',  # UniASR online
    # model='speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline',  # UniASR offline large
    # model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch',  # Paraformer offline
    # Paraformer long-audio model. The id must not carry the "/summary" suffix:
    # that is the path of the model's web page, not part of the model id.
    model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
)

# rec_result = inference_16k_pipline(audio_in='https://modelscope.oss-cn-beijing.aliyuncs.com/test/audios/asr_example.wav')
# print(rec_result)

输入音频支持wav与pcm格式音频，以wav格式输入为例，支持以下几种输入方式：
* wav文件路径，例如：data/test/audios/asr_example.wav
* pcm文件路径，例如：data/test/audios/asr_example.pcm
* wav文件url，例如：https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav
* wav二进制数据，格式bytes，例如：用户直接从文件里读出bytes数据或者是麦克风录出bytes数据。
* 已解析的audio音频，例如：audio, rate = soundfile.read("asr_example_zh.wav")，类型为numpy.ndarray或者torch.Tensor。

识别结果输出路径结构如下：
```
tree output_dir/
output_dir/
└── 1best_recog
    ├── rtf
    ├── score
    ├── text
    └── time_stamp

1 directory, 4 files
```
* rtf：计算过程耗时统计
* score：识别路径得分
* text：语音识别结果文件
* time_stamp：时间戳结果文件

ASR、VAD、PUNC模型自由组合
可根据使用需求对VAD和PUNC标点模型进行自由组合，使用方式如下：
```python
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
    vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch',
    punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
)
```
如需加入LM模型，可增加配置lm_model='damo/speech_transformer_lm_zh-cn-common-vocab8404-pytorch'。

长音频版本模型中集成了VAD、ASR、标点模型，若不使用VAD或标点模型，可设置参数vad_model=""或punc_model=""，具体使用方式可参考[文档](https://github.com/alibaba-damo-academy/FunASR/discussions/134)，例如：
```python
inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
    vad_model='',
    punc_model='',
)
```
长音频版本模型默认开启时间戳，若不使用时间戳，可通过传入参数param_dict['use_timestamp'] = False关闭时间戳，使用方式如下：
```python
param_dict = dict()
param_dict['use_timestamp'] = False
rec_result = inference_pipeline(audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_vad_punc_example.wav', param_dict=param_dict)
```

In [4]:
def Paraformer_longaudio_model(use_vad_model=True,use_punc_model=True,use_lm_model=False):
    """Create the Paraformer long-audio speech-recognition pipeline.

    Args:
        use_vad_model: When true, pass the FSMN VAD model id as ``vad_model``;
            otherwise pass an empty string.
        use_punc_model: When true, pass the CT-Transformer punctuation model
            id as ``punc_model``; otherwise pass an empty string.
        use_lm_model: When true, additionally pass the Transformer LM model id
            together with ``lm_weight=0.15`` and ``beam_size=10``.

    Returns:
        The object returned by ``pipeline(...)``. It is returned (rather than
        used here) so callers can build it once and reuse it, instead of
        constructing the model again for every request.
    """
    pipeline_kwargs = dict(
        task=Tasks.auto_speech_recognition,
        # The long-audio model combines VAD, ASR and PUNC by default.
        model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
        vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch' if use_vad_model else '',
        punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch' if use_punc_model else '',
    )

    # The language-model settings are only sent when the LM is requested.
    if use_lm_model:
        pipeline_kwargs.update(
            lm_model='damo/speech_transformer_lm_zh-cn-common-vocab8404-pytorch',
            lm_weight=0.15,
            beam_size=10,
        )

    return pipeline(**pipeline_kwargs)

def Paraformer_longaudio_recognition(audio_data, use_timestamp=True, asr_pipeline=None):
    """Run speech recognition on one audio input.

    Args:
        audio_data: The input audio: a wav file path, binary data (bytes),
            or a URL.
        use_timestamp: Forwarded to the pipeline as
            ``param_dict['use_timestamp']``.
        asr_pipeline: Callable invoked as
            ``asr_pipeline(audio_in=..., param_dict=...)``, e.g. the object
            returned by ``Paraformer_longaudio_model()``. When ``None`` the
            module-level ``inference_pipeline`` is used, which must have been
            assigned beforehand.

    Returns:
        Whatever the pipeline call returns.

    Raises:
        NameError: If ``asr_pipeline`` is ``None`` and no global
            ``inference_pipeline`` exists.
    """
    if asr_pipeline is None:
        # Backward-compatible path: rely on the notebook-level pipeline object.
        asr_pipeline = inference_pipeline

    # Build the options locally; the original wrote into a `param_dict`
    # global that is not defined anywhere in the notebook.
    param_dict = {'use_timestamp': use_timestamp}
    return asr_pipeline(audio_in=audio_data, param_dict=param_dict)

In [None]:
if __name__ == "__main__":
    # NOTE(review): `gr` is used throughout this cell but no `import gradio as gr`
    # is visible in the notebook -- confirm it is imported before this cell runs.

    # Building a pipeline loads model weights, so each (VAD, PUNC, NNLM)
    # combination is created at most once and then reused.
    pipeline_cache = {}

    def switch_audio_source(source):
        """Point the audio widget at the source chosen in the radio button."""
        return gr.update(source=source)

    def recognize_audio(audio_path, selected_models):
        """Transcribe the recorded/uploaded file with the ticked sub-models.

        Args:
            audio_path: File path supplied by the audio widget
                (``type="filepath"``); empty when nothing was provided.
            selected_models: Labels ticked in the checkbox group.

        Returns:
            The text to show in the output box.
        """
        if not audio_path:
            return "请先录音或上传音频"

        selected = set(selected_models or [])
        cache_key = ("VAD" in selected, "PUNC" in selected, "NNLM" in selected)
        if cache_key not in pipeline_cache:
            pipeline_cache[cache_key] = Paraformer_longaudio_model(
                use_vad_model=cache_key[0],
                use_punc_model=cache_key[1],
                use_lm_model=cache_key[2],
            )

        # Only the transcript is displayed, so timestamps are switched off.
        rec_result = pipeline_cache[cache_key](
            audio_in=audio_path, param_dict={'use_timestamp': False})

        # Presumably the result is a dict with the transcript under 'text'
        # (TODO confirm); anything else is shown via its string form.
        if isinstance(rec_result, dict) and 'text' in rec_result:
            return rec_result['text']
        return str(rec_result)

    with gr.Blocks(
        theme="soft",
        title="UniASR语音实时识别",
    ) as demo:
        gr.Markdown(
            """[**语音识别**](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/summary)
                [**长语音模型**](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)
            > 1. 录音,或者上传音频wav格式
            > 1. 选择是否使用 vad(voice activity detection), punc(标点), lm (NNLM) 诸模型
            > 1. 点击,"一键识别",输出语音文字
            """)
        # Layout containers are the capitalised classes gr.Row / gr.Column.
        with gr.Row():
            with gr.Column(variant='panel'):
                inp0 = gr.Radio(
                    choices=["microphone", "upload"],
                    value="upload",
                    type="value",
                    label="选择音频来源",
                    show_label=True,
                    )
                inp1 = gr.Audio(
                    source="upload",
                    type="filepath",
                    show_label=True,
                    interactive=True,
                    )
                inp2 = gr.CheckboxGroup(["VAD", "PUNC","NNLM"], label="请选择是否使用下面模型",
                                        value=["VAD", "PUNC"],show_label=True )

        # Changing the radio button re-targets the audio widget.
        inp0.change(switch_audio_source, inputs=inp0, outputs=inp1)
        out = gr.Text(label='识别结果', show_copy_button=True, show_label=True,
                      placeholder="识别出的文字将显示在这里")
        # Button caption matches step 3 of the instructions above.
        submit_button = gr.Button(value='一键识别')
        submit_button.click(recognize_audio, inputs=[inp1, inp2], outputs=out)

    # Start serving once the Blocks context has closed.
    # NOTE(review): share=True requests a public share link -- confirm that is intended.
    demo.queue()
    demo.launch(show_error=True, share=True)