In [1]:
!pip install gradio langchain

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com




In [2]:
import sagemaker
import boto3

# Open a SageMaker session; the region and the default bucket are derived from it.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Bucket used for uploading data, models and logs.
# Leave it as None to use the session's default bucket
# (SageMaker creates that bucket automatically if it does not exist).
default_bucket = None
if sess is not None and default_bucket is None:
    default_bucket = sess.default_bucket()

# Resolve the execution role ARN. If get_execution_role() raises ValueError,
# fall back to looking up the role named 'sagemaker_execution_role' through IAM.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(
        RoleName='sagemaker_execution_role'
    )['Role']['Arn']

# Echo the resolved settings so the reader can confirm the environment.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {default_bucket}")
print(f"sagemaker session region: {region}")

sagemaker role arn: arn:aws:iam::808577411626:role/Slab01AIGCPracticerRole
sagemaker bucket: sagemaker-us-west-2-808577411626
sagemaker session region: us-west-2


In [3]:
# %%writefile ui_launcher_v.py
import os
import random
import time

import gradio as gr

from sagemaker_endpoint_llm import SageMakerLLM
from kendra_retrieval_qa_chain import KendraLLMRetrieverQAChain
from custom_search_helper import WebSearcher
from custom_agent_helper import CustomExecutor


# Deployment-specific settings.
# Each of the first three values is read from an environment variable of the
# same name, so real identifiers and API keys do not have to be saved inside the
# notebook. When the variable is not set, the original placeholder is used and
# must be replaced by hand before running the cells below.
LLM_ENDPOINT_NAME = os.environ.get('LLM_ENDPOINT_NAME', 'xxxx-xx-xx-xx-xx')  # SageMaker endpoint name created in Step2
KENDRA_INDEX_ID = os.environ.get('KENDRA_INDEX_ID', 'xxxx-xx-xx-xx-xx')      # Kendra index id created in Step1
# RapidAPI key for the Bing web search API; a key can be requested at
# https://rapidapi.com/microsoft-azure-org-microsoft-cognitive-services/api/bing-web-search1/
RAPID_API_KEY = os.environ.get('RAPID_API_KEY', '---51-digit-key---')


KENDRA_LANG_CODE = 'zh' # full lang code list in https://docs.aws.amazon.com/kendra/latest/dg/in-adding-languages.html

# LLM wrapper around the SageMaker endpoint configured above.
llm = SageMakerLLM(
    SageMakerEndpointName=LLM_ENDPOINT_NAME,
    AWSRegion=region,
    LLMType='GLM-6b',
    LLMArgs={'top_p': 0.45, 'temperature': 0.45},
)

# Kendra retrieval QA chain that answers with the LLM created above.
kqa = KendraLLMRetrieverQAChain(
    KendraIndexId=KENDRA_INDEX_ID,
    KendraLanguageCode=KENDRA_LANG_CODE,
    AWSRegion=region,
    Llm=llm.get_llm(),
)

# Web search helper backed by the RapidAPI key.
web_searcher = WebSearcher(
    apikey=RAPID_API_KEY,
    result_count=5,
    lang_code='zh',  # original author's note: "only google"
)

# Three executors built from the same LLM and web searcher; they differ only in
# which Kendra pieces are supplied.
# Per the original note: when kendra_chain is assigned, retriever is overwritten.

# Kendra retriever + web search, no Kendra chain.
flat_agent = CustomExecutor(
    llm=llm.sm_llm,
    retriever=kqa.kendra_retriever,
    websearcher=web_searcher,
    kendra_chain=None,
    verbose=True,
)

# Kendra chain supplied (the retriever argument is overwritten) + web search.
hier_agent = CustomExecutor(
    llm=llm.sm_llm,
    retriever=kqa.kendra_retriever,
    websearcher=web_searcher,
    kendra_chain=kqa,
    verbose=True,
)

# Web search only: neither a retriever nor a Kendra chain.
web_agent = CustomExecutor(
    llm=llm.sm_llm,
    retriever=None,
    websearcher=web_searcher,
    kendra_chain=None,
    verbose=True,
)


# Default Gradio theme with the small spacing and small text size presets.
theme = gr.themes.Default(spacing_size=gr.themes.sizes.spacing_sm, text_size=gr.themes.sizes.text_sm)
with gr.Blocks(theme=theme) as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label='User Input: ')

    with gr.Row():
        clear = gr.ClearButton([msg, chatbot])
        int_btn = gr.Button("InternalQA", variant="primary")
        hyb_btn = gr.Button("HybridQA\n(Beta..)")

    def _reply(message, chat_history, answer_fn):
        """Append one (user message, bot answer) turn to the chat history.

        Args:
            message: Text taken from the input box.
            chat_history: List of (user, bot) tuples shown in the Chatbot;
                None is treated as an empty history.
            answer_fn: Callable that maps the user message to the bot answer.

        Returns:
            A pair ("", chat_history): the empty string clears the input box,
            the list is the updated conversation.
        """
        if chat_history is None:
            chat_history = []
        # Ignore empty / whitespace-only submissions instead of sending an
        # empty query to the backend.
        if not message or not message.strip():
            return "", chat_history
        chat_history.append((message, answer_fn(message)))
        return "", chat_history

    def respond(message, chat_history):
        """Answer with hier_agent (used by the HybridQA button)."""
        return _reply(message, chat_history, lambda q: hier_agent.query(q))

    def respond_internal(message, chat_history):
        """Answer with the Kendra QA chain only (InternalQA button and Enter key)."""
        return _reply(message, chat_history, lambda q: kqa.kendra_chain_qa(q)['result'])

    # Pressing Enter in the textbox behaves like the InternalQA button.
    msg.submit(respond_internal, [msg, chatbot], [msg, chatbot])

    int_btn.click(respond_internal, [msg, chatbot], [msg, chatbot])
    hyb_btn.click(respond, [msg, chatbot], [msg, chatbot])

# NOTE(review): on SageMaker notebooks Gradio reports that it sets share=True,
# which publishes a public *.gradio.live URL for this app. Confirm that exposing
# the endpoint this way is acceptable, or pass share/auth options explicitly.
demo.launch()

Sagemaker notebooks may require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://952d088939c110c4f9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## Unit tests

In [5]:
# model test: send a single prompt straight to llm.sm_llm and display the completion
# (prompt in English: "Up to what date does your latest knowledge extend?")
llm.sm_llm('你最新的知识截止到什么时间')

'我是基于清华大学 KEG 实验室和智谱 AI 公司于 2023 年共同训练的语言模型开发的AI助手，我的知识截止到2023年。'

In [6]:
# retriever test: query the Kendra retriever on its own (no LLM call in this cell)
# and display the documents it returns
# (query in English: "Which data sources does AWS clean room support?")
kqa.kendra_retriever.get_relevant_documents('AWS clean room支持的数据源有哪些')

[Document(page_content='Document Title: \nDocument Excerpt: \n目前只支持S3，其他数据源近期没有具体计划。\n', metadata={'source': '', 'title': '', 'excerpt': '目前只支持S3，其他数据源近期没有具体计划。', 'type': 'QUESTION_ANSWER'}),
 Document(page_content='Document Title: \nDocument Excerpt: \n对，目前必须在AWS上，而且必须是同一个region。\n', metadata={'source': '', 'title': '', 'excerpt': '对，目前必须在AWS上，而且必须是同一个region。', 'type': 'QUESTION_ANSWER'}),
 Document(page_content='Document Title: \nDocument Excerpt: \n能支持TB/GB级数据的查询。 一般查询延迟为几十秒到几分钟。默认计算容量为32 CRPUs， 目前这个默认计算容量不可设置，但是roadmap中未来打算让用户可以进行设置。(Slack中Ryan 提到，如果引擎中任务有积压，它能够scale up）\n', metadata={'source': '', 'title': '', 'excerpt': '能支持TB/GB级数据的查询。 一般查询延迟为几十秒到几分钟。默认计算容量为32 CRPUs， 目前这个默认计算容量不可设置，但是roadmap中未来打算让用户可以进行设置。(Slack中Ryan 提到，如果引擎中任务有积压，它能够scale up）', 'type': 'QUESTION_ANSWER'})]

In [7]:
# Kendra QA chain test: run the same question through kqa.kendra_chain_qa and
# display the 'query' and 'result' entries of the response
resp = kqa.kendra_chain_qa('AWS clean room支持的数据源有哪些')
resp['query'], resp['result']

('AWS clean room支持的数据源有哪些', '根据提供的背景知识，AWS Clean Room支持的数据源目前只有S3。')

In [8]:
# web search API test: run one search through the WebSearcher helper
# (query in English: "moss large model")
web_searcher.search('moss大模型')

0. 国内首个类ChatGPT 模型：复旦大学MOSS 今日正式开源 - IT之家: 据介绍，MOSS 是一个支持中英双语和多种插件的开源对话语言模型，moss-moon 系列模型具有160 亿参数，在FP16 精度下可在单张A100 / A800 或两张3090 显卡 ... # https://www.ithome.com/0/688/006.htm #
1. 复旦MOSS大模型正式开源：能写文案、生成图像、解数学题 - 巴比特: 目前，MOSS模型已上线开源，相关代码、数据、模型参数在Github和Hugging Face等平台开放，供科研人员下载。 复旦大学计算机科学技术学院教授MOSS系统负责 ... # https://www.8btc.com/article/6814840 #
2. MOSS: 登录注册 · 申请问卷. # https://moss.fastnlp.top/ #
3. 复旦大学MOSS 大模型正式开源；周鸿祎：360 员工不会被GPT 淘汰: 据介绍，MOSS 是一个支持中英双语和多种插件的开源对话语言模型，moss-moon 系列模型具有160 亿参数，在FP16 精度下可在单张A100 / A800 或两张3090 显卡 ... # https://www.geekpark.net/news/318008 #
4. 复旦团队大模型MOSS 开源了，有哪些技术亮点值得关注？ - 知乎: TL, DR：复旦NLP 团队的MOSS 大语言模型今天开源了，增加「搜索引擎、计算器、解方程、文生图」等插件功能，可以在线体验，支持本地部署。 # https://www.zhihu.com/question/596908242 #


In [9]:
# hierarchical chain test: ask hier_agent (the executor built with kendra_chain=kqa)
# the question and display its final answer; verbose=True also prints the chain trace
hier_agent.query('AWS clean room支持哪些数据源')



[1m> Entering new  chain...[0m
-DD-intermediate_steps:  []
-DD-formated_prompt_temp:  现在你是一个固定任务的机器人。这里有一些已知信息：
        问题:AWS clean room支持哪些数据源。解答:根据提供的背景知识，AWS Clean Room支持S3数据源。
        

        Instruction: 只能基于以上提供的已知信息，回答问题“AWS clean room支持哪些数据源”。如果没有或者缺失必要的相关信息，请你只回答"WebSearch('搜索词')"，并将'搜索词'替换为你认为需要搜索的关键词。下面请回答我上面提出的问题。
        解答:
-DD-CustomOutputParser-f_llm_output:  根据提供的已知信息 AWS Clean Room支持S3数据源 
-DD-CustomOutputParser-match:  None
[32;1m[1;3m根据提供的已知信息，AWS Clean Room支持S3数据源。[0m

[1m> Finished chain.[0m


'根据提供的已知信息，AWS Clean Room支持S3数据源。'