In [19]:
import os
import yaml
import openai

# Load key/value settings from a YAML file kept outside the notebook and export
# them as environment variables (later cells read OPENAI_API_KEY and
# OPENAI_API_BASE from os.environ).
with open('../environment.yaml', 'r') as f:
    # An empty YAML file parses to None; fall back to an empty mapping so the
    # export below does not fail with a TypeError.
    env = yaml.safe_load(f) or {}
# os.environ only accepts strings, while YAML scalars may load as int/bool/float,
# so every value is stringified; keys that have no value are skipped.
os.environ.update(
    {str(key): str(value) for key, value in env.items() if value is not None}
)

In [20]:
# Configure the openai module from the environment variables exported above.
# Indexing os.environ raises KeyError if either variable is missing.
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ['OPENAI_API_BASE']

In [21]:
from typing import Optional
from langchain.chains.openai_functions import (
    # The imported name was garbled; the chain-building cell calls
    # `create_structured_output_chain`, so that is the name to import.
    create_structured_output_chain,
)
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import HumanMessage, SystemMessage

In [22]:
from pydantic import BaseModel, Field
from typing import Optional

# Structured-output schema: the fields the chain has to fill in for each page.
# The per-field instructions are passed as `description` (the schema slot meant
# for explanatory text) rather than `title` (a short display label).
# NOTE(review): `#` comments are used instead of a class docstring because
# pydantic is expected to copy a docstring into the generated schema, which
# would change what is sent to the model -- confirm before adding one.
class SearchRecommendation(BaseModel):
    # Summary of the page; the instruction asks for Chinese.
    summary: str = Field(
        ...,
        description='Summary of the whole text page in Chinese language.',
    )
    # True when the page announces the publication or implementation of a new standard.
    standard_exist: bool = Field(
        ...,
        description="Determine whether there are publications or implementation of new standards. Note that it should not associated with certification bodies or other actions related to certification documents, but only with the publication and implementation of new standards.",
    )
    # Source passage backing the decision; the key is required but may be null.
    citation: Optional[str] = Field(
        ...,
        description='The citation (source content) of this text can prove that it contains the publication and implementation of new standards.',
    )
    # Category label for the page; the key is required but may be null.
    classification: Optional[str] = Field(
        ...,
        description="Content of this text mainly belong to.",
    )

In [23]:
# Show the models exposed by the configured API key / API base. The bare
# expression is the cell output, so the full listing is displayed.
openai.Model.list()

<OpenAIObject list at 0x16738c950> JSON: {
  "data": [
    {
      "id": "gpt-3.5-turbo",
      "object": "model",
      "created": 1677649963,
      "owned_by": "openai",
      "permission": [
        {
          "id": "modelperm-LwHkVFn8AcMItP432fKKDIKJ",
          "object": "model_permission",
          "created": 1626777600,
          "allow_create_engine": true,
          "allow_sampling": true,
          "allow_logprobs": true,
          "allow_search_indices": false,
          "allow_view": true,
          "allow_fine_tuning": false,
          "organization": "*",
          "group": null,
          "is_blocking": false
        }
      ],
      "root": "gpt-3.5-turbo",
      "parent": null
    },
    {
      "id": "gpt-3.5-turbo-0301",
      "object": "model",
      "created": 1677649963,
      "owned_by": "openai",
      "permission": [
        {
          "id": "modelperm-LwHkVFn8AcMItP432fKKDIKJ",
          "object": "model_permission",
          "created": 1626777600,
       

In [24]:
# Chat model used by the chain. When a model is passed in explicitly it has to
# support the OpenAI function-calling API.
llm = ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0.5,
    openai_api_key=openai.api_key,
)

# Fixed instructions sent ahead of every page: summarise in Chinese, decide
# whether a new standard is announced, and pick a category.
system_message = SystemMessage(
    content=(
        "You are Chat-GPT, you help third-party testing company employees "
        "determine whether there are new standards for administrative "
        "notification by reading the text of a html page. "
        "First you make a summary in Chinese, if there is a publication or "
        "implementation of a new standard, make a classification for this page "
        "most likely belong to, and make sure your choices in "
        "{Safety, EMC, RF, EE, Chemical} or None. "
    )
)

# The page text is supplied at run time through the single `{input}` placeholder.
prompt_msgs = [
    system_message,
    HumanMessagePromptTemplate.from_template("{input}"),
]
prompt = ChatPromptTemplate(messages=prompt_msgs)

# Chain whose result exposes the SearchRecommendation fields.
chain = create_structured_output_chain(
    SearchRecommendation,
    llm,
    prompt,
    verbose=False,
)

In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# NOTE(review): this address ends in .pdf rather than pointing at an HTML page,
# and the variable is not referenced by any other visible cell -- confirm
# whether it is still needed.
url = 'https://www.bis.gov.in/wp-content/uploads/2022/06/Measurement-Uncertainty-29-30-August-2022.pdf'

def get_text(url, timeout=30):
    """Download ``url`` and return it parsed as a ``BeautifulSoup`` document.

    Despite the name, the parsed document is returned rather than a string;
    callers read its ``.text`` attribute to obtain the page text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the notebook indefinitely.

    Returns:
        The ``BeautifulSoup`` object built with the ``'html.parser'`` backend.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors so an error page is never analysed as if it
    # were the real content.
    response.raise_for_status()
    # Hand over the raw bytes so the parser can detect the page encoding itself
    # instead of relying on the header-derived guess behind `response.text`.
    return BeautifulSoup(response.content, 'html.parser')

In [26]:
# Fetch the IMANOR page and keep the `.text` of the parsed document as a sample input.
content2 = get_text('https://www.imanor.gov.ma/enquete-publique-40-2022/').text

In [27]:
# Fetch the SABS EMC page and keep the `.text` of the parsed document as a sample input.
content3 = get_text('https://www.sabs.co.za/emc/').text

In [28]:
def get_result(content):
    """Run the chain on one piece of page text, print the findings and return them.

    Args:
        content: Text of the page to analyse.

    Returns:
        The object produced by ``chain.run(content)``. Its ``summary``,
        ``standard_exist``, ``citation`` and ``classification`` attributes are
        the values printed here. Returning it lets callers collect results
        instead of only reading the printed report.
    """
    res = chain.run(content)
    # The labels are Chinese: summary / push notification? / reference / category.
    print('原文总结：', res.summary)
    print('是否推送：', res.standard_exist)
    print('参考：', res.citation)
    print('原文分类：', res.classification)
    return res

# Sample inputs for the chain: three inline snippets plus the two pages fetched
# in the cells above.
contents = [
     # Training announcement (inline text).
     """
2-DAY TRAINING ON “MEASUREMENT UNCERTAINTY”
at National Institute of Training for Standardization (NITS),
A-20 &21, Institutional Area, Sector-62, Noida, U.P. -201309
29-30 AUGUST 2022
""",
    # Text of the two downloaded pages.
    content2,
    content3,
    # Circular in Hindi followed by its English header; the snippet ends mid-word.
    """कें द्रीय मुहर विभाग-2
हमारा संदर्भ : के मु वि-2/16 : 3521 (पार्भ 7) वदनांक: 10-06-2022
विषय: “व्यक्तिगि फॉल एरे स्ट प्रणाली - विशिक्टट भाग 7 स िंगल पाइन्ट एंकररंग यंत्र “ का
आई एस 3521 (पाटट 7) : 2021 के अनुसार अखिल भारिीय प्रथम लाइसेंस का अनुदान ।

उपरोक्त विषय से संबन्धित संलगन पररपत्र सर्ी संबंवितों की जानकारी के वलए पररचावलत है।
(ज्ञान प्रकाश)
 िैज्ञावनक- सी(सी एम डी-II)
प्रमुख (सी एम डी-II)
सभी संबंवितों को पररचावित
आई टी एस विभाग- बीआईएस िैबसाइट पर “व्हाट्स न्यू” अपिोड करने हेतु
CENTRAL MARKS DEPARTMENT-2
Our Ref: CMD-2/16 : IS 3521 (Part 7) Date: 10-06-2022
Subject: Grant of All India First Licence for “Personal Fall Arrest Systems - Specificati
    """,
    # Documentary-guidelines page text (inline).
    """
    Documentary guidelines
    These guidelines show how we interpret the meaning of ‘documentary’. This helps us administer the Broadcasting Services (Australian Content and Children’s Television) Standards 2020 for commercial TV licensees.

    The guidelines were updated in June 2021 to make them more relevant and helpful by including examples of recent documentaries.

    application/pdf
    ACMA documentary guidelines 2021 (253.34 KB)
    report Online enquiries
    Last updated: 08 June 2021

    """
]


In [29]:
# Analyse every sample in turn; a divider line is printed after each report.
for content in contents:
    get_result(content)
    print('---------------------')

原文总结： 2天关于“测量不确定性”的培训将在国家标准化培训研究所（NITS）举行。地点位于诺伊达市62区机构区域A-20和A-21。培训将于2022年8月29日至30日举行。
是否推送： False
参考： 
原文分类： None
---------------------
原文总结： 这是一个关于IMANOR（摩洛哥国家标准化机构）的网页，主要包括关于IMANOR的介绍、组织结构、战略、标准制定流程、摩洛哥国家标准文件、国际和区域标准化参与、市场公共采购中的标准应用、IMANOR的认证服务、培训项目、最新消息和信息查询等内容。网页中没有提到任何新的标准发布或实施。
是否推送： False
参考： 
原文分类： None
---------------------
原文总结： 南非标准局（SABS）是南非共和国指定的国家标准化机构。其职责是制定、推广和维护南非国家标准，确保商品、产品和服务的质量。SABS提供符合性评估服务，并在其职责范围内提供协助。2016年，SABS与利益相关者就SABS合规证书的成本和经济影响进行了讨论。同年，南非独立通信管理局（ICASA）和SABS签署了一份谅解备忘录，确保进入南非市场的电子产品经过审查，以防止低质量产品进入市场。
是否推送： True
参考： emc@sabs.co.za
原文分类： EMC
---------------------
原文总结： इस पेज में एक नई मानक के प्रकटन और कार्यान्वयन के बारे में जानकारी है। विभाजन: None
是否推送： True
参考： आई एस 3521 (पाटट 7) : 2021
原文分类： None
---------------------
原文总结： 这个页面是有关文献指南的，指南解释了'文献'的含义，并帮助我们执行2020年的广播服务（澳大利亚内容和儿童电视）标准。指南在2021年6月进行了更新，以使其更相关和有帮助性，包括最近文献的示例。
是否推送： True
参考： ACMA documentary guidelines 2021 (253.34 KB)
原文分类： None
---------------------
