## Search Recommendation

In [24]:
import os
import yaml
import openai

with open('./environment.yaml', 'r') as f:
    env = yaml.safe_load(f)
    os.environ.update(env)

In [25]:
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ['OPENAI_API_BASE']

In [26]:
import pandas as pd

original_data = pd.read_excel('./泰国-hm.xlsx', header=1).drop(columns='备注').fillna('None')

In [27]:
original_data

Unnamed: 0,检测领域,领域关键词,搜索样例
0,Type Approval,Auto,thailand DLT consulting
1,Homologation,Vehicle,thai vehicle inspection
2,Certificate,Electric vehicle,thai homologation
3,Cert,Autonomous driving,thai vehicle consulting
4,Approval,DLT,thailand DLT service
5,Consulting,TISI,thai emission testing lab
6,REACH,,
7,Chemical regulation,,
8,inspection,,
9,emmision,,


In [28]:
original_data['领域关键词']

0                  Auto 
1                Vehicle
2       Electric vehicle
3     Autonomous driving
4                    DLT
5                   TISI
6                   None
7                   None
8                   None
9                   None
10                  None
Name: 领域关键词, dtype: object

In [29]:
from typing import Optional
from langchain.chains.openai_functions import (
    create_structured_output_chain,
)
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import HumanMessage, SystemMessage

In [49]:
from pydantic import BaseModel, Field
from typing import Optional


# Base Class
class SearchRecommendation(BaseModel):
    search_keywords: list[str] = Field(..., title="Search Keywords about the topic. Less than 3.")
    search_query: list[str] = Field(..., title="Search Query about the topic. Less than 3.")
    search_query_thai: list[str] = Field(..., title="Search Query about the topic in Thai language. Less than 3.")
    

In [56]:
# If we pass in a model explicitly, we need to make sure it supports the OpenAI function-calling API.
llm = ChatOpenAI(model="gpt-4", 
                 temperature=0.7)

system_message = SystemMessage(
        content="You are Chat-GPT, assisting third-party inspection personnel in searching for updates on third-party testing standards in the corresponding country and professional field. By using the provided professional keywords(maybe not provided) in special filed, you generate similar professional keywords that have similar semantic meanings. Then you help them organize multiple effective queries in search engines using these keywords.")
human_message = HumanMessage(
        content="Generate different inquiry methods based on the {testing_field} and professional {domain_keywords} similar to the {search_example}. They should effectively assist me in filtering out the new standards or regulations in the corresponding {testing_field} in Thailand when using a search engine. Note that your recommendation for search query must has a strong relationship with {testing_field} and {domain_keywords}"
)

In [57]:
prompt_msgs = [system_message, human_message, HumanMessagePromptTemplate.from_template("{input}")]
prompt = ChatPromptTemplate(messages=prompt_msgs)
chain = create_structured_output_chain(SearchRecommendation, llm, prompt, verbose=False)

In [58]:
res = []
for i in range(len(original_data)):
    input_query = '{testing_field}: ' + original_data['检测领域'][i] + '\n' + '{domain_keywords}: ' + original_data['领域关键词'][i] + '\n' + '{search_example}: ' + original_data['搜索样例'][i]
    res_part = chain.run(input_query)
    print(res_part)
    res.append(res_part)
    # print(original_data['检测领域'][i])

search_keywords=['Thailand Vehicle Type Approval', 'Thai Automobile Regulations', 'Vehicle Certification Thailand', 'Thai Auto Standards', 'Automotive Compliance Thailand'] search_query=['Thailand Vehicle Type Approval updates', 'Current Thai Automobile Regulations', 'Latest updates in Vehicle Certification Thailand', 'Thai Auto Standards changes', 'New Automotive Compliance standards in Thailand'] search_query_thai=['ปรับปรุงล่าสุดของการอนุมัติประเภทของยานพาหนะในประเทศไทย', 'กฎระเบียบรถยนต์ไทยปัจจุบัน', 'ปรับปรุงล่าสุดในการรับรองยานพาหนะในประเทศไทย', 'การเปลี่ยนแปลงมาตรฐานรถยนต์ไทย', 'มาตรฐานการปฏิบัติงานทางยานยนต์ใหม่ในประเทศไทย']
search_keywords=['Thailand Homologation Standards', 'Vehicle Homologation norms in Thailand', 'Thailand Vehicle Regulation', 'Vehicle Testing Procedures in Thailand', 'Thai Homologation updates', 'Vehicle Certification in Thailand', 'Vehicle Compliance in Thailand', 'Thai Vehicle testing standards'] search_query=['latest Thailand Homologation Standards for 

KeyError: 'function_call'

In [59]:
original_data

Unnamed: 0,检测领域,领域关键词,搜索样例
0,Type Approval,Auto,thailand DLT consulting
1,Homologation,Vehicle,thai vehicle inspection
2,Certificate,Electric vehicle,thai homologation
3,Cert,Autonomous driving,thai vehicle consulting
4,Approval,DLT,thailand DLT service
5,Consulting,TISI,thai emission testing lab
6,REACH,,
7,Chemical regulation,,
8,inspection,,
9,emmision,,


In [60]:
original_data[['搜索关键词（推荐）', '搜索样例推荐（英语）', '搜索样例推荐（泰语）']] = '', '', ''

In [61]:
original_data.iloc[0:1]

Unnamed: 0,检测领域,领域关键词,搜索样例,搜索关键词（推荐）,搜索样例推荐（英语）,搜索样例推荐（泰语）
0,Type Approval,Auto,thailand DLT consulting,,,


In [62]:
res[0].search_keywords

['Thailand Vehicle Type Approval',
 'Thai Automobile Regulations',
 'Vehicle Certification Thailand',
 'Thai Auto Standards',
 'Automotive Compliance Thailand']

In [63]:
new_res = []
for i in range(len(original_data)):
    assert len(res[i].search_query) == len(res[i].search_query_thai)
    new_df = pd.concat([original_data[i:i + 1].copy()]*(len(res[i].search_query)), axis=0)
    for j in range(len(res[i].search_query)):
        new_df.iloc[j, -3] = ' / '.join(res[i].search_keywords)
        new_df.iloc[j, -2] = res[i].search_query[j]  # 使用整数索引进行赋值
        new_df.iloc[j, -1] = res[i].search_query_thai[j]  # 使用整数索引进行赋值
    new_res.append(new_df)

IndexError: list index out of range

In [None]:
new_res[0]

Unnamed: 0,检测领域,领域关键词,搜索样例,搜索关键词（推荐）,搜索样例推荐（英语）,搜索样例推荐（泰语）
0,Type Approval,Auto,thailand DLT consulting,Type Approval / Auto / standards / regulations...,Type Approval standards and regulations in Tha...,มาตรฐานและกฎระเบียบการอนุมัติประเภทในประเทศไทย
0,Type Approval,Auto,thailand DLT consulting,Type Approval / Auto / standards / regulations...,Auto Type Approval updates in Thailand,อัปเดตการอนุมัติประเภทรถยนต์ในประเทศไทย
0,Type Approval,Auto,thailand DLT consulting,Type Approval / Auto / standards / regulations...,DLT Type Approval consulting in Thailand,การให้คำปรึกษาในการอนุมัติประเภทจากกรมการขนส่ง...


In [None]:
final_res = pd.concat(new_res).groupby(['检测领域', '领域关键词', '搜索样例', '搜索关键词（推荐）', '搜索样例推荐（英语）']).agg({'搜索样例推荐（泰语）': lambda x: x,})

In [None]:
final_res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,搜索样例推荐（泰语）
检测领域,领域关键词,搜索样例,搜索关键词（推荐）,搜索样例推荐（英语）,Unnamed: 5_level_1
Approval,DLT,thailand DLT service,Thailand DLT approval / DLT service approval in Thailand / DLT regulations Thailand / Thailand DLT standards / DLT testing service Thailand / Approval standards for DLT in Thailand / DLT approval requirements Thailand,Changing standards for DLT testing service in Thailand,การเปลี่ยนแปลงมาตรฐานสำหรับบริการทดสอบ DLT ในป...
Approval,DLT,thailand DLT service,Thailand DLT approval / DLT service approval in Thailand / DLT regulations Thailand / Thailand DLT standards / DLT testing service Thailand / Approval standards for DLT in Thailand / DLT approval requirements Thailand,Current approval standards for DLT in Thailand,มาตรฐานการอนุมัติปัจจุบันสำหรับ DLT ในประเทศไทย
Approval,DLT,thailand DLT service,Thailand DLT approval / DLT service approval in Thailand / DLT regulations Thailand / Thailand DLT standards / DLT testing service Thailand / Approval standards for DLT in Thailand / DLT approval requirements Thailand,Latest DLT approval requirements in Thailand,ความต้องการในการอนุมัติ DLT ล่าสุดในประเทศไทย
Approval,DLT,thailand DLT service,Thailand DLT approval / DLT service approval in Thailand / DLT regulations Thailand / Thailand DLT standards / DLT testing service Thailand / Approval standards for DLT in Thailand / DLT approval requirements Thailand,Latest Thailand DLT approval standards,มาตรฐานการอนุมัติ DLT ล่าสุดของประเทศไทย
Approval,DLT,thailand DLT service,Thailand DLT approval / DLT service approval in Thailand / DLT regulations Thailand / Thailand DLT standards / DLT testing service Thailand / Approval standards for DLT in Thailand / DLT approval requirements Thailand,New DLT service approval regulations in Thailand,ข้อบังคับใหม่เกี่ยวกับการอนุมัติบริการ DLT ในป...
...,...,...,...,...,...
inspection,,,inspection standards / inspection regulations / inspection guidelines / Thailand inspection requirements / inspection procedures Thailand / inspection criteria in Thailand,latest inspection guidelines in Thailand,แนวทางการตรวจสอบล่าสุดในประเทศไทย
inspection,,,inspection standards / inspection regulations / inspection guidelines / Thailand inspection requirements / inspection procedures Thailand / inspection criteria in Thailand,new inspection procedures in Thailand,กระบวนการตรวจสอบใหม่ในประเทศไทย
inspection,,,inspection standards / inspection regulations / inspection guidelines / Thailand inspection requirements / inspection procedures Thailand / inspection criteria in Thailand,new inspection regulations in Thailand,ระเบียบการตรวจสอบใหม่ในประเทศไทย
inspection,,,inspection standards / inspection regulations / inspection guidelines / Thailand inspection requirements / inspection procedures Thailand / inspection criteria in Thailand,recent inspection standards in Thailand,มาตรฐานการตรวจสอบล่าสุดในประเทศไทย


In [None]:
final_res.to_excel('final_res.xlsx')

In [None]:
final_res

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,搜索样例推荐（泰语）
检测领域,领域关键词,搜索样例,搜索关键词（推荐）,搜索样例推荐（英语）,Unnamed: 5_level_1
Approval,DLT,thailand DLT service,new regulations / latest standards / updated requirements / recent guidelines / current rules,DLT guidelines in Thailand,แนวทาง DLT ในประเทศไทย
Approval,DLT,thailand DLT service,new regulations / latest standards / updated requirements / recent guidelines / current rules,DLT regulations in Thailand,กฎระเบียบ DLT ในประเทศไทย
Approval,DLT,thailand DLT service,new regulations / latest standards / updated requirements / recent guidelines / current rules,Thailand approval requirements,ข้อกำหนดการอนุมัติประเภทในประเทศไทย
Approval,DLT,thailand DLT service,new regulations / latest standards / updated requirements / recent guidelines / current rules,Thailand approval rules,กฎเกณฑ์การอนุมัติประเภทในประเทศไทย
Approval,DLT,thailand DLT service,new regulations / latest standards / updated requirements / recent guidelines / current rules,Thailand approval standards,มาตรฐานการอนุมัติประเภทในประเทศไทย
...,...,...,...,...,...
emmision,,,emission testing standards Thailand / emission regulations Thailand / new emission standards Thailand / latest emission regulations Thailand,new emission standards Thailand,มาตรฐานการปล่อยก๊าซใหม่ Thailand
inspection,,,inspection standards Thailand / inspection regulations Thailand / new inspection standards Thailand / updated inspection regulations Thailand,inspection regulations in Thailand,กฎระเบียบการตรวจสอบในประเทศไทย
inspection,,,inspection standards Thailand / inspection regulations Thailand / new inspection standards Thailand / updated inspection regulations Thailand,inspection standards in Thailand,มาตรฐานการตรวจสอบในประเทศไทย
inspection,,,inspection standards Thailand / inspection regulations Thailand / new inspection standards Thailand / updated inspection regulations Thailand,new inspection standards in Thailand,มาตรฐานการตรวจสอบใหม่ในประเทศไทย


## Google Search

In [7]:
import pandas as pd

key_words = pd.read_excel('../data/res.xlsx')

In [19]:
from collections import defaultdict

field_keyword_dict = defaultdict(list)
key = None
for i in range(len(key_words)):
    if not pd.isna(key_words['检测领域'][i]):
        key = key_words['检测领域'][i]
    if pd.isna(key_words['合格'][i]):
        search_ele_en = key_words['搜索样例推荐（英语）'][i]
        search_ele_tai = key_words['搜索样例推荐（泰语）'][i] if not pd.isna(key_words['搜索样例推荐（泰语）'][i]) else None
        field_keyword_dict[key].append([search_ele_en, search_ele_tai])

In [21]:
key_words

Unnamed: 0,检测领域,领域关键词,搜索样例,搜索关键词（推荐）,搜索样例推荐（英语）,搜索样例推荐（泰语）,合格
0,Approval,DLT,thailand DLT service,approval standards Thailand / regulations for ...,DLT accreditation Thailand,การรับรอง DLT ประเทศไทย,N
1,,,,,DLT approval guidelines,แนวทางการอนุมัติ DLT,
2,,,,,DLT approval process Thailand,กระบวนการอนุมัติ DLT ในประเทศไทย,
3,,,,,DLT approval requirements,ข้อกำหนดการอนุมัติ DLT,
4,,,,,DLT approval updates,อัปเดตการอนุมัติ DLT,
...,...,...,...,...,...,...,...
63,,,,,latest inspection regulations Thailand,กฎระเบียบการตรวจสอบล่าสุดประเทศไทย,N
64,,,,,new inspection standards Thailand,มาตรฐานการตรวจสอบใหม่ประเทศไทย,N
65,,,,,Auto consulting in Thailand,,
66,,,,,Vehicle consulting in Thailand,,


In [20]:
field_keyword_dict

defaultdict(list,
            {'Approval': [['DLT approval guidelines', 'แนวทางการอนุมัติ DLT'],
              ['DLT approval process Thailand',
               'กระบวนการอนุมัติ DLT ในประเทศไทย'],
              ['DLT approval requirements', 'ข้อกำหนดการอนุมัติ DLT'],
              ['DLT approval updates', 'อัปเดตการอนุมัติ DLT'],
              ['DLT certification Thailand', 'การรับรอง DLT ประเทศไทย']],
             'Cert ': [['Thailand autonomous driving regulations update',
               'ประมวลกฎหมายการขับรถไร้คนขับในประเทศไทย'],
              ['Thailand autonomous driving standards and regulations',
               'มาตรฐานและกฎหมายการขับรถยนต์ไร้คนขับในประเทศไทย'],
              ['Thailand certification standards for autonomous driving',
               'มาตรฐานการรับรองในประเทศไทยสำหรับรถยนต์ไร้คนขับ'],
              ['Thailand vehicle consulting for cert testing',
               'การให้คำปรึกษารถยนต์ในประเทศไทยสำหรับการรับรอง']],
             'Certificate': [['Thai regulations on 

In [51]:
import aiohttp
import json
import asyncio
import nest_asyncio
import random

nest_asyncio.apply()

async def get_google_result(query):
    # await asyncio.sleep(random.randint(1, 5))
    url = "https://google.serper.dev/search"
    payload = json.dumps({
      "q": query,
      "num": 20,
    })
    headers = {
        'X-API-KEY': '702efdffd42d58b1369eb20cc0951aca3ec8470b',
        'Content-Type': 'application/json'
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, data=payload, headers=headers) as resp:
            data = await resp.text()
    return data

In [53]:
field_keyword_dict['Approval']

[['DLT approval guidelines', 'แนวทางการอนุมัติ DLT'],
 ['DLT approval process Thailand', 'กระบวนการอนุมัติ DLT ในประเทศไทย'],
 ['DLT approval requirements', 'ข้อกำหนดการอนุมัติ DLT'],
 ['DLT approval updates', 'อัปเดตการอนุมัติ DLT'],
 ['DLT certification Thailand', 'การรับรอง DLT ประเทศไทย']]

In [65]:
list(field_keyword_dict.keys())

['Approval',
 'Cert ',
 'Certificate',
 'Chemical regulation',
 'Homologation',
 'REACH',
 'Type Approval',
 'emmision',
 'inspection']

In [57]:
async def main(field_keyword_dict, key):
    query_lis_en = [x[0] for x in field_keyword_dict[key]]
    query_tasks = [asyncio.create_task(get_google_result(query)) for query in query_lis_en]
    results = await asyncio.gather(*query_tasks)
    return key, results

In [58]:
res_dict = defaultdict(list)
for field in field_keyword_dict.keys():
    key, results = asyncio.run(main(field_keyword_dict, field))
    res_dict[key] = [eval(result) for result in results]

  self._waiters = collections.deque()


In [60]:
res_dict.keys()

dict_keys(['Approval', 'Cert ', 'Certificate', 'Chemical regulation', 'Homologation', 'REACH', 'Type Approval', 'emmision', 'inspection'])

In [63]:
res_dict['Approval']

[{'searchParameters': {'q': 'DLT approval guidelines',
   'num': 20,
   'type': 'search'},
  'organic': [{'title': 'DLT Registration Process & Guidelines | DLT verified entity approved by TRAI',
    'link': 'https://www.mysmsmantra.com/dlt-registration.html',
    'snippet': 'Step 1: Entity/ Enterprise Registration; Step 2: Header (Sender ID) Registration; Step 3: Consent Template Registration; Step 4: Content Template Registration.',
    'position': 1},
   {'title': 'A Step by Step Guide to DLT Registration in India for sending SMS - EnableX.io',
    'link': 'https://www.enablex.io/insights/a-step-by-step-guide-to-dlt-registration/',
    'snippet': 'Learn how you can apply for DLT registration to send SMS to customers. DLT Registration is mandatory to send SMS as a Business in India.',
    'date': 'Jan 23, 2023',
    'position': 2},
   {'title': 'Is DLT Registration mandatory? Exploring the complete TRAI guidelines - Medium',
    'link': 'https://medium.com/@spaceedge.seo/is-dlt-regist