In [55]:
from openai import OpenAI
import os
from dotenv import dotenv_values
from ReadLoad import read_jsonl, write_jsonl, read_json
from tqdm import tqdm

config = dotenv_values('.env')
client = OpenAI(
    api_key = config['qwen_key'],
    base_url = config['qwen_url']
)

def get_response(prompt):
    try:
        completion = client.chat.completions.create(
            model="qwen2-7b-instruct",
            messages=[
                {'role': 'system', 'content': "你是一个基于保险条款的问答系统，对用户提出的有关保险条款的问题给予准确、清晰的回答。"},
                {'role': 'user', 'content': prompt}
            ]

            )
        return completion.choices[0].message.content
    except Exception as e:
    # Handle any other unexpected exceptions
        print(f"An unexpected error occurred: {e}")
        return f"An unexpected error occurred: {e}"


In [24]:
import numpy as np
def cosine_similarity(vec1, vec2):
    """
    Calculate the cosine similarity between two vectors.
    
    :param vec1: First vector (numpy array).
    :param vec2: Second vector (numpy array).
    :return: Cosine similarity score (float).
    """
    vec1 = np.array(vec1)
    vec2 = np.array(vec2)
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    similarity = dot_product / (norm_vec1 * norm_vec2)
    return similarity

In [56]:
#!pip install dashscope
import dashscope
from http import HTTPStatus
dashscope.api_key = config['qwen_key'] 

def get_embedding(embed_text):
    
    respond = ''
    try:
        respond = dashscope.TextEmbedding.call(
            model = dashscope.TextEmbedding.Models.text_embedding_v2,
            input = embed_text)
        
        embedding = respond.output['embeddings'][0]['embedding']
        
    except Exception as e:
        # Handle any other unexpected exceptions
        print(f"An unexpected error occurred: {e}")
    
    return embedding

In [57]:
qa_prompt = '''从保险条款
==========
{}
==========
中找问题
==========
{}
==========
的答案，找到答案就仅使用保险条款中的语句回答问题，找不到答案就用自身知识回答并且告诉用户该信息不是来自文档。
不要复述问题，不要回答无关的内容, 直接开始回答问题。
'''

qa_prompt = '''
保险条款: """{}"""
用户问题："""{}"""
回答规则："""仅使用保险条款中的语句回答问题"""
回答: 
'''

In [58]:
data = read_jsonl("resultdev_with_embedding.jsonl")

In [None]:
#data = read_json("resultdev_with_embedding.json")
similaritys = []
for d in tqdm(data):
    query = d['问题']
    clause = d['条款']
    prompt = qa_prompt.format(clause, query)
    d['prompt'] = prompt
    d['answer'] = get_response(prompt)
    vec1 = data['embedding']
    vec2 = get_embedding(d['answer'])
    similarity = cosine_similarity(vec1, vec2)
    d['similarity'] = similarity
    similaritys.append(similarity)



In [40]:
similaritys = []
for d in tqdm(data):
    vec1 = d['ans_embedding']
    vec2 = get_embedding(d['answer'])
    similarity = cosine_similarity(vec1, vec2)
    d['similarity'] = similarity
    similaritys.append(similarity)

100%|██████████| 1000/1000 [03:53<00:00,  4.28it/s]


In [49]:
import pandas as pd
df = pd.DataFrame(data)
df.to_excel("AFAC240706.xlsx",index=False)

In [65]:
test_data = read_json("test.json")
result = []
for d in tqdm(test_data):
    query = d['问题']
    clause = d['条款']
    prompt = qa_prompt.format(clause, query)
    answer = get_response(prompt)
    rd = {
        "ID": d['ID'],
        "question": query,
        "answer": answer
    }
    result.append(rd)

 22%|██▏       | 219/1000 [03:36<18:51,  1.45s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-f99d9c89-0351-9f26-8ab6-49c1a10612ff'}


 28%|██▊       | 280/1000 [04:31<17:50,  1.49s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-c6507938-acb5-98e9-a5cb-9a4e8219869a'}


 28%|██▊       | 281/1000 [04:34<23:00,  1.92s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-7a3aa16b-b7e4-92aa-946d-c5a7ccbcbd00'}


 28%|██▊       | 282/1000 [04:36<25:33,  2.14s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-da386601-13c9-98cc-8172-f6737b664bb3'}


 40%|███▉      | 396/1000 [06:33<05:34,  1.81it/s]

An unexpected error occurred: Error code: 400 - {'error': {'code': 'data_inspection_failed', 'param': None, 'message': 'Input data may contain inappropriate content.', 'type': 'data_inspection_failed'}, 'id': 'chatcmpl-a1a483b7-c678-92b9-83f8-8ebe5c50b1fc'}


 40%|███▉      | 397/1000 [06:36<12:14,  1.22s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-5f97278b-cc0f-913a-bdab-ee862c36a1a0'}


 62%|██████▏   | 618/1000 [10:33<08:32,  1.34s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-d9a6e6e6-6636-92a4-9988-7055b55656d3'}


 62%|██████▏   | 619/1000 [10:35<10:58,  1.73s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-c2a70e6c-1063-9cbd-b9e6-b2da9dfef4a2'}


 62%|██████▏   | 620/1000 [10:38<12:27,  1.97s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-8344bbb3-30a9-9f95-8995-c5131bb53e07'}


 68%|██████▊   | 681/1000 [11:38<07:21,  1.38s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-d9802d3a-cc58-94ff-96a7-f0cf23994de5'}


 86%|████████▌ | 859/1000 [14:50<01:01,  2.31it/s]

An unexpected error occurred: Error code: 400 - {'error': {'code': 'data_inspection_failed', 'param': None, 'message': 'Input data may contain inappropriate content.', 'type': 'data_inspection_failed'}, 'id': 'chatcmpl-ce9d52c4-2ddd-9813-b1cf-f65a79c2733f'}


 90%|█████████ | 905/1000 [15:35<02:11,  1.39s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-ec9d686c-c5ce-959e-98cf-285121238e14'}


 91%|█████████ | 906/1000 [15:38<02:48,  1.79s/it]

An unexpected error occurred: Error code: 429 - {'error': {'code': 'limit_requests', 'param': None, 'message': 'You exceeded your current requests list.', 'type': 'limit_requests'}, 'id': 'chatcmpl-c6acd433-2145-9faf-89d0-bd46a69e8224'}


100%|██████████| 1000/1000 [17:17<00:00,  1.04s/it]


In [None]:
for i in [218,279,280,281,386,617,618,619,680,904,905]:
    query = test_data[i]['问题']
    clause = test_data[i]['条款']
    prompt = qa_prompt.format(clause, query)
    answer = get_response(prompt)
    rd = {
        "ID": test_data[i]['ID'],
        "question": query,
        "answer": answer
    }
    result[i] = rd

In [None]:
test_data[858]

In [86]:
result[395]['answer'] = "在战争期间，如果被保险人身故或残疾，保险人不负任何给付保险金责任。"
result[858]['answer'] = "如果被保险人使用或拥有的海、陆、空运输工具造成损失和责任，保险人不负责赔偿。"

In [75]:
test_data[218]['ID']

6218

In [87]:
 write_jsonl(result,"result0706")

In [18]:
!pip install tqdm
from tqdm import tqdm
for data in tqdm(dev_data):
    embed_text = data['答案']
    data['ans_embedding'] = get_embedding(embed_text)

Looking in indexes: https://mirrors.cloud.aliyuncs.com/pypi/simple
[33mDEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


100%|██████████| 1000/1000 [03:59<00:00,  4.17it/s]


In [21]:
 write_jsonl(dev_data,"dev_with_embedding")

In [17]:
text1 = "1000万航空意外险具有一年的默认保险期间"
text2 = "1000万航空意外险默认保险期间一年。"
vec1 = get_embedding(text1)
vec2 = get_embedding(text2)
similarity = cosine_similarity(vec1, vec2)
print(similarity)

0.9570271829028671


### 参考资料

https://dashscope.console.aliyun.com/billing