In [10]:
from openai import OpenAI
import os
from dotenv import dotenv_values
from ReadLoad import read_jsonl, write_jsonl, read_json,write_json
from tqdm import tqdm
from evaluation import get_embedding, cosine_similarity
from prompt_template import get_sys_prompt, get_qa_prompt, get_refine_prompt, re_query
from eval_data import key_word_score
from qwen import qwen_response
import pandas as pd
import time

# Read key/value settings from the local `.env` file so credentials stay out of
# the notebook itself.
config = dotenv_values('.env')
# Shared OpenAI-SDK client used by `get_response`; the API key and endpoint URL
# are taken from the `qwen_key` / `qwen_url` entries of `.env`.
client = OpenAI(
    api_key=config['qwen_key'],
    base_url=config['qwen_url']
)


def get_response(prompt):
    """Ask the chat model to answer ``prompt`` and return the reply text.

    The request is sent through the module-level ``client`` to the
    ``qwen2-7b-instruct`` model, with the system message supplied by
    ``get_sys_prompt()`` and ``prompt`` as the user message.

    Args:
        prompt: User-message text to send.

    Returns:
        The content of the first choice in the completion. If anything raises
        while building or sending the request, the error is printed and the
        same error text is returned in place of an answer, so callers always
        receive a string.
    """
    # Fixed short pause before every request (presumably to stay under the
    # API rate limit -- confirm).
    time.sleep(0.1)
    try:
        reply = client.chat.completions.create(
            model="qwen2-7b-instruct",
            messages=[
                {'role': 'system', 'content': get_sys_prompt()},
                {'role': 'user', 'content': prompt},
            ],
        )
        first_choice = reply.choices[0]
        return first_choice.message.content
    except Exception as err:
        # Best-effort: report the failure and hand the message back as the
        # "answer" instead of propagating the exception.
        print(f"An unexpected error occurred: {err}")
        return f"An unexpected error occurred: {err}"


In [11]:
def completion(data):
    """Generate an answer for every record in ``data``, mutating it in place.

    For each record the question (``'问题'``), clause (``'条款'``) and product
    name (``'产品名'``) are read, a QA prompt is built with ``get_qa_prompt``
    and sent to ``qwen_response``. Four keys are then written to the record:

    * ``'prompt'`` -- the QA prompt that was sent.
    * ``'original_answer'`` -- the model's reply to that prompt.
    * ``'refine_prompt'`` -- a follow-up prompt built by ``get_refine_prompt``;
      it is stored only and never sent to the model here.
    * ``'answer'`` -- the final answer, currently equal to ``'original_answer'``.

    Args:
        data: Iterable of dict-like records carrying the three input keys.

    Returns:
        The same ``data`` object, with every record updated.
    """
    for record in tqdm(data):
        question = record['问题']
        clause_text = record['条款']
        product_name = record['产品名']

        qa_prompt = get_qa_prompt(product_name, clause_text, question)
        record['prompt'] = qa_prompt

        first_pass = qwen_response(qa_prompt)
        record['original_answer'] = first_pass

        record['refine_prompt'] = get_refine_prompt(
            question, first_pass, product_name, clause_text
        )
        # The refinement round is disabled: the first-pass reply is final.
        record['answer'] = first_pass
    return data

In [12]:
def evaluation(data, name="测试数据", embeddings=None):
    """Score generated answers against reference answers and export the result.

    For every record two scores are computed and stored on the record:

    * ``'similarity'`` -- ``cosine_similarity`` between the reference-answer
      embedding (looked up by the record's ``'ID'``) and
      ``get_embedding(record['answer'])``.
    * ``'key_word_similarity'`` -- ``key_word_score`` of the reference answer
      (``'答案'``) versus the generated answer (``'answer'``).

    The mean of each score is printed, and all records are written to
    ``"{name}_score_{score}.xlsx"`` where ``score`` is the mean similarity.

    Args:
        data: Sized collection of dict records carrying ``'ID'``, ``'answer'``
            and ``'答案'``. Records are mutated in place.
        name: Prefix of the Excel file that receives the scored records.
        embeddings: Optional mapping from record ID to the reference-answer
            embedding. When omitted, the notebook-level global
            ``dev_data_embd`` is used, as before.

    Returns:
        The same ``data`` object, with the per-record scores added. If
        ``data`` is empty, it is returned unchanged and no file is written.

    Raises:
        NameError: If ``embeddings`` is not given and the global
            ``dev_data_embd`` has not been defined in the kernel.
    """
    # Nothing to average: bail out instead of dividing by zero further down.
    if len(data) == 0:
        print('测评数据集：0 \n没有可评估的数据，跳过评分')
        return data

    if embeddings is None:
        # Backward-compatible fallback to the hidden notebook global.
        embeddings = dev_data_embd

    similarities = []
    keyword_scores = []
    for d in tqdm(data):
        vec1 = embeddings[d['ID']]
        vec2 = get_embedding(d['answer'])
        similarity = cosine_similarity(vec1, vec2)
        kw_score = key_word_score(d['答案'], d['answer'])
        d['similarity'] = similarity
        d['key_word_similarity'] = kw_score
        similarities.append(similarity)
        keyword_scores.append(kw_score)

    length = len(data)
    score = sum(similarities) / length
    # Separate name for the mean so the list above is not rebound to a float.
    keyword_mean = sum(keyword_scores) / length
    print(f'测评数据集：{length} \n相识度得分：{score} \n关键词得分：{keyword_mean}')
    df = pd.DataFrame(data)
    df.to_excel(f"{name}_score_{score}.xlsx", index=False)
    return data

In [13]:
def to_summit_json(data, commit="大道至简_result"):
    """Write the answered records out in the submission layout.

    Each record is reduced to three fields -- ``"ID"``, ``"question"`` (taken
    from ``'问题'``) and ``"answer"`` -- and the resulting list is handed to
    ``write_jsonl`` together with ``commit``.

    Args:
        data: Iterable of dict records carrying ``'ID'``, ``'问题'`` and
            ``'answer'``.
        commit: Second argument forwarded to ``write_jsonl`` (presumably the
            output file name -- confirm against ``ReadLoad.write_jsonl``).

    Returns:
        None.
    """
    submission_rows = [
        {
            "ID": item['ID'],
            "question": item['问题'],
            "answer": item['answer'],
        }
        for item in data
    ]
    write_jsonl(submission_rows, commit)

### Loading data

In [16]:
# Load the test set that `completion` answers in the next cell.
test_data = read_json("dataset/test.json")
# Disabled dev-set loading: reads the dev records and builds `dev_data_embd`,
# a mapping from record ID to the stored reference-answer embedding.
# NOTE(review): `evaluation` looks up `dev_data_embd`, so this block has to be
# re-enabled before `evaluation` can run on a fresh kernel.
# dev_data = read_jsonl("dataset/resultdev_with_embedding.jsonl")
# dev_data_embd = {}
# for d in dev_data:
#     dev_data_embd[d['ID']] = d['ans_embedding']
#     d.pop('ans_embedding')

In [17]:
# NOTE(review): the original comment here read "also move the data onto the
# GPU", but nothing in this cell touches a GPU -- presumably a stale leftover.
# Disabled dev-set run: answer a 100-record slice and score it.
# comp_data = completion(dev_data[200:300])
# eval_data = evaluation(comp_data)

# Answer every test record, then write the submission file.
test_data_result = completion(test_data)
to_summit_json(test_data_result)


100%|██████████| 1000/1000 [25:28<00:00,  1.53s/it]


### 参考资料

https://dashscope.console.aliyun.com/billing