In [1]:
from openai import OpenAI
import os
from dotenv import dotenv_values
from ReadLoad import read_jsonl, write_jsonl, read_json,write_json
from tqdm import tqdm
from evaluation import get_embedding, cosine_similarity
from prompt_template import get_sys_prompt, get_qa_prompt
import pandas as pd
import time

# Credentials and endpoint are read from the local .env file so that no secret
# is hard-coded in the notebook.
config = dotenv_values('.env')
# Both 'qwen_key' and 'qwen_url' must be present in the loaded config; the
# lookups below fail right here if either entry is missing.
client = OpenAI(api_key=config['qwen_key'], base_url=config['qwen_url'])


def get_response(prompt, model="qwen2-7b-instruct", delay=0.1, max_attempts=1):
    """Send one user prompt to the chat model and return the reply text.

    The request consists of a system message taken from ``get_sys_prompt()``
    followed by ``prompt`` as the user message. This function never raises for
    a failed request: the error is printed and the same error text is returned
    instead of an answer.

    Args:
        prompt: Text sent as the user message.
        model: Model name passed to the chat-completions call.
        delay: Seconds to sleep before every request attempt.
        max_attempts: Total number of attempts before giving up. Values below
            1 are treated as 1. The default of 1 means a single attempt with
            no retry.

    Returns:
        The content of the first choice's message on success. If every attempt
        fails, the string ``"An unexpected error occurred: <error>"`` for the
        last error. Callers that need to tell answers from failures have to
        check for that prefix.
    """
    attempts = max(1, max_attempts)
    error_text = ""
    for _ in range(attempts):
        # Pause before each request (the original fixed 0.1 s throttle).
        time.sleep(delay)
        try:
            reply = client.chat.completions.create(
                model=model,
                messages=[
                    {'role': 'system', 'content': get_sys_prompt()},
                    {'role': 'user', 'content': prompt},
                ],
            )
            return reply.choices[0].message.content
        except Exception as e:
            # Best-effort by design: report the failure and keep going so one
            # bad request does not abort a whole batch run.
            error_text = f"An unexpected error occurred: {e}"
            print(error_text)
    return error_text


In [16]:
def completion(data):
    """Generate a model answer for every record and store it on the record.

    For each item the QA prompt is built by ``get_qa_prompt`` from the item's
    '条款' and '问题' values; the prompt is saved under 'prompt' and the result
    of ``get_response`` under 'answer'. The records are modified in place.

    Args:
        data: Iterable of dict-like records that each have '问题' and '条款'.

    Returns:
        The same ``data`` object that was passed in.
    """
    for record in tqdm(data):
        question, clause_text = record['问题'], record['条款']
        record['prompt'] = get_qa_prompt(clause_text, question)
        record['answer'] = get_response(record['prompt'])
    return data

In [26]:
def evaluation(data, name="测试数据", embeddings=None):
    """Score each generated answer against its reference embedding.

    For every record the reference vector is looked up by the record's 'ID',
    the record's 'answer' is embedded with ``get_embedding``, and the
    ``cosine_similarity`` of the two is stored on the record under
    'similarity'. The mean similarity is printed and the records are written
    to ``"{name}_score_{score}.xlsx"``.

    Args:
        data: Sized collection of dict-like records with 'ID' and 'answer'.
            Records are modified in place.
        name: Prefix of the Excel file name.
        embeddings: Optional mapping from record ID to reference embedding.
            When omitted, the notebook-level ``dev_data_embd`` mapping is used.

    Returns:
        The same ``data`` object that was passed in.

    Raises:
        KeyError: If a record's ID has no entry in the embedding mapping
            (when that mapping is a plain dict).
    """
    # Only touch the notebook global when the caller did not supply a mapping.
    reference = dev_data_embd if embeddings is None else embeddings

    length = len(data)
    if length == 0:
        # Nothing to average: avoid dividing by zero and do not write a file.
        print(f'测评数据集：{length} 无可评估数据，已跳过评分')
        return data

    similaritys = []
    for d in tqdm(data):
        vec1 = reference[d['ID']]
        vec2 = get_embedding(d['answer'])
        similarity = cosine_similarity(vec1, vec2)
        d['similarity'] = similarity
        similaritys.append(similarity)

    score = sum(similaritys) / length
    print(f'测评数据集：{length} 模型推理结果与答案的相识度得分：{score}')
    # The score is embedded in the file name, so each run's score is visible
    # from the name of the file it wrote.
    df = pd.DataFrame(data)
    df.to_excel(f"{name}_score_{score}.xlsx", index=False)
    return data

In [28]:
def to_summit_json(data, commit="大道至简_result"):
    """Reduce the records to the submission fields and hand them to write_json.

    Each record contributes a dict with exactly three keys: "ID" (from 'ID'),
    "question" (from '问题') and "answer" (from 'answer'). Any other fields on
    the record are left out.

    Args:
        data: Iterable of dict-like records with 'ID', '问题' and 'answer'.
        commit: Second argument passed to ``write_json``.

    Returns:
        None.
    """
    submission = [
        {"ID": item['ID'], "question": item['问题'], "answer": item['answer']}
        for item in data
    ]
    write_json(submission, commit)

### loading data

In [19]:
# Load the raw test set and the dev set; each dev record carries an
# 'ans_embedding' entry alongside its 'ID'.
test_data = read_json("dataset/test.json")
dev_data = read_jsonl("dataset/resultdev_with_embedding.jsonl")

# Move every embedding out of its record into an ID -> embedding lookup, so the
# dev records no longer carry the 'ans_embedding' field afterwards.
dev_data_embd = {}
for d in dev_data:
    dev_data_embd[d['ID']] = d.pop('ans_embedding')

In [29]:
# End-to-end run on a small sample: generate answers for the first two dev
# records, score them against the embeddings held in dev_data_embd, then pass
# the scored records on to be written out via to_summit_json.
# completion() mutates its records in place, so the first two dicts in dev_data
# gain 'prompt' / 'answer' (and later 'similarity') fields as a side effect.
# NOTE(review): the submission is built from this 2-record dev slice, and
# test_data is not used in the cells visible here — confirm whether the final
# run should cover the full set.
comp_data = completion(dev_data[:2])
eval_data = evaluation(comp_data)
to_summit_json(eval_data)

### 参考资料

https://dashscope.console.aliyun.com/billing