In [1]:
from flask import Flask, request, jsonify, render_template
from transformers import AutoTokenizer, AutoModelWithLMHead, MT5ForConditionalGeneration
from elasticsearch import Elasticsearch
from bs4 import BeautifulSoup
from torch.nn.functional import softmax
import requests
import torch

#启动docker里的elasticsearch870容器

es = Elasticsearch(hosts="http://localhost:9200")
tokenizer = AutoTokenizer.from_pretrained("/Users/administrator/nlpmodels/huggingface-T5QA")
model = MT5ForConditionalGeneration.from_pretrained("/Users/administrator/nlpmodels/huggingface-T5QA")

def get_response(input_text):
    query = {
        "query": {
            "bool": {
                "should": [
                    {
                        "multi_match": {
                            "query": input_text,
                            "fields": ["content", "caption"],
                            "boost": 2.0,
                            "fuzziness": "AUTO",
                            "analyzer": "custom_analyzer"
                        }
                    },
                    {
                        "match_phrase_prefix": {
                            "content": {
                                "query": input_text,
                                "boost": 1.5,
                                "analyzer": "custom_analyzer"
                            }
                        }
                    }
                ],
                "minimum_should_match": 1
            }
        },
        "size": 10,
        "_source": ["id", "caption", "content"],
        "highlight": {
            "fields": {
                "content": {}
            }
        }
    }

    res = es.search(index='cupdkb_index', body=query)
    es_results = []
    for hit in res['hits']['hits']:
        source = hit["_source"]
        document_id = hit["_id"]
        document_caption = source["caption"].replace("\n", "")
        highlight = hit.get("highlight", {})
        content_highlight = highlight.get("content", [])
        content_snippet = content_highlight[0] if content_highlight else source["content"]
        content_snippet = BeautifulSoup(content_snippet, "html.parser").get_text()
        
        
        es_results.append({
            "id": document_id,
            "caption": document_caption,
            "content": content_snippet
        })

    answers = []

    if es_results:
        for es_result in es_results:
            task_specific_input = f"question: {input_text} context: {es_result['content']}"
            inputs = tokenizer.encode_plus(task_specific_input, return_tensors='pt')
            outputs = model.generate(inputs['input_ids'], max_length=512, temperature=0.7, no_repeat_ngram_size=2, return_dict_in_generate=True, output_scores=True)
            decoded_output = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
            answer = decoded_output.split('<eos>')[0]
            scores = outputs.scores[0]
            probabilities = softmax(scores, dim=-1)
            avg_probability = probabilities.mean().item()
            answers.append({"id": es_result["id"], "caption": es_result["caption"], "answer": answer, "probability": avg_probability})

        answers.sort(key=lambda x: x['probability'], reverse=True)
        top_answers = answers[:3]
        
        ###############################
        print('----------es_results: Found-------------')
        print({
            'status': 'found',
            'answers': [{'id': answer['id'], 'caption': answer['caption'], 'answer': answer['answer']} for answer in top_answers]
        })
        ###############################
        
        return {
            'status': 'found',
            'answers': [{'id': answer['id'], 'caption': answer['caption'], 'answer': answer['answer']} for answer in top_answers]
        }
    else:
        inputs = tokenizer.encode(input_text, return_tensors='pt')
        outputs = model.generate(inputs, max_length=512, num_return_sequences=1, temperature=0.7, no_repeat_ngram_size=2)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        ###############################
        print('----------es_results: Not Found-------------')
        print({
            'status': 'not_found',
            'response': response
        })
        ###############################
        
        return {
            'status': 'not_found',
            'response': response
        }
    
    
    print('----------es_results: Error-------------')
    print({
        'status': 'error',
        'message': 'Unexpected error occurred.'
    })
    
    
    return {
        'status': 'error',
        'message': 'Unexpected error occurred.'
    }
    
    
    

app = Flask(__name__)

@app.route('/')
def home():
    return render_template('index.html')


@app.route('/api/get_response', methods=['POST'])
def get_flask_response():
    data = request.get_json()
    user_input = data['input_text']
    try:
        response = get_response(user_input)
        return jsonify(response)
    except Exception as e:
        print(e)
        return jsonify({"status": "error", "message": "Unexpected error occurred."})

    
    
    
@app.route('/api/get_detail', methods=['POST'])
def get_detail():
    data = request.get_json()
    id = data['id']
    # 根据ID获取详细内容
    detail = get_detail_content(id)  # 自定义函数，根据ID获取详细内容
    
    if detail:
        return jsonify({'status': 'found', 'detail': detail})
    else:
        return jsonify({'status': 'not_found'})


@app.route('/detail')
def show_detail():
    id = request.args.get('id')
    detail = get_detail_content(id)  # 自定义函数，根据ID获取详细内容
    
    if detail:
        return render_template('detail.html', detail=detail)
    else:
        return render_template('detail.html', detail=None)


def get_detail_content(id):
    # 使用Elasticsearch查询根据ID获取详细内容
    query = {
        "query": {
            "ids": {
                "values": [id]
            }
        },
        "_source": ["id", "caption", "content"]
    }

    res = es.search(index='cupdkb_index', body=query)

    if res['hits']['total']['value'] > 0:
        hit = res['hits']['hits'][0]
        source = hit["_source"]
        detail = {
            "id": source["id"],
            "caption": source["caption"],
            "content": source["content"]
        }
        return detail
    else:
        return None



if __name__ == '__main__':
    app.run(host='0.0.0.0', port=9898)





 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:9898
 * Running on http://10.170.0.87:9898
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [12/Jun/2023 14:27:13] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [12/Jun/2023 14:27:13] "[36mGET /static/bot.png HTTP/1.1[0m" 304 -
127.0.0.1 - - [12/Jun/2023 14:27:18] "[36mGET /static/user.png HTTP/1.1[0m" 304 -
  res = es.search(index='cupdkb_index', body=query)
127.0.0.1 - - [12/Jun/2023 14:28:01] "POST /api/get_response HTTP/1.1" 200 -


----------es_results: Found-------------
{'status': 'found', 'answers': [{'id': 'HRFmVogBgn8Xb9O1P0Gn', 'caption': 'IC卡降级交易管控', 'answer': '降级交易管控需要,可以在ICMS系统PRMCE画面可以通过“禁止银联行内IC卡境内降级的交易(POS)”、“禁止银行境内IC产品境内的降片降价交易”(ATM)”的勾选对磁条芯片复合卡进行降一级交易的禁止。'}, {'id': 'KhFmVogBgn8Xb9O1Qkhc', 'caption': 'ICMS2:修改PRMCE画面降级交易开关字段名称（服务台变更单：CHG-140708-0023）', 'answer': '标题:ICMS2:修改PRMCE画面降级交易开关字段名称 描述:PRMC画面原字符串名:禁止银联IC卡降级的交易(POS) 和 禁止银行级IC产品降度的交易'}, {'id': 'fxFmVogBgn8Xb9O1Qkdc', 'caption': 'IC卡降级交易银行级开关（服务台变更单：CHG-140618-0029）', 'answer': '标题:IC卡降级交易银行级开关 描述:根据人民银行的要求,发卡行需要在8月31日前关闭ATM、POS渠道的ICcard降级的交易,卡系统目前已经有卡片级的ICCard降一级交易开关,还有高风险地区降级别的管控。'}]}
