In [None]:
import os
os.environ["WANDB_MODE"] = "disabled"

In [14]:
# Импорт необходимых библиотек
import pandas as pd
import re
from sentence_transformers import SentenceTransformer, util
import numpy as np
from nltk.translate.bleu_score import sentence_bleu

# Загрузка данных из файла Excel
train_data = pd.read_excel('FAQ.xlsx')
test_data = pd.read_excel('QA_test.xlsx')

# Предобработка данных
def preprocess_text(text):
    text = text.lower()  # Приведение текста к нижнему регистру
    text = re.sub(r'[^а-яА-Я\s]', '', text)  # Удаление символов, кроме букв и пробелов
    return text.strip()

# Удаление пустых значений
train_data = train_data.dropna(subset=['Вопрос', 'Ответ'])
test_data = test_data.dropna(subset=['Вопрос', 'Ответ'])

# Применение предобработки к вопросам и ответам
train_data['Вопрос'] = train_data['Вопрос'].apply(preprocess_text)
train_data['Ответ'] = train_data['Ответ'].apply(preprocess_text)

test_data['Вопрос'] = test_data['Вопрос'].apply(preprocess_text)
test_data['Ответ'] = test_data['Ответ'].apply(preprocess_text)

questions_train = train_data['Вопрос'].tolist()
answers_train = train_data['Ответ'].tolist()

# Загрузка модели SBERT
model = SentenceTransformer('distiluse-base-multilingual-cased-v2')

# Создание эмбеддингов для всех вопросов в обучающей выборке
question_embeddings = model.encode(questions_train, convert_to_tensor=True)

# Функция для нахождения ответа на вопрос
def find_answer(user_question, threshold=0.6):
    user_question_embedding = model.encode(user_question, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(user_question_embedding, question_embeddings)[0]
    best_match_idx = similarities.argmax().item()
    best_match_score = similarities[best_match_idx].item()

    if best_match_score < threshold:
        return "Я не знаю", best_match_score  # Возвращать сообщение о том, что ответ не найден

    return answers_train[best_match_idx], best_match_score

# Функция для оценки модели с использованием BLEU-оценки
def evaluate_model(threshold=0.7):
    y_true = []
    y_pred = []
    bleu_scores = []

    for index, row in test_data.iterrows():
        question = row['Вопрос']
        expected_answer = row['Ответ']
        predicted_answer, similarity = find_answer(question, threshold)

        y_true.append(expected_answer)
        y_pred.append(predicted_answer)

        # Вычисление BLEU-оценки для каждого предсказанного ответа
        reference = preprocess_text(expected_answer).split()
        candidate = preprocess_text(predicted_answer).split()
        bleu_score = sentence_bleu([reference], candidate)
        bleu_scores.append(bleu_score)

    avg_bleu_score = np.mean(bleu_scores)
    print(f'Average BLEU Score at threshold {threshold}: {avg_bleu_score * 100:.2f}%')

    # Вычисление точности на основе BLEU
    correct_answers = [
        1 if bleu >= threshold else 0
        for bleu in bleu_scores
    ]
    accuracy = np.mean(correct_answers)

    return accuracy, avg_bleu_score

# Цикл для оценки модели с разными порогами
print("Оценка модели при различных порогах:")
for threshold in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]:
    accuracy, avg_bleu_score = evaluate_model(threshold=threshold)
    print(f'Threshold: {threshold} - Accuracy: {accuracy * 100:.2f}%, Average BLEU Score: {avg_bleu_score * 100:.2f}%')

# Добавление предсказанных ответов и коэффициентов доверия в тестовый датасет
test_data['predicted_answer'], test_data['confidence_score'] = zip(*test_data['Вопрос'].apply(lambda q: find_answer(q, threshold=0.6)))

# Сохранение результатов в Excel
test_data.to_excel('FAQ_test_predictions.xlsx', index=False)
print("Предсказания сохранены в 'FAQ_test_predictions.xlsx'")

Оценка модели при различных порогах:


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Average BLEU Score at threshold 0.1: 84.03%
Threshold: 0.1 - Accuracy: 85.00%, Average BLEU Score: 84.03%
Average BLEU Score at threshold 0.2: 84.03%
Threshold: 0.2 - Accuracy: 85.00%, Average BLEU Score: 84.03%
Average BLEU Score at threshold 0.3: 84.03%
Threshold: 0.3 - Accuracy: 84.38%, Average BLEU Score: 84.03%
Average BLEU Score at threshold 0.4: 84.03%
Threshold: 0.4 - Accuracy: 84.38%, Average BLEU Score: 84.03%
Average BLEU Score at threshold 0.5: 84.03%
Threshold: 0.5 - Accuracy: 83.44%, Average BLEU Score: 84.03%
Average BLEU Score at threshold 0.6: 84.03%
Threshold: 0.6 - Accuracy: 83.44%, Average BLEU Score: 84.03%
Average BLEU Score at threshold 0.7: 83.72%
Threshold: 0.7 - Accuracy: 83.12%, Average BLEU Score: 83.72%
Average BLEU Score at threshold 0.8: 81.85%
Threshold: 0.8 - Accuracy: 81.25%, Average BLEU Score: 81.85%
Предсказания сохранены в 'FAQ_test_predictions.xlsx'


In [15]:
# Пример использования: ввод вопроса и получение ответа
user_question = "Волонтерство НИЯУ МИФИ"
answer = find_answer(user_question)
print("Ответ:", answer)

Ответ: ('чтобы стать волонтером нужно подать заявку через сайт или обратиться в студенческий совет', 0.6091877222061157)
