# **ROUGE**

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from nltk.util import ngrams

def rouge_n(reference, candidate, n=1):
    """Compute ROUGE-N precision, recall and F1 for two whitespace-tokenized texts.

    Matching n-grams are counted with clipped multiset overlap: an n-gram that
    occurs several times is credited at most as often as it appears in both
    texts. (Counting the overlap on sets while dividing by list lengths
    under-counts repeated n-grams.)

    Args:
        reference: Reference text; tokens are obtained with ``str.split()``.
        candidate: Candidate text; tokens are obtained with ``str.split()``.
        n: Size of the n-grams (default 1).

    Returns:
        dict with the keys 'precision', 'recall' and 'f1'. Each value is 0 when
        its denominator would be zero (e.g. a text shorter than ``n`` tokens).
    """
    from collections import Counter  # local import keeps this cell self-contained

    def _ngram_counts(text):
        tokens = text.split()
        # zip over n shifted views of the token list yields every contiguous n-gram
        return Counter(zip(*(tokens[i:] for i in range(n))))

    reference_counts = _ngram_counts(reference)
    candidate_counts = _ngram_counts(candidate)
    # Counter & Counter keeps the minimum count per n-gram (clipped overlap)
    overlap = sum((reference_counts & candidate_counts).values())
    reference_total = sum(reference_counts.values())
    candidate_total = sum(candidate_counts.values())

    recall = overlap / reference_total if reference_total > 0 else 0
    precision = overlap / candidate_total if candidate_total > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return {'precision': precision, 'recall': recall, 'f1': f1_score}

# Example usage: score a candidate sentence against a reference with bigrams (n=2).
reference = "This is a test sentence for the rouge metric"
candidate = "This is a sentence for testing the rouge metric"
print(rouge_n(reference, candidate, n=2))



{'precision': 0.625, 'recall': 0.625, 'f1': 0.625}


In [None]:
from nltk.util import ngrams

def rouge_1(reference, candidate):
    """Compute ROUGE-1 (unigram) precision, recall and F1.

    Tokens come from ``str.split()``. Overlap is the clipped multiset
    intersection, so a repeated word is credited as many times as it occurs in
    both texts instead of only once.

    Args:
        reference: Reference text.
        candidate: Candidate text.

    Returns:
        dict with the keys 'precision', 'recall' and 'f1'; a value is 0 when its
        denominator would be zero.
    """
    from collections import Counter  # local import keeps this cell self-contained

    ref_counts = Counter(reference.split())
    cand_counts = Counter(candidate.split())
    # Counter & Counter keeps the minimum count per token
    overlap = sum((ref_counts & cand_counts).values())
    ref_total = sum(ref_counts.values())
    cand_total = sum(cand_counts.values())

    recall = overlap / ref_total if ref_total else 0
    precision = overlap / cand_total if cand_total else 0
    f1 = 2 * recall * precision / (recall + precision) if (recall + precision) else 0
    return {'precision': precision, 'recall': recall, 'f1': f1}

def rouge_2(reference, candidate):
    """Compute ROUGE-2 (bigram) precision, recall and F1.

    Bigrams are built from adjacent ``str.split()`` tokens. Overlap is the
    clipped multiset intersection, so repeated bigrams are counted as often as
    they occur in both texts instead of only once.

    Args:
        reference: Reference text.
        candidate: Candidate text.

    Returns:
        dict with the keys 'precision', 'recall' and 'f1'; a value is 0 when its
        denominator would be zero (e.g. a text with fewer than two tokens).
    """
    from collections import Counter  # local import keeps this cell self-contained

    def _bigram_counts(text):
        tokens = text.split()
        return Counter(zip(tokens, tokens[1:]))

    ref_counts = _bigram_counts(reference)
    cand_counts = _bigram_counts(candidate)
    overlap = sum((ref_counts & cand_counts).values())
    ref_total = sum(ref_counts.values())
    cand_total = sum(cand_counts.values())

    recall = overlap / ref_total if ref_total else 0
    precision = overlap / cand_total if cand_total else 0
    f1 = 2 * recall * precision / (recall + precision) if (recall + precision) else 0
    return {'precision': precision, 'recall': recall, 'f1': f1}

def rouge_l(reference, candidate):
    """Compute ROUGE-L precision, recall and F1 from the longest common subsequence.

    The LCS is computed over ``str.split()`` tokens with dynamic programming, so
    word order matters. (Counting reference words that merely occur somewhere in
    the candidate ignores order and is not an LCS.)

    Args:
        reference: Reference text.
        candidate: Candidate text.

    Returns:
        dict with the keys 'precision', 'recall' and 'f1'; a value is 0 when its
        denominator would be zero.
    """
    ref_words, cand_words = reference.split(), candidate.split()

    # Rolling-row LCS table: previous[j] is the LCS length of the reference
    # prefix processed so far and the first j candidate tokens.
    previous = [0] * (len(cand_words) + 1)
    for ref_word in ref_words:
        current = [0]
        for j, cand_word in enumerate(cand_words, start=1):
            if ref_word == cand_word:
                current.append(previous[j - 1] + 1)
            else:
                current.append(max(previous[j], current[j - 1]))
        previous = current
    lcs = previous[-1]

    recall = lcs / len(ref_words) if ref_words else 0
    precision = lcs / len(cand_words) if cand_words else 0
    f1 = 2 * recall * precision / (recall + precision) if (recall + precision) else 0
    return {'precision': precision, 'recall': recall, 'f1': f1}

# Example usage: compare two Russian paraphrases with all three ROUGE variants.
reference = "Этот день оказался необычайно светлым и солнечным. Люди с удовольствием прогуливались по городу, радуясь приятной погоде и общению"
candidate = "Сегодняшний день выдался особенно ясным и солнечным. Люди с радостью гуляли по улицам, наслаждаясь теплой погодой и общением"

print("ROUGE-1:", rouge_1(reference, candidate))
print("ROUGE-2:", rouge_2(reference, candidate))
print("ROUGE-L:", rouge_l(reference, candidate))

ROUGE-1: {'precision': 0.17647058823529413, 'recall': 0.42857142857142855, 'f1': 0.25}
ROUGE-2: {'precision': 0.058823529411764705, 'recall': 0.16666666666666666, 'f1': 0.08695652173913045}
ROUGE-L: {'precision': 0.16666666666666666, 'recall': 0.42857142857142855, 'f1': 0.24}


# **METEOR**

In [None]:
import nltk
nltk.download('wordnet')
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import single_meteor_score

# Predicted translation (hypothesis) and reference translation (gold standard)
hypothesis = "Это тестовое предложение для rouge metric"
reference = "Это предложение для тестирования показателя rouge metric"

# BLEU score on unigrams and bigrams (BLEU-2)
def calculate_bleu_precision(reference, hypothesis):
    """Compute a smoothed word-level BLEU-2 score.

    ``sentence_bleu`` expects token sequences; handing it raw strings makes it
    iterate over characters and report a character-level score. Strings are
    therefore split on whitespace first; inputs that are already token
    sequences are used as given.

    Args:
        reference: Reference sentence (string or sequence of tokens).
        hypothesis: Hypothesis sentence (string or sequence of tokens).

    Returns:
        BLEU score computed with equal unigram/bigram weights and method1 smoothing.
    """
    reference_tokens = reference.split() if isinstance(reference, str) else list(reference)
    hypothesis_tokens = hypothesis.split() if isinstance(hypothesis, str) else list(hypothesis)
    smoothing = SmoothingFunction().method1
    bleu_score = sentence_bleu(
        [reference_tokens],
        hypothesis_tokens,
        smoothing_function=smoothing,
        weights=(0.5, 0.5),
    )
    return bleu_score

# METEOR score
def calculate_meteor_precision(reference, hypothesis):
    """Return the METEOR score of ``hypothesis`` against ``reference``.

    Both arguments are strings; each is split on whitespace into a token list
    before being handed to ``single_meteor_score``.
    """
    return single_meteor_score(reference.split(), hypothesis.split())

# Compute both scores for the module-level sentences and print them
bleu_precision = calculate_bleu_precision(reference, hypothesis)
meteor_precision = calculate_meteor_precision(reference, hypothesis)

print("BLEU Precision:", bleu_precision)
print("METEOR Precision:", meteor_precision)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


NameError: name 'hypothesis' is not defined

In [None]:
import nltk
nltk.download('wordnet')
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import single_meteor_score

# Predicted translation (hypothesis) and reference translation (gold standard)
hypothesis = "This is a test sentence for the rouge metric"
reference= "This is a sentence for testing the rouge metric"

# BLEU score on unigrams and bigrams (BLEU-2)
def calculate_bleu_precision(reference, hypothesis):
    """Compute a smoothed word-level BLEU-2 score.

    ``sentence_bleu`` expects token sequences; handing it raw strings makes it
    iterate over characters and report a character-level score. Strings are
    therefore split on whitespace first; inputs that are already token
    sequences are used as given. The n-gram weights are (0.5, 0.5) so that they
    sum to 1, as the geometric mean in BLEU assumes.

    Args:
        reference: Reference sentence (string or sequence of tokens).
        hypothesis: Hypothesis sentence (string or sequence of tokens).

    Returns:
        BLEU score computed with equal unigram/bigram weights and method1 smoothing.
    """
    reference_tokens = reference.split() if isinstance(reference, str) else list(reference)
    hypothesis_tokens = hypothesis.split() if isinstance(hypothesis, str) else list(hypothesis)
    smoothing = SmoothingFunction().method1
    bleu_score = sentence_bleu(
        [reference_tokens],
        hypothesis_tokens,
        smoothing_function=smoothing,
        weights=(0.5, 0.5),
    )
    return bleu_score

# METEOR score
def calculate_meteor_precision(reference, hypothesis):
    """Return the METEOR score of ``hypothesis`` against ``reference``.

    The two input strings are split on whitespace into token lists, which is
    the form passed to ``single_meteor_score``.
    """
    tokenized_reference = reference.split()
    tokenized_hypothesis = hypothesis.split()
    return single_meteor_score(tokenized_reference, tokenized_hypothesis)

# Compute both scores for the module-level sentences and print them
bleu_precision = calculate_bleu_precision(reference, hypothesis)
meteor_precision = calculate_meteor_precision(reference, hypothesis)

print("BLEU Precision:", bleu_precision)
print("METEOR Precision:", meteor_precision)

BLEU Precision: 0.9297050276977528
METEOR Precision: 0.9561042524005487


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [None]:
# NOTE(review): the author flagged a problem with Russian-language input for this metric.
# Presumably NLTK's default METEOR matching (stemmer / WordNet) is English-oriented - confirm.
from nltk.translate.meteor_score import single_meteor_score

def meteor(reference, candidate):
    """Return the METEOR score of ``candidate`` measured against ``reference``.

    Each string is split on whitespace into a word list before scoring.
    """
    reference_words, candidate_words = (text.split() for text in (reference, candidate))
    return single_meteor_score(reference_words, candidate_words)

# Example usage: the candidate contains the same words as the reference in a different order
reference = "This is a test sentence for the meteor metric"
candidate = "This sentence is a test for the meteor metric"
print(meteor(reference, candidate))

0.9561042524005487


# **BLEU**

In [None]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def bleu_score(reference, candidate):
    """Return the sentence-level BLEU of ``candidate`` against a single ``reference``.

    Both strings are split on whitespace. The reference tokens are wrapped in a
    list because ``sentence_bleu`` takes a list of references. Smoothing method 4
    is applied to the score.
    """
    references = [reference.split()]
    hypothesis = candidate.split()
    return sentence_bleu(
        references,
        hypothesis,
        smoothing_function=SmoothingFunction().method4,
    )

# Example usage with an English sentence pair
reference = "This is a test sentence for the BLEU metric"
candidate = "This sentence is a test for the BLEU metric"
print(bleu_score(reference, candidate))

0.45966135761245924


In [None]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def bleu_score(reference, candidate):
    """Return the sentence-level BLEU of ``candidate`` against a single ``reference``.

    Both strings are split on whitespace. The reference tokens are wrapped in a
    list because ``sentence_bleu`` takes a list of references. Smoothing method 4
    is applied to the score.
    """
    reference_token_lists = [reference.split()]
    candidate_tokens = candidate.split()
    smoothing = SmoothingFunction().method4
    score = sentence_bleu(reference_token_lists, candidate_tokens, smoothing_function=smoothing)
    return score

# Example usage with a Russian sentence pair
reference = "Это тестовое предложение для rouge metric"
candidate = "Это предложение для тестирования показателя rouge metric"
print(bleu_score(reference, candidate))

0.1225276407029182


In [None]:

# %pip installs into the environment of the running kernel; -q keeps the log short
%pip install -q rouge-score nltk

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=06fa3d7f3ddebaaf1443e6b6f9906c68a2288831ee906e6bf0077eac028a3cc6
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


# **Perplexity**

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import math

def calculate_perplexity(text, model_name="gpt2"):
    """Compute the perplexity of ``text`` under a pretrained GPT-2 language model.

    The model is run with the input ids as labels; the returned loss is the mean
    negative log-likelihood per predicted token, and perplexity is its
    exponential.

    Args:
        text: Text to score.
        model_name: Name or path of the GPT-2 checkpoint (default "gpt2").
            The model and tokenizer are loaded on every call.

    Returns:
        Perplexity as a float. Inputs longer than the model's context window
        are truncated to it, so only the leading tokens are scored.

    Raises:
        ValueError: If ``text`` tokenizes to fewer than two tokens; no
            next-token prediction exists then and the loss is undefined.
    """
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    # Truncate to the context window; longer inputs would overflow the position embeddings
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model.config.n_positions,
    )
    if inputs["input_ids"].shape[-1] < 2:
        raise ValueError("Perplexity requires a text of at least two tokens.")
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    mean_negative_log_likelihood = outputs.loss.item()
    perplexity = math.exp(mean_negative_log_likelihood)
    return perplexity

# Example usage with an English sentence
text = "This is a test sentence for perplexity"
print(calculate_perplexity(text))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



177.5195889547669


In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
import math

def calculate_perplexity(text, model_name="gpt2"):
    """Compute the perplexity of ``text`` under a pretrained GPT-2 language model.

    The model is run with the input ids as labels; the returned loss is the mean
    negative log-likelihood per predicted token, and perplexity is its
    exponential.

    Args:
        text: Text to score.
        model_name: Name or path of the GPT-2 checkpoint (default "gpt2").
            The model and tokenizer are loaded on every call.

    Returns:
        Perplexity as a float. Inputs longer than the model's context window
        are truncated to it, so only the leading tokens are scored.

    Raises:
        ValueError: If ``text`` tokenizes to fewer than two tokens; no
            next-token prediction exists then and the loss is undefined.
    """
    model = GPT2LMHeadModel.from_pretrained(model_name)
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    # Truncate to the context window; longer inputs would overflow the position embeddings
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model.config.n_positions,
    )
    if inputs["input_ids"].shape[-1] < 2:
        raise ValueError("Perplexity requires a text of at least two tokens.")
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    mean_negative_log_likelihood = outputs.loss.item()
    perplexity = math.exp(mean_negative_log_likelihood)
    return perplexity

# Example usage with a Russian sentence
text = "Это тестовое предложение для perplexity"
print(calculate_perplexity(text))

13.473752480074115


# **Precision@k и Recall@k**

In [None]:
def precision_at_k(relevant_documents, retrieved_documents, k):
    """Fraction of the top-``k`` retrieved documents that are relevant.

    Args:
        relevant_documents: Iterable of relevant document ids.
        retrieved_documents: Ranked sequence of retrieved document ids.
        k: Cut-off rank. The divisor is ``k`` itself, even when fewer than
            ``k`` documents were retrieved.

    Returns:
        Precision@k as a float; 0.0 when ``k`` is not positive (nothing is
        inspected, and a division by zero or a negative slice is avoided).
    """
    if k <= 0:
        return 0.0
    retrieved_at_k = retrieved_documents[:k]
    true_positives = len(set(retrieved_at_k) & set(relevant_documents))
    return true_positives / k

def recall_at_k(relevant_documents, retrieved_documents, k):
    """Fraction of the relevant documents found among the top-``k`` retrieved ones.

    Args:
        relevant_documents: Collection of relevant document ids; duplicates are
            counted once.
        retrieved_documents: Ranked sequence of retrieved document ids.
        k: Cut-off rank; a non-positive value inspects no documents.

    Returns:
        Recall@k as a float, or 0 when there are no relevant documents.
    """
    # max(k, 0) stops a negative k from slicing "all but the last |k|" documents
    retrieved_at_k = retrieved_documents[:max(k, 0)]
    relevant = set(relevant_documents)
    true_positives = len(set(retrieved_at_k) & relevant)
    return true_positives / len(relevant) if relevant else 0

# Example usage: evaluate the top 3 retrieved documents against the relevant set
relevant_documents = [1, 2, 4, 5]
retrieved_documents = [5, 1, 6, 3]
print(precision_at_k(relevant_documents, retrieved_documents, k=3))
print(recall_at_k(relevant_documents, retrieved_documents, k=3))

0.022727272727272728
0.5


In [None]:
# %pip installs into the environment of the running kernel; -q keeps the log short
%pip install -q nltk transformers torch
%pip install -q rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=232a4ba25d2a9d2fd2275937079238fea300fd96617457f3633bf38fe1721d51
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [None]:
# `transformers-cli login` is deprecated in favour of `huggingface-cli login`;
# the trailing word "instead" was copied from the error message and is not a valid argument.
!huggingface-cli login

2024-11-06 09:05:53.492069: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-06 09:05:53.511133: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-06 09:05:53.516869: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[1m[31mERROR! `huggingface-cli login` uses an outdated login mechanism that is not compatible with the Hugging Face Hub backend anymore. Please use `huggingface-cli login instead.[0m
usage: huggingface-cli <command> [<args>]
huggingface-cli: error: unrecognized arguments: instead


In [None]:
from transformers import pipeline

# Load a question-answering model.
# The plain "bert-large-uncased" checkpoint has no trained QA head (its qa_outputs
# weights are randomly initialised on load), so a SQuAD fine-tuned checkpoint is used.
model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
qa_pipeline = pipeline("question-answering", model=model_name)

# Example query
context = "Bert is a transformer model used for NLP tasks."
question = "What is Bert used for?"
answer = qa_pipeline(question=question, context=context)

print(answer)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-large-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


{'score': 0.008829963393509388, 'start': 33, 'end': 46, 'answer': 'for NLP tasks'}


In [None]:
import nltk
nltk.download('wordnet')
import os
import pandas as pd
from transformers import pipeline
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score
from rouge_score import rouge_scorer
from sklearn.metrics import precision_score, recall_score

# Read a plain-text file
def load_text_file(file_path):
    """Read a UTF-8 text file and return its contents.

    The file is decoded with 'utf-8-sig', which reads plain UTF-8 as well but
    drops a leading byte-order mark, so the BOM does not leak into the text.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents as a string, or "" if the file cannot be opened or
        decoded (the error is printed).
    """
    try:
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            data = file.read()
    except (OSError, UnicodeError) as e:
        print(f"Ошибка при чтении текстового файла: {e}")
        return ""
    print(f"Текст загружен: {data[:100]}...")  # preview: first 100 characters
    return data

# Read an Excel file
def load_excel(file_path):
    """Load an Excel workbook into a DataFrame.

    Prints the first row as a sanity check. If anything goes wrong, the error
    is printed and an empty DataFrame is returned instead.
    """
    try:
        frame = pd.read_excel(file_path)
        first_row = frame.head(1)
        print(f"Первая строка Excel файла: {first_row}")
    except Exception as err:
        print(f"Ошибка при чтении Excel файла: {err}")
        return pd.DataFrame()
    return frame

# Flatten a DataFrame into plain text
def convert_df_to_text(df):
    """Render every row of ``df`` as one line of space-separated cell values.

    Each cell is converted with ``str``; every line, including the last, ends
    with a newline. An empty DataFrame gives an empty string.
    """
    lines = (
        " ".join(str(cell) for cell in row) + "\n"
        for _, row in df.iterrows()
    )
    return "".join(lines)

# Ask the model a question
def answer_question(model, question, context):
    """Call ``model`` with the question and context and return its 'answer' field.

    Any exception raised by the call or the lookup is printed, and an empty
    string is returned instead.
    """
    try:
        return model(question=question, context=context)['answer']
    except Exception as err:
        print(f"Ошибка при получении ответа на вопрос: {err}")
    return ""

# Metrics
def calculate_bleu(reference, hypothesis):
    """Compute a smoothed sentence-level BLEU score on whitespace tokens.

    Without smoothing, BLEU with the default 4-gram weights collapses to 0
    whenever a higher-order n-gram has no match, which is the normal case for
    short answers. Method1 smoothing avoids that and matches the other BLEU
    helpers in this notebook.

    Args:
        reference: Reference answer as a string.
        hypothesis: Model answer as a string.

    Returns:
        The BLEU score as a float.
    """
    # Imported locally because this cell only imports sentence_bleu at the top
    from nltk.translate.bleu_score import SmoothingFunction

    reference_tokens = reference.split()
    hypothesis_tokens = hypothesis.split()
    return sentence_bleu(
        [reference_tokens],
        hypothesis_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

def calculate_meteor(reference, hypothesis):
    """Return the METEOR score of ``hypothesis`` against a single ``reference``.

    Both strings are split on whitespace; the reference tokens are wrapped in a
    list because ``meteor_score`` takes a list of references.
    """
    references = [reference.split()]
    return meteor_score(references, hypothesis.split())

class _UnicodeWordTokenizer:
    """Tokenizer for RougeScorer that keeps non-Latin words (lower-cased ``\\w+`` runs)."""

    def tokenize(self, text):
        import re
        return re.findall(r"\w+", text.lower())


def calculate_rouge(reference, hypothesis):
    """Compute ROUGE-1, ROUGE-2 and ROUGE-L F-measures with ``rouge_score``.

    The default ``rouge_score`` tokenizer keeps only ASCII letters and digits,
    so non-Latin text (e.g. Russian) is reduced to nothing and every score is 0.
    Pure-ASCII inputs keep the original configuration (default tokenizer with
    stemming); any other input is tokenized with a Unicode-aware word tokenizer
    and no stemming.

    Args:
        reference: Reference answer as a string.
        hypothesis: Model answer as a string.

    Returns:
        dict mapping 'rouge1', 'rouge2' and 'rougeL' to their F-measure.
    """
    rouge_types = ['rouge1', 'rouge2', 'rougeL']
    if reference.isascii() and hypothesis.isascii():
        scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True)
    else:
        scorer = rouge_scorer.RougeScorer(rouge_types, tokenizer=_UnicodeWordTokenizer())
    scores = scorer.score(reference, hypothesis)
    return {key: score.fmeasure for key, score in scores.items()}

def calculate_precision_and_recall(reference, hypothesis):
    """Compute token-level precision and recall between two whitespace-tokenized texts.

    Matches are counted with clipped multiset overlap, so numerator and
    denominators use the same unit (token occurrences). Counting unique common
    tokens while dividing by total token counts under-reports both values when
    words repeat.

    Args:
        reference: Reference answer as a string.
        hypothesis: Model answer as a string.

    Returns:
        Tuple ``(precision, recall)``; a value is 0 when its text has no tokens.
    """
    from collections import Counter  # local import keeps the function self-contained

    ref_counts = Counter(reference.split())
    hyp_counts = Counter(hypothesis.split())
    # Counter & Counter keeps the minimum count per token
    overlap = sum((ref_counts & hyp_counts).values())
    ref_total = sum(ref_counts.values())
    hyp_total = sum(hyp_counts.values())

    precision = overlap / hyp_total if hyp_total > 0 else 0
    recall = overlap / ref_total if ref_total > 0 else 0
    return precision, recall

# Main interactive workflow
def main():
    """Run an interactive question-answering session and optionally score the answer.

    Steps:
      1. Ask for a question-answering model name and load it with ``pipeline``.
      2. Ask for a context file (.xlsx or .txt) and load it as text.
      3. Ask for a question and, optionally, a reference answer.
      4. Print the model answer and, when a reference answer was given, the
         BLEU, METEOR, ROUGE and precision/recall values.

    Each failure (model cannot be loaded, file missing, unsupported format,
    empty context) prints a message and returns early.
    """
    # The example in the prompt is a SQuAD fine-tuned checkpoint: a base model
    # without a trained QA head returns essentially random spans.
    model_name = input(
        "Введите модель для вопрос-ответ (например, 'distilbert-base-cased-distilled-squad'): "
    ).strip()

    # Load the question-answering model
    try:
        qa_pipeline = pipeline("question-answering", model=model_name)
        print(f"Модель '{model_name}' успешно загружена!")
    except Exception as e:
        print(f"Ошибка загрузки модели: {e}")
        return

    # Ask for the context file (the prompt no longer carries a pasted file name)
    file_path = input("Введите путь к файлу (Excel или текстовый): ").strip()

    if not os.path.exists(file_path):
        print(f"Ошибка: файл по пути {file_path} не найден.")
        return

    # Pick the loader by extension, ignoring case (.TXT / .XLSX are accepted too)
    lowered_path = file_path.lower()
    if lowered_path.endswith('.xlsx'):
        context_text = convert_df_to_text(load_excel(file_path))
    elif lowered_path.endswith('.txt'):
        context_text = load_text_file(file_path)
    else:
        print("Неподдерживаемый формат файла.")
        return

    # The loaders return empty content on failure; there is nothing to query then
    if not context_text.strip():
        print("Ошибка: файл пуст или не был прочитан.")
        return

    question = input("Введите ваш вопрос: ")

    # Optional reference answer; blank input skips the evaluation
    correct_answer = input("Введите эталонный ответ (если есть, иначе оставьте пустым): ").strip()

    model_answer = answer_question(qa_pipeline, question, context_text)
    print(f"Ответ модели: {model_answer}")

    if correct_answer:
        metrics = {
            "BLEU": calculate_bleu(correct_answer, model_answer),
            "METEOR": calculate_meteor(correct_answer, model_answer),
            "ROUGE": calculate_rouge(correct_answer, model_answer),
            "Precision и Recall": calculate_precision_and_recall(correct_answer, model_answer)
        }

        # Dict values hold sub-metrics, tuple values hold (precision, recall), the rest are scalars
        for metric, value in metrics.items():
            if isinstance(value, dict):
                print(f"{metric}:")
                for sub_metric, sub_value in value.items():
                    print(f"  {sub_metric}: {sub_value:.4f}")
            elif isinstance(value, tuple):
                precision, recall = value
                print(f"{metric} - Precision: {precision:.4f}, Recall: {recall:.4f}")
            else:
                print(f"{metric}: {value:.4f}")

# Entry point: call main() when this code runs as the top-level module
if __name__ == "__main__":
    main()



[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Модель 'distilbert-base-uncased' успешно загружена!
Текст загружен: ﻿Кнопка /start
Добро пожаловать в дружную команду УЦСБ. Это наш корпоративный бот. Он знает ответы н...
Ответ модели: работодателю.
 
Все обходимые
BLEU: 0.0000
METEOR: 0.0000
ROUGE:
  rouge1: 0.0000
  rouge2: 0.0000
  rougeL: 0.0000
Precision и Recall - Precision: 0.0000, Recall: 0.0000
