# Métricas

In [None]:
!pip install rouge
!pip install gensim
!pip3 install wmd
!pip install POT
!pip install bert-score

In [2]:
from rouge import Rouge
from nltk.translate import meteor_score
from nltk.tokenize import word_tokenize
import gensim
from wmd import WMD
import gensim.downloader as api
from nltk.tokenize import word_tokenize
from bert_score import score
import pandas as pd

In [13]:
import nltk

In [16]:
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [3]:
# Load a pre-trained Word2Vec model
model = api.load('word2vec-google-news-300')



## ROUGE

In [4]:
def calculate_rouge_scores(reference_text, model_response):
    """
    Calculates ROUGE scores between a reference text and a model response.

    Args:
      reference_text: The reference text.
      model_response: The model response.

    Returns:
      A dictionary containing the ROUGE-1, ROUGE-2, and ROUGE-L scores.
    """
    rouge = Rouge(metrics=['rouge-1', 'rouge-2', 'rouge-l'], stats=['f', 'p', 'r'])
    scores = rouge.get_scores(model_response, reference_text, avg=True)
    return scores

## METEOR

In [5]:
def calculate_meteor_score(reference, hypothesis):
    """
    Calculates the METEOR score for a given hypothesis text with respect to a reference text.

    Args:
      reference (str): Reference text.
      hypothesis (str): Machine-generated text.

    Returns:
      float: METEOR score.
    """
    # Tokenization of texts
    reference_tokens = word_tokenize(reference)
    hypothesis_tokens = word_tokenize(hypothesis)

    # Calculation of the METEOR score
    score = meteor_score.meteor_score([reference_tokens], hypothesis_tokens)
    return score

## WMD

In [6]:
def calculate_wmd_distance(doc1, doc2, w2v_model):
    """
    Computes the Word Mover's Distance between two documents using a pre-trained word2vec model.

    Args:
        doc1 (str): First document.
        doc2 (str): Second document.
        w2v_model (gensim.models.KeyedVectors): Pre-trained Word2Vec model.

    Returns:
        float: WMD distance between the documents.
    """
    # Tokenize the documents and filter words that are in the model's vocabulary
    tokens1 = [word for word in word_tokenize(doc1.lower()) if word in w2v_model.key_to_index]
    tokens2 = [word for word in word_tokenize(doc2.lower()) if word in w2v_model.key_to_index]

    # Calculate the Word Mover's Distance (WMD) using the wmdistance function from gensim
    distance = w2v_model.wmdistance(tokens1, tokens2)
    return distance

## BERTScore

In [7]:
def calculate_bert_score(candidates, references, lang='en', model_type=None):
    """
    Calculates the BERTScore between candidate texts and reference texts.

    Args:
        candidates (list of str): List of texts generated by the machine.
        references (list of str): List of reference texts.
        lang (str): Language code for which the embeddings will be used (default is 'en' for English).
        model_type (str): Type of BERT model to be used (e.g., 'bert-base-uncased'). If None, uses the default model for the language.

    Returns:
        tuple: Three lists containing precision, recall, and F1 scores for each candidate-reference pair.
    """
    # If a specific model is not provided, set a default one
    if not model_type:
        model_type = 'bert-base-multilingual-cased' if lang != 'en' else 'bert-base-uncased'

    # Calculate the BERTScore
    P, R, F1 = score(candidates, references, lang=lang, model_type=model_type, verbose=True)

    return P, R, F1

## VW Taos 2023

### 1. O que é o controle automático de distância?

In [8]:
reference_text = "O controle automático de distância (ACC = Adaptive Cruise Control) mantém constante uma velocidade ajustada pelo condutor. Se o veículo se aproximar de um veículo à frente, o ACC ajustará a velocidade automaticamente e através disso mantém a distância ajustada por eles."
model_response = "O controle automático de distância, também conhecido como ACC (Adaptive Cruise Control), é um sistema que mantém constante uma velocidade ajustada pelo condutor. Se o veículo se aproximar de um veículo à frente, o ACC ajustará a velocidade automaticamente para manter a distância ajustada."

In [9]:
scores = calculate_rouge_scores(reference_text, model_response)

In [17]:
meteor = calculate_meteor_score(reference_text, model_response)

In [20]:
distance = calculate_wmd_distance(reference_text, model_response, model)

In [21]:
P, R, F1 = calculate_bert_score([model_response], [reference_text])

calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.61 seconds, 1.65 sentences/sec


In [23]:
data = {
    "Métrica": ["ROUGE-L", "METEOR", "WMD Distance", "BERTScore"],
    "Precision": [f"{scores['rouge-l']['p']:.2f}", None, None, f"{P.item():.2f}"],
    "Recall": [f"{scores['rouge-l']['r']:.2f}", None, None, f"{R.item():.2f}"],
    "F1": [f"{scores['rouge-l']['f']:.2f}", None, None, f"{F1.item():.2f}"],
    "Valor": [None, f"{meteor:.2f}", f"{distance:.2f}", None]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Métrica,Precision,Recall,F1,Valor
0,ROUGE-L,0.69,0.74,0.71,
1,METEOR,,,,0.84
2,WMD Distance,,,,0.24
3,BERTScore,0.9,0.91,0.91,


### 2. O que ocorre no caso de uma intervenção de frenagem automática?

In [24]:
reference_text = "No caso de um obstáculo, o veículo desacelera até parar e é mantido parado por cerca de 2 segundos. 1. Parar o veículo com o pedal de freio após a frenagem. 2. Verificar os arredores."
model_response = "No caso de uma intervenção de frenagem automática, se um obstáculo for detectado, o veículo desacelera até parar e é mantido parado por cerca de 2 segundos. Depois disso, é recomendado que o condutor pare o veículo com o pedal de freio após a frenagem e verifique os arredores."

In [25]:
scores = calculate_rouge_scores(reference_text, model_response)

In [26]:
meteor = calculate_meteor_score(reference_text, model_response)

In [27]:
distance = calculate_wmd_distance(reference_text, model_response, model)

In [28]:
P, R, F1 = calculate_bert_score([model_response], [reference_text])

calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.90 seconds, 1.11 sentences/sec


In [30]:
data = {
    "Métrica": ["ROUGE-L", "METEOR", "WMD Distance", "BERTScore"],
    "Precision": [f"{scores['rouge-l']['p']:.2f}", None, None, f"{P.item():.2f}"],
    "Recall": [f"{scores['rouge-l']['r']:.2f}", None, None, f"{R.item():.2f}"],
    "F1": [f"{scores['rouge-l']['f']:.2f}", None, None, f"{F1.item():.2f}"],
    "Valor": [None, f"{meteor:.2f}", f"{distance:.2f}", None]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Métrica,Precision,Recall,F1,Valor
0,ROUGE-L,0.64,0.86,0.74,
1,METEOR,,,,0.77
2,WMD Distance,,,,0.26
3,BERTScore,0.85,0.91,0.88,


## Word Error Rate (áudio)

In [1]:
!pip install --upgrade evaluate jiwer

Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jiwer
  Downloading jiwer-3.0.4-py3-none-any.whl (21 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from evaluate)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from evaluate)
  Downl

In [2]:
from evaluate import load
from transformers.models.whisper.english_normalizer import BasicTextNormalizer


normalizer = BasicTextNormalizer()
wer_metric = load("wer")

Downloading builder script:   0%|          | 0.00/4.49k [00:00<?, ?B/s]

#### Pergunta 1

In [32]:
pergunta_1_gerada = 'O que é o controle automático de distância.'
pergunta_1_esperada = 'O que é o controle automático de distância?'
normalized_prediction_1 = normalizer(pergunta_1_gerada)
normalized_reference_1 = normalizer(pergunta_1_esperada)

In [33]:
wer = wer_metric.compute(references=[normalized_reference_1], predictions=[normalized_prediction_1])

print(wer)

0.0


#### Pergunta 2

In [34]:
pergunta_2_gerada = 'O que ocorre no caso de uma intervenção de frenagem automática?'
pergunta_2_esperada = 'O que ocorre no caso de uma intervenção de frenagem automática?'
normalized_prediction_2 = normalizer(pergunta_2_gerada)
normalized_reference_2 = normalizer(pergunta_2_esperada)

In [31]:
wer = wer_metric.compute(references=[normalized_reference_2], predictions=[normalized_prediction_2])

print(wer)

0.0
