In [24]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from huggingface_hub import login

login("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")

zephyr_id = "HuggingFaceH4/zephyr-7b-beta"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    zephyr_id,
    quantization_config=quantization_config,
    device_map="cuda:0",
)


tokenizer = AutoTokenizer.from_pretrained(zephyr_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\antoj\.cache\huggingface\token
Login successful




Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [25]:
def chat_with_model(messages, model, tokenizer, terminators):

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    outputs = model.generate(
        input_ids,
        max_new_tokens=128,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id
    )

    response = outputs[0][input_ids.shape[-1] :]
    return tokenizer.decode(response, skip_special_tokens=True)

In [26]:
from datasets import load_dataset

dataset = load_dataset("timdettmers/openassistant-guanaco")

dataset_train = dataset["test"]

Repo card metadata block was not found. Setting CardData to empty.


In [27]:
human_questions = []
human_answer = []

# for index in range(len(dataset_train["text"])):
for index in range(10):
    string = dataset_train["text"][index]
    segments = string.split("### Human:")
    for segment in segments[1:]:
        # This if is added to add to the list of questions only those that have then received an answer
        if "### Assistant:" in segment:
            human_questions.append(segment.split("### Assistant:")[0].strip())

    segments = string.split("### Assistant:")
    for segment in segments[1:]:
        human_answer.append(segment.split("### Human:")[0].strip())

print(len(human_questions))
print(len(human_answer))

12
12


In [28]:
# Dataframe with questions and answers
import pandas as pd

df_qa = pd.DataFrame(
    list(zip(human_questions, human_answer)), columns=["question", "human_answer"]
)

In [29]:
df_qa = df_qa.head(5)

In [30]:
# Dictionary with questions and answers where index is the key
qa_dict = df_qa.to_dict(orient="index")
qa_dict[0]

{'question': 'Напиши функцию на языке swift, которая сортирует массив целых чисел, а затем выводит его на экран',
 'human_answer': 'Вот функция, которая сортирует массив целых чисел и выводит его на экран:\n\n```swift\nfunc sortAndPrintArray(_ array: [Int]) {\n  // Создаем копию массива, чтобы не изменять исходный\n  var sortedArray = array\n  // Сортируем массив по возрастанию\n  sortedArray.sort()\n  // Выводим отсортированный массив на экран\n  print(sortedArray)\n}\n```\n\n\nТы можешь проверить работу функции, вызвав ее с любым массивом целых чисел, например:\n\n```swift\nsortAndPrintArray([5, 2, 7, 9, -3])\n```\n\nНа экране появится:\n\n```sh\n[-3, 2, 5, 7, 9]\n```\n---\nНадеюсь, что я смог тебе помочь. Нужно ли тебе что-нибудь ещё? 😊'}

In [31]:
assistant_answers = []
assistant_scores = []

for index in range(5):
    messages = [
        {
            "role": "system",
            "content": "Normaly answer to the question"
        },

        {
            "role": "user",
            "content": df_qa["question"][index]
        },
    ]

    response = chat_with_model(messages, model, tokenizer, terminators)
    assistant_answers.append(response)
    assistant_scores.append(0)

In [32]:
df_qa["assistant_answer"] = assistant_answers

In [None]:
!pip install python-Levenshtein
!pip install jiwer

# Levenshtein Distance

The Levenshtein distance between two strings $x = x_1 x_2 \ldots x_m$ and $y = y_1 y_2 \ldots y_n$ is defined as the minimum number of operations required to transform $x$ into $y$. The allowed operations are:

1. Insertion of a character.
2. Deletion of a character.
3. Substitution of one character for another.

#### Formal Definition

Let $D(i, j)$ denote the Levenshtein distance between the prefixes $x_1 x_2 \ldots x_i$ and $y_1 y_2 \ldots y_j$. The matrix $D$ of dimensions $(m+1) \times (n+1)$ is defined as:

$$
D(i, j) = \begin{cases} 
i & \text{if} \; j = 0 \\
j & \text{if} \; i = 0 \\
\min \begin{cases} 
D(i-1, j) + 1 \\
D(i, j-1) + 1 \\
D(i-1, j-1) + \delta(x_i, y_j) 
\end{cases} & \text{otherwise}
\end{cases}
$$

where:

$$
\delta(x_i, y_j) = \begin{cases} 
0 & \text{if} \; x_i = y_j \\
1 & \text{if} \; x_i \neq y_j 
\end{cases}
$$

In [39]:
import Levenshtein

predictions = list(df_qa["assistant_answer"].astype(str))
references = list(df_qa["human_answer"].astype(str))

distances = [Levenshtein.distance(t, p) for t, p in zip(references, predictions)]
final_distance = sum(distances) / len(distances)

print(final_distance)

[454, 1331, 1635, 982, 509]
982.2


# Word Error Rate (WER)

The Word Error Rate (WER) is a metric used to evaluate the accuracy of an automatic speech recognition (ASR) system. It measures the number of errors in the transcribed output compared to a reference transcription, normalized by the total number of words in the reference.

#### Definition

WER is defined as:

$$
\text{WER} = \frac{S + D + I}{N}
$$

where:
- $S$ is the number of substitutions.
- $D$ is the number of deletions.
- $I$ is the number of insertions.
- $N$ is the total number of words in the reference transcription.

In [47]:
import jiwer

transformation = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.RemovePunctuation(),
    jiwer.Strip(),
    jiwer.ExpandCommonEnglishContractions()
])

errors = []
for true, pred in zip(references, predictions):
    transformed_true = transformation(true)
    transformed_pred = transformation(pred)
    wer_score = jiwer.wer(transformed_true, transformed_pred)
    errors.append(wer_score)

average_wer = sum(errors) / len(errors) if errors else 0

print(average_wer)

0.9200043682659775


# Cosine similarity

In [43]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(references + predictions)

similarities = [cosine_similarity(tfidf[i:i+1], tfidf[len(references)+i:len(predictions)+i+1])[0][0] for i in range(len(references))]
similarity = sum(similarities) / len(similarities)

print(similarity)

0.3678937538651776
