In [None]:
# Установка необходимых библиотек
!pip install llama-cpp-python fire
!pip install zstandard


# Скачивание модели
!wget https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf

# Скачивание датасета
!wget https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga/resolve/main/ru_turbo_saiga.jsonl.zst

import json
import zstandard as zstd
import torch
from llama_cpp import Llama
from tqdm import tqdm
from torch.nn import functional as F
import random


# Decompress the dataset: the whole .zst archive is read into memory,
# decompressed in one shot, and every line is parsed as a JSON record.
decompressor = zstd.ZstdDecompressor()
with open("ru_turbo_saiga.jsonl.zst", "rb") as f:
    decompressed_data = decompressor.decompress(f.read())
dataset = list(map(json.loads, decompressed_data.decode('utf-8').splitlines()))

# Load the GGUF model from the file downloaded above
model_file = "model-q4_K.gguf"
model = Llama(model_path=model_file)

# Per-sample evaluation function
def evaluate(sample):
    """Generate an answer for one sample and score it against the reference.

    The prompt is the content of the first two messages joined by a newline;
    the content of the third message is used as the reference answer.
    NOTE(review): this assumes the third message really is the expected
    answer to the first two - confirm against the dataset schema.

    Both answers are tokenized with the model's tokenizer, cut to the length
    of the shorter one, and compared with cosine similarity over the raw
    token-id vectors.
    NOTE(review): token ids are categorical, so this score is only a rough
    proxy for answer quality, not a semantic similarity.

    Args:
        sample: dict with a "messages" list of at least three dicts, each
            having a "content" string.

    Returns:
        1 if the cosine similarity is above 0.5, otherwise 0.
    """
    # Build the prompt for this sample
    prompt = "\n".join([msg["content"] for msg in sample["messages"][:2]])
    # Generate the answer
    outputs = model(prompt, max_tokens=20, temperature=0.2, top_p=0.95, echo=False)
    predicted_answer = outputs['choices'][0]['text'].strip()

    # An empty generation can never count as a success. Without this guard it
    # would still tokenize to a non-empty sequence (the run log shows a
    # 1-token prediction for a 1-step generation) and could score a match.
    if not predicted_answer:
        return 0

    # Tokenize the reference and the predicted answers
    answer = model.tokenize(sample["messages"][2]["content"].encode('utf-8'))
    predict = model.tokenize(predicted_answer.encode('utf-8'))

    # Align both sequences to the shorter length. Previously a shape check
    # returned 0 before this step, so any pair of answers with different
    # token counts was scored as a failure and the trimming never ran.
    min_len = min(len(answer), len(predict))
    if min_len == 0:
        return 0

    # Cosine similarity needs floating-point tensors
    answer_tensor = torch.tensor(answer[:min_len], dtype=torch.float32)
    predict_tensor = torch.tensor(predict[:min_len], dtype=torch.float32)

    # Cosine similarity between the reference and the predicted answer
    cos_sim = F.cosine_similarity(answer_tensor.unsqueeze(0), predict_tensor.unsqueeze(0), dim=1)
    return 1 if cos_sim.item() > 0.5 else 0

# How many samples to evaluate
number_of_eval_samples = 5

# Shuffle the dataset in place and take the first N records as the eval subset
random.shuffle(dataset)
eval_subset = dataset[:number_of_eval_samples]

# Score every selected record (tqdm shows a progress bar); one 0/1 flag each
success_rate = [evaluate(s) for s in tqdm(eval_subset)]

# Accuracy is the share of successful samples
accuracy = sum(success_rate) / len(success_rate)

print(f"Точность: {accuracy * 100:.2f}%")

--2024-11-13 20:16:35--  https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf
Resolving huggingface.co (huggingface.co)... 13.35.210.66, 13.35.210.114, 13.35.210.61, ...
Connecting to huggingface.co (huggingface.co)|13.35.210.66|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.hf.co/repos/79/b3/79b3fc4694b2c3a22273003a1de570f145c14f0586c212c28c28e302adf5d3d6/2798f33ff63c791a21f05c1ee9a10bc95630b17225c140c197188a3d5cf32644?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-q4_K.gguf%3B+filename%3D%22model-q4_K.gguf%22%3B&Expires=1731788195&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczMTc4ODE5NX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy83OS9iMy83OWIzZmM0Njk0YjJjM2EyMjI3MzAwM2ExZGU1NzBmMTQ1YzE0ZjA1ODZjMjEyYzI4YzI4ZTMwMmFkZjVkM2Q2LzI3OThmMzNmZjYzYzc5MWEyMWYwNWMxZWU5YTEwYmM5NTYzMGIxNzIyNWMxNDBjMTk3MTg4YTNkNWNmMzI2NDQ%7EcmVzcG9uc2UtY29udGVu

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from model-q4_K.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = models
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
llama_mode

Answer tensor dtype: torch.float32, shape: torch.Size([16])
Predict tensor dtype: torch.float32, shape: torch.Size([19])
Shapes do not match: torch.Size([16]) != torch.Size([19])


llama_perf_context_print:        load time =   33213.62 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   164 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   72850.42 ms /   183 tokens
 40%|████      | 2/5 [01:57<03:04, 61.39s/it]Llama.generate: 2 prefix-match hit, remaining 306 prompt tokens to eval


Answer tensor dtype: torch.float32, shape: torch.Size([29])
Predict tensor dtype: torch.float32, shape: torch.Size([19])
Shapes do not match: torch.Size([29]) != torch.Size([19])


llama_perf_context_print:        load time =   33213.62 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   306 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =  113791.96 ms /   307 tokens
 60%|██████    | 3/5 [03:51<02:50, 85.32s/it]Llama.generate: 2 prefix-match hit, remaining 252 prompt tokens to eval


Answer tensor dtype: torch.float32, shape: torch.Size([38])
Predict tensor dtype: torch.float32, shape: torch.Size([1])
Shapes do not match: torch.Size([38]) != torch.Size([1])


llama_perf_context_print:        load time =   33213.62 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   252 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =  106580.56 ms /   271 tokens
 80%|████████  | 4/5 [05:38<01:33, 93.72s/it]Llama.generate: 2 prefix-match hit, remaining 127 prompt tokens to eval


Answer tensor dtype: torch.float32, shape: torch.Size([25])
Predict tensor dtype: torch.float32, shape: torch.Size([18])
Shapes do not match: torch.Size([25]) != torch.Size([18])


llama_perf_context_print:        load time =   33213.62 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   127 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    19 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   60387.74 ms /   146 tokens
100%|██████████| 5/5 [06:38<00:00, 79.73s/it]

Answer tensor dtype: torch.float32, shape: torch.Size([27])
Predict tensor dtype: torch.float32, shape: torch.Size([19])
Shapes do not match: torch.Size([27]) != torch.Size([19])
Точность: 0.00%





In [None]:
# Install the required libraries
!pip install llama-cpp-python fire
!pip install zstandard
!pip install nltk

import json
import zstandard as zstd
import torch
from llama_cpp import Llama
from tqdm import tqdm
import random
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Скачивание модели
!wget https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf

# Скачивание датасета
!wget https://huggingface.co/datasets/IlyaGusev/ru_turbo_saiga/resolve/main/ru_turbo_saiga.jsonl.zst

# Decompress the dataset: read the whole .zst archive, decompress it in one
# call, then parse each line of the resulting text as a JSON record.
decompressor = zstd.ZstdDecompressor()
with open("ru_turbo_saiga.jsonl.zst", "rb") as f:
    decompressed_data = decompressor.decompress(f.read())
dataset = list(map(json.loads, decompressed_data.decode('utf-8').splitlines()))

# Load the GGUF model from the downloaded file
model_file = "model-q4_K.gguf"
model = Llama(model_path=model_file)

# Per-sample evaluation function
def evaluate(sample):
    """Generate an answer for one sample and score it with smoothed BLEU.

    The prompt is the content of the first two messages joined by a newline;
    the content of the third message is used as the reference answer.
    NOTE(review): this assumes the third message really is the expected
    answer to the first two - confirm against the dataset schema.

    Args:
        sample: dict with a "messages" list of at least three dicts, each
            having a "content" string.

    Returns:
        1 if the sentence-level BLEU score of the generated text against the
        reference is above 0.5, otherwise 0.
    """
    # Build the prompt for this sample
    prompt = "\n".join([msg["content"] for msg in sample["messages"][:2]])
    # Generate the answer, capped at 50 new tokens
    outputs = model(prompt, max_tokens=50, temperature=0.2, top_p=0.95, echo=False)
    predicted_answer = outputs['choices'][0]['text'].strip()

    # Reference answer
    true_answer = sample["messages"][2]["content"]

    # BLEU over whitespace-separated tokens, computed once with smoothing.
    # The earlier unsmoothed call was dropped: its result was overwritten
    # immediately, so it only doubled the work.
    smoothing_function = SmoothingFunction().method1
    bleu_score = sentence_bleu(
        [true_answer.split()],
        predicted_answer.split(),
        smoothing_function=smoothing_function,
    )

    # A BLEU score above the threshold counts as a success
    return 1 if bleu_score > 0.5 else 0

# How many samples to evaluate (kept small so the run stays short)
number_of_eval_samples = 3

# Shuffle the dataset in place and take the first N records as the eval subset
random.shuffle(dataset)
eval_subset = dataset[:number_of_eval_samples]

# Score every selected record (tqdm shows a progress bar); one 0/1 flag each
success_rate = [evaluate(s) for s in tqdm(eval_subset)]

# Accuracy is the share of successful samples
accuracy = sum(success_rate) / len(success_rate)

print(f"Точность: {accuracy * 100:.2f}%")

--2024-11-13 20:43:43--  https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf
Resolving huggingface.co (huggingface.co)... 3.169.137.5, 3.169.137.111, 3.169.137.119, ...
Connecting to huggingface.co (huggingface.co)|3.169.137.5|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.hf.co/repos/79/b3/79b3fc4694b2c3a22273003a1de570f145c14f0586c212c28c28e302adf5d3d6/2798f33ff63c791a21f05c1ee9a10bc95630b17225c140c197188a3d5cf32644?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-q4_K.gguf%3B+filename%3D%22model-q4_K.gguf%22%3B&Expires=1731789823&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczMTc4OTgyM319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy83OS9iMy83OWIzZmM0Njk0YjJjM2EyMjI3MzAwM2ExZGU1NzBmMTQ1YzE0ZjA1ODZjMjEyYzI4YzI4ZTMwMmFkZjVkM2Q2LzI3OThmMzNmZjYzYzc5MWEyMWYwNWMxZWU5YTEwYmM5NTYzMGIxNzIyNWMxNDBjMTk3MTg4YTNkNWNmMzI2NDQ%7EcmVzcG9uc2UtY29udGVud

llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from model-q4_K.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = models
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
llama_mode

Точность: 0.00%



