<a href="https://colab.research.google.com/github/fishan/Veector/blob/main/onnx.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install onnx onnxruntime gdown transformers psutil

In [2]:
import onnx
import os
import numpy as np
import logging
import zipfile
import psutil
import onnxruntime as ort
from google.colab import drive
from transformers import AutoTokenizer
from onnx.external_data_helper import load_external_data_for_model
from huggingface_hub import hf_hub_download

In [3]:
#--------------------------------------------------
# Logging setup
# force=True replaces any handlers already attached to the root logger.
# Without it basicConfig() silently does nothing when a handler is already
# installed (e.g. by the hosting notebook environment), and the INFO-level
# messages emitted by the cells below would not be shown.
logging.basicConfig(
    level=logging.INFO,
    format="🟢 [LOG] %(asctime)s - %(message)s",
    force=True,
)
logger = logging.getLogger()

In [None]:
#--------------------------------------------------
# 🔹 Download the model file from Hugging Face (via huggingface_hub)
# model_repo / model_filename identify the single file to fetch; the value
# returned by hf_hub_download is kept in model_path and handed to onnx.load
# in the next cell.
model_repo = "onnx-community/DeepSeek-R1-Distill-Qwen-1.5B-ONNX"
model_filename = "onnx/model_q4f16.onnx"
model_path = hf_hub_download(
    filename=model_filename,
    repo_id=model_repo,
)
logger.info(f"✅ Модель загружена: {model_path}")

In [5]:
#--------------------------------------------------
# 🔹 Load the ONNX model
model = onnx.load(model_path)

# 🔹 Re-save the model with large tensors moved out into external data files.
# size_threshold is NOT a chunk size: it is the minimum raw size a tensor must
# have to be written to its own external file; smaller tensors stay inside the
# .onnx protobuf.
chunk_size = 1024 * 1024 * 50  # 50 MB

# With all_tensors_to_one_file=False the external files are named after the
# tensors, not after the .onnx file, so they cannot be found by a
# "model_split*" prefix. Everything is written into a dedicated directory so
# the complete set of files can be listed (and archived later) reliably.
split_dir = "model_split"
os.makedirs(split_dir, exist_ok=True)

# Start from an empty directory so a re-run of this cell does not pick up (or
# write on top of) files left over from a previous run.
for stale_name in os.listdir(split_dir):
    stale_path = os.path.join(split_dir, stale_name)
    if os.path.isfile(stale_path):
        os.remove(stale_path)

split_model_path = os.path.join(split_dir, "model_split.onnx")
onnx.save_model(
    model,
    split_model_path,
    save_as_external_data=True,
    all_tensors_to_one_file=False,
    size_threshold=chunk_size,
)

# 🔹 List every file that makes up the split model (graph + external tensors).
# Paths are relative to the working directory so later cells can open them as-is.
files = sorted(os.path.join(split_dir, name) for name in os.listdir(split_dir))
logger.info(f"📂 Разбитые файлы модели: {files}")

In [6]:
#--------------------------------------------------
# 🔹 Load the split model into ONNX Runtime
os.environ.update(ONNX_LOAD_EXTERNAL_LOGGING="1")

# NOTE(review): onnx_model is not referenced by any later cell in this
# notebook; it holds a second in-memory copy of the model next to `model`.
onnx_model = onnx.load(split_model_path)

# The inference session is created from the file path, not from onnx_model.
session = ort.InferenceSession(split_model_path)
logger.info("✅ ONNX Runtime загружен.")

#--------------------------------------------------

In [7]:
#--------------------------------------------------
# 🔹 Log system memory usage (used / total, in MB)
memory_info = psutil.virtual_memory()
bytes_per_mb = 1024 * 1024
used_mb = memory_info.used / bytes_per_mb
total_mb = memory_info.total / bytes_per_mb
logger.info(f"📊 Память: {used_mb:.2f} MB / {total_mb:.2f} MB")


In [None]:
#--------------------------------------------------
# 🔹 Load the tokenizer
# Take the tokenizer from the same repository as the ONNX weights (model_repo,
# set in the download cell). A tokenizer from a different checkpoint may
# disagree on special tokens and on eos_token_id, which the generation loop
# uses as its stop condition.
tokenizer = AutoTokenizer.from_pretrained(model_repo)

In [9]:
#--------------------------------------------------
# 🔹 Model configuration
# NOTE(review): these values presumably mirror the exported checkpoint's
# config.json — confirm against the model repo if the model is swapped.
num_hidden_layers = 28  # number of past_key_values.{i}.key/.value input pairs fed to the session
num_key_value_heads = 2  # second dimension of each (initially empty) KV-cache tensor
hidden_size = 1536  # not referenced by the other cells in this notebook
head_dim = 128  # last dimension of each KV-cache tensor
max_length = 128  # tokenizer truncation limit for the prompt
max_new_tokens = 512  # generation budget passed to generate_text() by chat()

In [21]:
#--------------------------------------------------
# 🔹 Input preparation
def preprocess_text(text):
    """Tokenize ``text`` and build the first-step feed dict for the ONNX session.

    The prompt is tokenized without padding and truncated to ``max_length``
    tokens. The feed contains the int64 ``input_ids`` / ``attention_mask``,
    ``position_ids`` counting 0..seq_len-1, and one zero-length float16
    key/value tensor per layer (an empty KV cache).

    Returns:
        tuple: ``(input_feed, input_ids, attention_mask)`` where the last two
        are the arrays exactly as returned by the tokenizer.
    """
    encoded = tokenizer(
        text,
        return_tensors="np",
        padding=False,
        truncation=True,
        max_length=max_length,
    )
    token_ids = encoded["input_ids"]
    token_mask = encoded["attention_mask"]
    seq_len = token_ids.shape[1]

    input_feed = {
        "input_ids": token_ids.astype(np.int64),
        "attention_mask": token_mask.astype(np.int64),
        "position_ids": np.arange(0, seq_len, dtype=np.int64).reshape(1, -1),
    }

    # Empty cache: the sequence axis (third dimension) has length 0.
    batch_size = 1
    empty_cache_shape = (batch_size, num_key_value_heads, 0, head_dim)
    for layer in range(num_hidden_layers):
        for kind in ("key", "value"):
            input_feed[f"past_key_values.{layer}.{kind}"] = np.zeros(
                empty_cache_shape, dtype=np.float16
            )

    return input_feed, token_ids, token_mask


In [24]:
# Greedy text generation with a KV cache
def generate_text(input_feed, input_ids, attention_mask, max_new_tokens):
    """Greedily decode up to ``max_new_tokens`` tokens with the ONNX session.

    The first run consumes the whole prompt (``input_feed``); every later run
    feeds only the newest token together with the key/value tensors returned
    by the previous run.

    Assumes the session outputs are ordered as logits first, followed by a
    (key, value) pair per layer — this is how the outputs are indexed below.

    Args:
        input_feed: feed dict for the first run (prompt tensors + empty cache).
            It is copied, not modified.
        input_ids: 2-D array of prompt token ids; row 0 seeds the output.
        attention_mask: 2-D prompt attention mask; row 0 is extended by one
            ``1`` per generated token so that on every step the mask covers
            the cached tokens plus the token being fed.
        max_new_tokens: maximum number of tokens to generate. Values <= 0
            generate nothing.

    Returns:
        str: the decoded prompt followed by the generated tokens (special
        tokens skipped). Generation stops early once the EOS token is produced.
    """
    generated_ids = input_ids[0].tolist()
    mask = attention_mask[0].tolist()
    eos_token_id = tokenizer.eos_token_id

    # Work on a copy so the caller's feed dict is left untouched.
    feed = dict(input_feed)

    for _ in range(max_new_tokens):
        outputs = session.run(None, feed)
        # Greedy choice: highest logit at the last position of batch item 0.
        next_token = int(np.argmax(outputs[0][:, -1, :], axis=-1)[0])
        generated_ids.append(next_token)

        # Stop on EOS — including when it is the very first generated token.
        if next_token == eos_token_id:
            break

        # Next step feeds only the new token; its position is its index in the
        # full sequence, and the mask grows to span cache + new token.
        mask.append(1)
        feed = {
            "input_ids": np.array([[next_token]], dtype=np.int64),
            "attention_mask": np.array([mask], dtype=np.int64),
            "position_ids": np.array([[len(generated_ids) - 1]], dtype=np.int64),
        }
        # Carry the updated cache forward.
        for i in range(num_hidden_layers):
            feed[f"past_key_values.{i}.key"] = outputs[2 * i + 1]
            feed[f"past_key_values.{i}.value"] = outputs[2 * i + 2]

    return tokenizer.decode(generated_ids, skip_special_tokens=True)


In [None]:
#--------------------------------------------------
# 🔹 Chat loop
def chat():
    """Run an interactive console chat until the user types 'выход'.

    Each non-empty line is tokenized with preprocess_text() and answered with
    generate_text(). Surrounding whitespace is ignored, so ' выход ' also
    exits and blank lines are skipped instead of triggering a generation.
    Any error raised while preparing or generating a reply is logged with its
    traceback and the loop continues with the next prompt.
    """
    print("\n🤖 ONNX-Чат активен! Напиши что-нибудь ('выход' для выхода).")
    while True:
        user_input = input("Ты: ").strip()
        if user_input.lower() == "выход":
            print("🤖 Чат завершен.")
            break
        if not user_input:
            # Nothing to answer; ask again.
            continue

        logger.info("Начинаем обработку запроса...")
        try:
            input_feed, input_ids, attention_mask = preprocess_text(user_input)
            response_text = generate_text(input_feed, input_ids, attention_mask, max_new_tokens)
            logger.info(f"Генерация завершена. Использование памяти: {psutil.virtual_memory().used / (1024 * 1024):.2f} MB")
            print(f"🤖 ONNX: {response_text}")
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception(f"Ошибка генерации: {e}")

# Start the chat; this blocks on input() until the user types 'выход'
chat()

In [None]:
#--------------------------------------------------
# 🔹 Архивируем
zip_name = "DeepSeek-Qwen-splited-onnx.zip"
with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for file in files:
        zipf.write(file)
logger.info(f"📦 Архив создан: {zip_name}, размер: {os.path.getsize(zip_name) / (1024 * 1024):.2f} MB")

#--------------------------------------------------
# 🔹 Загружаем на Google Drive
drive.mount('/content/drive')
destination_path = f"/content/drive/My Drive/{zip_name}"
!cp {zip_name} "{destination_path}"
logger.info(f"✅ Архив загружен на Google Drive: {destination_path}")
