<a href="https://colab.research.google.com/github/fishan/Veector/blob/base/Veector_split_DeepSeek_R1_Distill_Qwen_1_5b_int8_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === Cell 0: Install Dependencies ===
!pip install numpy psutil torch transformers accelerate bitsandbytes ipfshttpclient qiskit qiskit-aer requests huggingface_hub -q
print("Dependencies installed/checked.")

In [None]:
# === Cell 1: Imports (Corrected and Simplified - FINAL) ===

# --- Standard Imports ---
import numpy as np
import queue
import threading
import time
import random
import psutil
import os
import gc
import pickle
import hashlib
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple, Union
from google.colab import drive, files, userdata # Keep Colab imports
from huggingface_hub import login             # Keep HF import
from transformers import AutoModelForCausalLM, AutoTokenizer # Keep Transformers imports

print("Standard/External imports loaded.")

# --- Optional Imports ---
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    print("Warning: PyTorch not found. GPU features may be limited.")

try:
    import ipfshttpclient
    IPFS_AVAILABLE = True
except ImportError:
    IPFS_AVAILABLE = False
    # print("Warning: ipfshttpclient not found. IPFS features disabled.")

# Optional quantum backend. Cell 0 installs unpinned `qiskit` + `qiskit-aer`;
# on Qiskit >= 1.0 the legacy import paths are gone, so try the current
# locations first and fall back to the legacy ones for older installs.
try:
    from qiskit import QuantumCircuit
    try:
        # Current location: the standalone qiskit-aer package.
        from qiskit_aer import Aer
    except ImportError:
        # Legacy location used by older Qiskit releases.
        from qiskit.providers.aer import Aer
    try:
        from qiskit import execute
    except ImportError:
        # qiskit.execute was removed in Qiskit 1.0. Provide the documented
        # replacement (transpile, then backend.run) under the same name.
        from qiskit import transpile as _qiskit_transpile

        def execute(experiments, backend, **run_options):
            """Transpile `experiments` for `backend` and submit them.

            Minimal stand-in for the removed `qiskit.execute`; every keyword
            argument (e.g. `shots`) is forwarded to `backend.run`.
            """
            return backend.run(_qiskit_transpile(experiments, backend), **run_options)
    QISKIT_AVAILABLE = True
except ImportError:
    QISKIT_AVAILABLE = False
    # print("Warning: Qiskit not found. Quantum operations disabled.")

print("Optional imports checked.")

# --- Veector Project Imports (Single Correct Block) ---
# Ensure core.py, tensors.py (v0.5.1+), veectordb.py (v0.7.1+),
# operations.py, memory.py are uploaded and accessible.
PROJECT_IMPORTS_OK = False
try:
    # Import core classes/functions needed by THIS script (converter/inference)
    from core import Veector
    from veectordb import VeectorDB # Needed if we re-initialize DB here? Usually not.
    from tensors import (
        TensorCoordinate, create_tensor, # Needed for creating tensors
        # Import ALL necessary TAG and GROUP constants for use in this script
        TAG_CAT_TYPE, TAG_CAT_COMPONENT, TAG_CAT_PRECISION, TAG_CAT_MODEL_FAMILY,
        TAG_CAT_LAYER_IDX, TAG_CAT_FUNCTION, TAG_CAT_DATA_SEMANTIC, TAG_CAT_USER,
        TAG_TYPE_PROCESSOR, TAG_TYPE_KNOWLEDGE, TAG_TYPE_CONVERTER, TAG_TYPE_STATE,
        TAG_COMP_WEIGHTS, TAG_COMP_BIAS, TAG_COMP_EMBEDDING, TAG_COMP_ATTN_Q,
        TAG_COMP_ATTN_K, TAG_COMP_ATTN_V, TAG_COMP_ATTN_O, TAG_COMP_ATTN_QKV,
        TAG_COMP_FFN_GATE, TAG_COMP_FFN_UP, TAG_COMP_FFN_DOWN, TAG_COMP_LAYERNORM,
        TAG_COMP_LM_HEAD, TAG_PREC_FLOAT32, TAG_PREC_FLOAT16, TAG_PREC_BFLOAT16,
        TAG_PREC_INT8, TAG_PREC_INT4, TAG_MODEL_QWEN2, TAG_MODEL_LLAMA3,
        TAG_MODEL_DEEPSEEK, TAG_FUNC_LINEAR, TAG_FUNC_ATTENTION, TAG_FUNC_FFN,
        TAG_FUNC_EMBED_LOOKUP, TAG_FUNC_CAST_DTYPE, TAG_FUNC_RESHAPE,
        TAG_SEMANTIC_HIDDEN_STATE, TAG_SEMANTIC_LOGITS, TAG_SEMANTIC_TOKEN_IDS,
        TAG_SEMANTIC_KV_CACHE, tag_layer,
        GROUP_IDX_QWEN_KNOWLEDGE, GROUP_IDX_QWEN_PROCESSOR
    )
    # Only import from operations/memory if DIRECTLY used in THIS script, otherwise core.py handles it
    # from operations import * # Generally not needed here
    # from memory import Memory # Generally not needed here

    print("Veector project components imported successfully for this script.")
    PROJECT_IMPORTS_OK = True

except ImportError as e:
    print(f"---!!! FATAL ERROR (ImportError) !!! ---")
    print(f"Specific error: {e}")
    print(f"Could not import required name from core.py or tensors.py.")
    print(f"Ensure files are UP-TO-DATE (tensors v0.5.1+, core v0.5.2+), CORRECT, and ACCESSIBLE.")
    print(f"-----------------------------------------")
    # Optionally define dummies if needed for notebook structure
except Exception as other_e:
    print(f"---!!! FATAL ERROR (Other Exception during Import) !!! ---")
    print(f"Specific error: {other_e}")
    import traceback
    traceback.print_exc()
    print(f"Check imported files for syntax errors.")
    print(f"----------------------------------------------------------")

# Removed the redundant import check block ('Checking imports...')

In [2]:
# Start every run from an empty output directory so results are reproducible.
import shutil

output_dir = "data"
# Same effect as `rm -rf data/`, but without spawning a shell; a missing
# directory is not an error.
shutil.rmtree(output_dir, ignore_errors=True)
os.makedirs(output_dir, exist_ok=True)


In [3]:
# --- Configuration ---
# Authenticates, mounts Drive and defines the constants the later cells read:
# model_NAME, DB_PATH and TORCH_DTYPE.

# Authenticate with Hugging Face using the HF_TOKEN secret stored in Colab
hf_token = userdata.get('HF_TOKEN')
if not hf_token:
    raise ValueError("Добавь HF_TOKEN в секреты Colab!")
login(hf_token)
print("Аутентификация прошла успешно")

# Mount Google Drive
drive.mount('/content/drive')
print("Google Drive подключён")

model_NAME = "DeepSeek-R1-Distill-Qwen-1.5B"
# Define ONE main path for the database (here: data/db/)
DB_PATH = Path("./data/db/")
DB_PATH.mkdir(parents=True, exist_ok=True) # Create data/db if it does not exist yet
print(f"Using Main Veector DB Path: {DB_PATH.resolve()}")

# Set data type (bfloat16 might not be fully supported everywhere, float16 is safer)
# NOTE(review): torch is imported optionally in Cell 1, but is required here.
TORCH_DTYPE = torch.float16 # Use float16 for wider compatibility

print(f"Model to convert: {model_NAME}")
print(f"Target Veector DB: {DB_PATH}")
print(f"Target dtype: {TORCH_DTYPE}")

Аутентификация прошла успешно
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive подключён
Using Main Veector DB Path: /content/data/db
Model to convert: DeepSeek-R1-Distill-Qwen-1.5B
Target Veector DB: data/db
Target dtype: torch.float16


In [None]:
# === Cell 2: Tag Ontology and Mappings Definition (Sync with tensors.py v0.7.0) ===

import torch # Ensure torch is imported for dtype checking if needed later
import numpy as np # Ensure numpy is imported
from typing import Dict, List, Any, Optional, Tuple, Union # Import typing for hints

# --- Version (for tracking changes in this cell) ---
# Printed on every run so the notebook output records which revision of this
# cell produced it.
CONVERTER_CELL2_VERSION = "Synced with tensors.py v0.7.0"
print(f"--- Running Converter Cell 2 v{CONVERTER_CELL2_VERSION} ---")

# --- Type Hint for Metadata Tuple (from tensors.py) ---
# Needed if any functions within Colab cells might use this type hint.
# The alias describes a 10-element positional record; the bracketed number in
# each trailing comment is the element's index in the tuple.
MetadataTuple = Tuple[
    List[Union[float, int]],         # [0] data_description
    List[int],                       # [1] coord
    List[int],                       # [2] shape
    List[int],                       # [3] tags
    Optional[Dict],                  # [4] ops_sequences
    Optional[Dict],                  # [5] interface
    Optional[List],                  # [6] filters
    Optional[List],                  # [7] exit_gates
    List[int],                       # [8] lifecycle
    Optional[List[str]]              # [9] parents
]

# --- Simplified Tag Ontology (flat integers, one numeric range per category;
#     copied from tensors.py v0.7.0) ---

# 1-9: tensor type
TAG_TYPE_PROCESSOR, TAG_TYPE_KNOWLEDGE, TAG_TYPE_CONVERTER, TAG_TYPE_STATE = 1, 2, 3, 4

# 10-19: model family
TAG_MODEL_QWEN2, TAG_MODEL_LLAMA3, TAG_MODEL_DEEPSEEK = 10, 11, 12

# 20-29: precision
(
    TAG_PREC_FLOAT32,
    TAG_PREC_FLOAT16,
    TAG_PREC_BFLOAT16,
    TAG_PREC_INT8,
    TAG_PREC_INT4,
) = (20, 21, 22, 23, 24)

# 30-49: component type
(
    TAG_COMP_WEIGHTS, TAG_COMP_BIAS, TAG_COMP_EMBEDDING,
    TAG_COMP_ATTN_Q, TAG_COMP_ATTN_K, TAG_COMP_ATTN_V, TAG_COMP_ATTN_O, TAG_COMP_ATTN_QKV,
    TAG_COMP_FFN_GATE, TAG_COMP_FFN_UP, TAG_COMP_FFN_DOWN,
    TAG_COMP_LAYERNORM, TAG_COMP_LM_HEAD,
) = (
    30, 31, 32,
    33, 34, 35, 36, 37,
    38, 39, 40,
    41, 42,
)

# 50-59: function
(
    TAG_FUNC_LINEAR,
    TAG_FUNC_ATTENTION,
    TAG_FUNC_FFN,
    TAG_FUNC_EMBED_LOOKUP,
    TAG_FUNC_CAST_DTYPE,
    TAG_FUNC_RESHAPE,
) = (50, 51, 52, 53, 54, 55)

# 60-69: data semantic type
(
    TAG_SEMANTIC_HIDDEN_STATE,
    TAG_SEMANTIC_LOGITS,
    TAG_SEMANTIC_TOKEN_IDS,
    TAG_SEMANTIC_KV_CACHE,
) = (60, 61, 62, 63)
# 100-999: Layer Index
LAYER_IDX_TAG_OFFSET = 100
# 1000+: User Defined Tags
USER_TAG_OFFSET = 1000

def tag_layer(idx: int) -> int:
    """Return the tag that identifies transformer layer `idx`.

    Layer tags occupy the range LAYER_IDX_TAG_OFFSET .. USER_TAG_OFFSET - 1,
    i.e. the tag is `LAYER_IDX_TAG_OFFSET + idx`.

    Args:
        idx: Zero-based layer index.

    Returns:
        The integer layer tag.

    Raises:
        TypeError: If `idx` is not an int.
        ValueError: If `idx` is negative, or so large that the resulting tag
            would spill into the user-defined tag range.
    """
    if not isinstance(idx, int):
        raise TypeError(f"Layer index must be an integer, got {type(idx)}")
    if idx < 0:
        raise ValueError(f"Invalid layer index for tagging: {idx}. Must be non-negative.")
    tag = LAYER_IDX_TAG_OFFSET + idx
    # Without this guard a large index silently produces a tag that is
    # indistinguishable from a user-defined tag.
    if tag >= USER_TAG_OFFSET:
        raise ValueError(
            f"Invalid layer index for tagging: {idx}. "
            f"Tag {tag} would collide with user-defined tags ({USER_TAG_OFFSET}+)."
        )
    return tag
# --- End of Tags ---
print("Simplified tag ontology (flat integers) defined.")

# --- Group ID Constants (from tensors.py v0.7.0) ---
# Knowledge groups are numbered 100+, processor groups 500+.
GROUP_IDX_GENERIC_PROCESSOR = 50
GROUP_IDX_QWEN_KNOWLEDGE, GROUP_IDX_QWEN_PROCESSOR = 100, 500
GROUP_IDX_LLAMA_KNOWLEDGE, GROUP_IDX_LLAMA_PROCESSOR = 101, 501
GROUP_IDX_DEEPSEEK_KNOWLEDGE = 102
# GROUP_IDX_DEEPSEEK_PROCESSOR = 502  (optional, currently not defined)
print(f"Group Indices defined: QwenK={GROUP_IDX_QWEN_KNOWLEDGE}, QwenP={GROUP_IDX_QWEN_PROCESSOR}, DeepSeekK={GROUP_IDX_DEEPSEEK_KNOWLEDGE}")


# --- Mappings (from tensors.py v0.7.0) ---
# 1. DATA_TYPE_MAPPING: tensor-type name -> integer code, plus the inverse
#    lookup derived from it so the two can never drift apart.
DATA_TYPE_MAPPING = dict(knowledge=1, processor=2, converter=3, state=4)
REVERSE_DATA_TYPE_MAPPING = {code: label for label, code in DATA_TYPE_MAPPING.items()}
print(f"DATA_TYPE_MAPPING defined: {DATA_TYPE_MAPPING}")

# 2. DTYPE_MAPPING
# Maps every spelling of a dtype that the converter may encounter (plain name,
# NumPy scalar class, NumPy dtype instance, "torch.*" string, torch.dtype
# object) to one integer code.
DTYPE_MAPPING = {
    # Standard Names
    'float32': 1, 'float16': 2, 'bfloat16': 3, 'int8': 4, 'int4': 5,
    'int32': 6, 'int64': 7, 'bool': 8, 'complex64': 9, 'complex128': 10,
    # Numpy Types
    np.float32: 1, np.float16: 2, np.int8: 4, np.int32: 6, np.int64: 7,
    np.bool_: 8, np.complex64: 9, np.complex128: 10,
    # PyTorch Types (as strings and potentially objects if torch loaded)
    'torch.float32': 1, 'torch.float16': 2, 'torch.bfloat16': 3, 'torch.int8': 4,
    'torch.int32': 6, 'torch.int64': 7, 'torch.bool': 8,
    'torch.complex64': 9, 'torch.complex128': 10,
}
# `ndarray.dtype` yields np.dtype instances, which a dict lookup does not find
# under the scalar-class keys above; register them explicitly so
# DTYPE_MAPPING[array.dtype] works.
for _np_scalar_type in (np.float32, np.float16, np.int8, np.int32, np.int64,
                        np.bool_, np.complex64, np.complex128):
    DTYPE_MAPPING[np.dtype(_np_scalar_type)] = DTYPE_MAPPING[_np_scalar_type]
del _np_scalar_type

# Add torch objects if torch is available
if 'torch' in globals():
    DTYPE_MAPPING[torch.float32] = 1
    DTYPE_MAPPING[torch.float16] = 2
    DTYPE_MAPPING[torch.bfloat16] = 3
    DTYPE_MAPPING[torch.int8] = 4
    DTYPE_MAPPING[torch.int32] = 6
    DTYPE_MAPPING[torch.int64] = 7
    DTYPE_MAPPING[torch.bool] = 8
    DTYPE_MAPPING[torch.complex64] = 9
    DTYPE_MAPPING[torch.complex128] = 10

# Integer code -> canonical dtype name.
REVERSE_DTYPE_MAPPING = {
    1: 'float32', 2: 'float16', 3: 'bfloat16', 4: 'int8', 5: 'int4',
    6: 'int32', 7: 'int64', 8: 'bool', 9: 'complex64', 10: 'complex128',
}
print(f"DTYPE_MAPPING defined.")

# 3. STATUS_MAPPING: lifecycle status name <-> integer code.
STATUS_MAPPING = dict(active=1, archived=0)
REVERSE_STATUS_MAPPING = {code: label for label, code in STATUS_MAPPING.items()}
print(f"STATUS_MAPPING defined: {STATUS_MAPPING}")

# --- Metadata Encoding Configuration (from tensors.py v0.7.0) ---
METADATA_STRUCTURE_VERSION = 1.1
print(f"Metadata Structure Version: {METADATA_STRUCTURE_VERSION}")

print("Tag ontology, Group IDs, Mappings, and Config defined for Cell 2.")

In [None]:
# === Cell 3: Initialize Veector (SINGLE Instance) ===
# Creates the one `vec` object that the conversion cells below write through.
from core import Veector # Import the Veector class from core.py
try:
    # Point the instance at the database path defined in the configuration cell
    vec = Veector(db_dir=DB_PATH, ipfs_enabled=False)
    print(f"Veector core initialized using DB at: {DB_PATH.resolve()}")
except Exception as e:
    print(f"FATAL: Veector initialization failed: {e}")
    # Abort the cell; the original exception stays attached as the cause.
    raise RuntimeError("Veector Core failed to initialize") from e

In [None]:
# === Cell 4: Load Hugging Face Model ===

# Pre-bind both names so a failed load leaves them as None rather than undefined.
model = tokenizer = None
try:
    model = AutoModelForCausalLM.from_pretrained(
        f"deepseek-ai/{model_NAME}",
        torch_dtype=TORCH_DTYPE,
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        f"deepseek-ai/{model_NAME}",
        trust_remote_code=True,
    )
    # Inference only: switch the model to evaluation mode.
    model.eval()
    print(f"Successfully loaded HF model: {model_NAME}")
    print(f"Model config: {model.config}")
except Exception as load_error:
    print(f"FATAL: Failed to load HF model '{model_NAME}': {load_error}")
    # Stop the notebook here; nothing downstream can work without the model.
    raise RuntimeError(f"Hugging Face model loading failed") from load_error

# Release cached GPU blocks (when a GPU is present) and run the garbage collector.
if TORCH_AVAILABLE and torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()
print("Model loaded and memory potentially cleaned.")

In [None]:
# === Skript dlja prohoda HF modeli v float32 i sohranenija promezhutochnyh vyhodov ===

import time
import pickle
import numpy as np
import traceback
import os
from pathlib import Path
from functools import partial

# --- Required libraries ---
try:
    import torch
    from torch import nn
    from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
    print("Torch and Transformers imported successfully.")
except ImportError as e:
    print(f"FATAL ERROR: Missing essential libraries (torch, transformers): {e}")
    print("Please install them: pip install torch transformers accelerate")
    # Re-raise instead of calling exit(): inside a notebook exit() asks the
    # kernel to shut down, which discards everything earlier cells created.
    raise

# --- Configuration ---
# Make sure these values match your environment
MODEL_SOURCE = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
TOKENIZER_SOURCE = MODEL_SOURCE

PROMPT = "Hello, how are you?" # Same prompt as in the comparison script
# >>> CHANGED: new file name for the float32 outputs <<<
OUTPUT_FILENAME = "hf_reference_outputs_fp32.pkl"

# --- Load the tokenizer ---
# "\n" (not "\\n") so the banner is preceded by a blank line instead of a
# literal backslash-n.
print("\n--- Loading Tokenizer ---")
tokenizer = None
try:
    print(f"Loading Tokenizer from: {TOKENIZER_SOURCE}")
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_SOURCE, trust_remote_code=True)
    print(f"Tokenizer class: {tokenizer.__class__.__name__}")
except Exception as e:
    print(f"FATAL ERROR loading tokenizer: {e}")
    # Raise (as Cells 3 and 4 do) rather than exit(): in a notebook exit()
    # shuts the kernel down and loses the state built by earlier cells.
    raise RuntimeError("Tokenizer loading failed") from e

# --- Prepare the input ids ---
# "\n" (not "\\n") so the banner is preceded by a blank line instead of a
# literal backslash-n.
print("\n--- Preparing Input IDs ---")
input_ids_torch = None
input_seq_len = 0
try:
    messages = [{"role": "user", "content": PROMPT}]
    print("Applying chat template...")
    prompt_input_ids_np = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="np"
    )
    # Guarantee a leading batch axis: (seq_len,) -> (1, seq_len).
    if prompt_input_ids_np.ndim == 1:
        prompt_input_ids_np = np.expand_dims(prompt_input_ids_np, axis=0)

    input_seq_len = prompt_input_ids_np.shape[1]
    # Stays on the CPU for now; it is moved to the model's device at call time.
    input_ids_torch = torch.tensor(prompt_input_ids_np)

    print(f"Input IDs shape: {input_ids_torch.shape}")
    print(f"Input Sequence Length: {input_seq_len}")
    print(f"Decoded Input: '{tokenizer.decode(input_ids_torch[0].cpu().numpy())}'")
except Exception as e:
    print(f"FATAL ERROR preparing input: {e}")
    # Raise rather than exit(): in a notebook exit() shuts the kernel down and
    # loses the state built by earlier cells.
    raise RuntimeError("Input preparation failed") from e

# --- Load the reference model and run it in float32 ---
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print(f"\n--- Loading and Running HF Model ({MODEL_SOURCE}) in float32 ---")
hf_outputs = {}     # hook name -> captured module output as a float32 NumPy array
hook_handles = []   # handles returned by register_forward_hook, removed after the run
model_fp32 = None

def get_hook(name):
    """Build a forward hook that records a module's output under `name`.

    Args:
        name: Key under which the captured output is stored in `hf_outputs`.

    Returns:
        A function with the `(module, input, output)` forward-hook signature.
        When `output` is a tuple only its first element is captured. The
        value is stored as an independent float32 NumPy array on the CPU.
    """
    def hook_fn(module, input, output):
        actual_output = output[0] if isinstance(output, tuple) else output
        print(f"  [HOOK] Captured output for: {name} (Shape: {actual_output.shape}, Device: {actual_output.device})")
        # Cast to float32 in torch *before* converting: Tensor.numpy() cannot
        # convert bfloat16 tensors. The final copy() keeps the stored array
        # from aliasing the tensor's memory when no cast or move was needed.
        hf_outputs[name] = actual_output.detach().cpu().float().numpy().copy()
    return hook_fn

# Load the float32 reference model, attach forward hooks to the embedding,
# every decoder layer, the final norm and the LM head, run one forward pass,
# then always detach the hooks and free the model again.
try:
    print(f"Loading HF Model {MODEL_SOURCE} with float32...")
    # >>> CHANGED: load with torch_dtype=torch.float32 <<<
    model_fp32 = AutoModelForCausalLM.from_pretrained(MODEL_SOURCE, torch_dtype=torch.float32, trust_remote_code=True)
    model_fp32.eval()
    # Move to the GPU if needed and available
    # model_fp32.to('cuda')
    # input_ids_torch = input_ids_torch.to(model_fp32.device) # Move the input data as well
    print(f"HF Model loaded to device: {model_fp32.device}")

    # Register the hooks
    print("Registering hooks for float32 model...")
    model_config = model_fp32.config
    num_layers = model_config.num_hidden_layers
    hook_handles.append(model_fp32.model.embed_tokens.register_forward_hook(get_hook("embed_tokens")))
    for i in range(num_layers):
        hook_handles.append(model_fp32.model.layers[i].register_forward_hook(get_hook(f"layer_{i}_output")))
    hook_handles.append(model_fp32.model.norm.register_forward_hook(get_hook("final_norm")))
    hook_handles.append(model_fp32.lm_head.register_forward_hook(get_hook("lm_head")))
    print(f"Registered {len(hook_handles)} hooks.")

    # Forward pass
    print("Running HF model forward pass (float32)...")
    with torch.no_grad():
        hf_model_output = model_fp32(input_ids_torch.to(model_fp32.device), use_cache=False) # Make sure the input is on the same device as the model
    print("HF forward pass complete.")

except Exception as e:
    # NOTE(review): the error is only reported, not re-raised, so the save step
    # below still runs with whatever the hooks captured before the failure.
    print(f"FATAL ERROR during HF float32 execution: {e}")
    traceback.print_exc()
finally:
    # Always remove the hooks and the model after use
    for handle in hook_handles: handle.remove()
    print("Hooks removed.")
    if 'model_fp32' in locals() and model_fp32 is not None:
        del model_fp32
        if 'torch' in locals() and hasattr(torch, 'cuda'): torch.cuda.empty_cache()
        # NOTE(review): `gc` is not imported in this cell; it comes from Cell 1.
        gc.collect()
        print("Cleaned up float32 model.")

# --- Save the results ---
# The banners use "\n" (not "\\n") so a blank line is printed instead of a
# literal backslash-n.
if hf_outputs: # Save only if something was actually captured
    print(f"\n--- Saving Captured Float32 Outputs to {OUTPUT_FILENAME} ---")
    try:
        with open(OUTPUT_FILENAME, 'wb') as f:
            pickle.dump(hf_outputs, f, pickle.HIGHEST_PROTOCOL)
        print(f"Successfully saved {len(hf_outputs)} captured outputs.")
        print("Saved keys:", list(hf_outputs.keys()))
    except Exception as e:
        print(f"FATAL ERROR saving outputs: {e}")
        traceback.print_exc()
else:
    print("\n--- No outputs captured from HF model, skipping save. ---")


print("\n--- Script Finished ---")


In [None]:
# === Cell 5: Convert Parameters to Knowledge Tensors (Transposed Weights) ===

import gc
import pickle
import time
import traceback
from pathlib import Path

import numpy as np
import torch

# --- Project imports (requires tensors >= 0.7.6 and core >= 0.6.12) ---
def _version_tuple(version):
    """Turn a dotted version string such as "0.7.10" into a tuple of ints, e.g. (0, 7, 10)."""
    import re
    return tuple(int(part) for part in re.findall(r"\d+", str(version)))

try:
    from tensors import (
        TENSORS_VERSION, TensorCoordinate, create_tensor, MetadataTuple,
        validate_tensor_tuple, validate_tensor, DTYPE_MAPPING,
        TAG_TYPE_KNOWLEDGE, TAG_MODEL_DEEPSEEK, TAG_COMP_WEIGHTS, TAG_COMP_BIAS,
        TAG_COMP_EMBEDDING, TAG_COMP_LM_HEAD, TAG_COMP_LAYERNORM, TAG_COMP_ATTN_Q,
        TAG_COMP_ATTN_K, TAG_COMP_ATTN_V, TAG_COMP_ATTN_O, TAG_COMP_FFN_GATE,
        TAG_COMP_FFN_UP, TAG_COMP_FFN_DOWN, tag_layer, GROUP_IDX_QWEN_KNOWLEDGE,
        TAG_PREC_FLOAT32, TAG_PREC_FLOAT16, TAG_PREC_BFLOAT16, TAG_PREC_INT8
    )
    # Compare versions numerically. As plain strings "0.7.10" sorts before
    # "0.7.6" (a newer release would be rejected) and "0.6.9" sorts after
    # "0.6.12" (an older release would be accepted).
    if _version_tuple(TENSORS_VERSION) < _version_tuple("0.7.6"):
        raise ImportError(f"Requires tensors v0.7.6+, found v{TENSORS_VERSION}")
    from core import Veector, CORE_VERSION
    if _version_tuple(CORE_VERSION) < _version_tuple("0.6.12"):
        raise ImportError(f"Requires core v0.6.12+, found v{CORE_VERSION}")
except ImportError as e:
    print(f"FATAL ERROR: Import failed: {e}")
    raise

# --- Cell version ---
CONVERTER_CELL5_VERSION = "Hybrid v0.7.6 + Quant + Transpose v2"
# --- End of version ---

print(f"--- Running Converter Cell 5 v{CONVERTER_CELL5_VERSION} ---")
start_cell5_time = time.time()

# --- Check that the variables created by earlier cells exist ---
# Fail early with a clear message instead of part-way through the conversion.
if 'vec' not in locals() or vec is None:
    raise NameError("'vec' object not defined.")
if 'DB_PATH' not in locals() or not isinstance(DB_PATH, Path):
    raise NameError("DB_PATH not defined or invalid.")
if 'model' not in locals() or model is None:
    raise NameError("HF 'model' not loaded.")
if 'model_NAME' not in locals() or not model_NAME:
    raise NameError("model_NAME not defined.")

# --- Re-initialise the DB (if necessary) ---
if not hasattr(vec, 'db') or vec.db is None:
    try:
        print("Attempting DB re-init for Cell 5...")
        # Imported only when needed, to avoid an unnecessary import at the top
        from veectordb import VeectorDB
        vec.db = VeectorDB(db_dir=DB_PATH)
        print("DB connection re-established.")
    except Exception as db_reinit_e:
        raise AttributeError(f"DB re-init failed: {db_reinit_e}")
else:
    print("'vec' object found and DB connection seems active.")

# --- Initialisation ---
# Two-way registry between original HF parameter names and small integer ids.
ORIGINAL_NAME_TO_ID_MAP: Dict[str, int] = {}
ID_TO_ORIGINAL_NAME_MAP: Dict[int, str] = {}
NEXT_NAME_ID: int = 0
print("Initialized Name <-> ID mapping dictionaries.")

knowledge_map: Dict[str, str] = {} # parameter name -> id of the saved knowledge tensor
param_count: int = 0
conversion_errors: int = 0

# --- Helper for name ids ---
def get_or_create_name_id(name: Optional[str]) -> int:
    """Return the integer id registered for `name`, allocating one on first use.

    Ids are handed out sequentially starting at 0 and recorded in both
    ORIGINAL_NAME_TO_ID_MAP and ID_TO_ORIGINAL_NAME_MAP.

    Args:
        name: Parameter name; None or an empty string is not registered.

    Returns:
        The id for `name`, or -1 when `name` is empty or None.
    """
    global NEXT_NAME_ID, ORIGINAL_NAME_TO_ID_MAP, ID_TO_ORIGINAL_NAME_MAP
    if not name:
        return -1
    known_id = ORIGINAL_NAME_TO_ID_MAP.get(name)
    if known_id is not None:
        return known_id
    # First sighting: claim the next free id and advance the counter.
    fresh_id, NEXT_NAME_ID = NEXT_NAME_ID, NEXT_NAME_ID + 1
    ORIGINAL_NAME_TO_ID_MAP[name] = fresh_id
    ID_TO_ORIGINAL_NAME_MAP[fresh_id] = name
    return fresh_id

# --- Conversion parameters ---
# Fall back to float16 unless the configuration cell defined TORCH_DTYPE, in
# which case the precision tag is looked up from that dtype.
default_precision_tag = TAG_PREC_FLOAT16
default_torch_dtype = torch.float16
if 'TORCH_DTYPE' in locals():
    default_torch_dtype = TORCH_DTYPE
    default_precision_tag = {
        torch.float16: TAG_PREC_FLOAT16,
        torch.bfloat16: TAG_PREC_BFLOAT16,
        torch.float32: TAG_PREC_FLOAT32,
        torch.int8: TAG_PREC_INT8,
    }.get(TORCH_DTYPE, default_precision_tag)

# All knowledge tensors go into the Qwen knowledge group and carry the DeepSeek model tag.
model_tag = TAG_MODEL_DEEPSEEK
knowledge_group_idx = GROUP_IDX_QWEN_KNOWLEDGE

print(f"\n--- Creating Knowledge Tensors (Group: {knowledge_group_idx}) ---")
print(f"    Model Tag: {model_tag}")
print(f"    Default Precision Tag: {default_precision_tag}")
print(f"    Quantizing Embed/LMHead to INT8. Transposing Linear Weights.")

# --- Main conversion loop ---
# Every parameter of the HF model becomes one "knowledge" tensor in the Veector
# DB. The embedding and LM head are quantized to INT8 with one symmetric
# per-tensor scale; everything else is stored in the configured float dtype.
# Weights of linear projections (and the LM head) are stored transposed.
total_params = sum(1 for _ in model.named_parameters())
print(f"Found {total_params} parameters to process.")

# Storage dtype for the non-quantized tensors. torch.dtype objects have no
# `numpy_dtype` attribute, so the dtype is translated explicitly. Only float32
# and float16 have a NumPy equivalent here; anything else (e.g. bfloat16) is
# stored as float16 and tagged as float16 so the tag matches the stored data.
if default_torch_dtype == torch.float32:
    target_np_dtype, storage_precision_tag = np.float32, TAG_PREC_FLOAT32
else:
    target_np_dtype, storage_precision_tag = np.float16, TAG_PREC_FLOAT16
if storage_precision_tag != default_precision_tag:
    print(f"  WARN: {default_torch_dtype} cannot be stored as-is; using float16 (precision tag {storage_precision_tag}).")

for idx, (name, param) in enumerate(model.named_parameters()):
    print(f"\nProcessing Param {idx+1}/{total_params}: {name}")
    print(f"  Original Shape: {param.shape} | Dtype: {param.dtype}")

    # Reset per-parameter state so nothing leaks over from the previous iteration.
    param_data_fp32: Optional[np.ndarray] = None
    knowledge_data_to_pass: Optional[np.ndarray] = None
    tags: List[int] = []
    metadata_extra_to_pass: Optional[Dict] = None
    dtype_to_pass: Any = None
    final_tags: List[int] = []
    knowledge_coord: Optional[TensorCoordinate] = None
    name_id: int = -1
    create_result: Optional[List] = None
    knowledge_id: Optional[str] = None
    requires_transpose: bool = False

    try:
        # Step 1-3: fetch the data, then derive name id, tags and coordinate.
        param_data_fp32 = param.data.cpu().to(torch.float32).numpy()
        name_id = get_or_create_name_id(name)
        tags = [TAG_TYPE_KNOWLEDGE, model_tag]
        layer_idx = -1
        group_idx = knowledge_group_idx
        coord_x = 0
        current_nest = 1  # knowledge tensors use nest=1 by default
        is_weight = name.endswith(".weight")
        is_bias = name.endswith(".bias")

        if is_weight:
            tags.append(TAG_COMP_WEIGHTS)
        elif is_bias:
            tags.append(TAG_COMP_BIAS)

        # Identify the component, its x coordinate and whether to transpose.
        if "model.embed_tokens.weight" in name:
            tags.append(TAG_COMP_EMBEDDING)
            coord_x = 0
        elif "lm_head.weight" in name:
            tags.append(TAG_COMP_LM_HEAD)
            coord_x = 1
            requires_transpose = True
        elif "model.norm.weight" in name:
            # The final norm is placed one layer past the last decoder layer.
            layer_idx = model.config.num_hidden_layers
            tags.append(TAG_COMP_LAYERNORM)
            coord_x = 0
        elif ".layers." in name:
            try:
                layer_part = name.split('.layers.')[1]
                layer_idx = int(layer_part.split('.')[0])
                if layer_idx >= 0:
                    tags.append(tag_layer(layer_idx))
                else:
                    raise ValueError(f"Invalid L idx: {layer_idx}")

                component_tag_layer = None
                if "self_attn" in name:
                    if "q_proj.weight" in name:
                        component_tag_layer = TAG_COMP_ATTN_Q
                        coord_x = 10
                        requires_transpose = True
                    elif "q_proj.bias" in name:
                        component_tag_layer = TAG_COMP_ATTN_Q
                        coord_x = 11
                    elif "k_proj.weight" in name:
                        component_tag_layer = TAG_COMP_ATTN_K
                        coord_x = 20
                        requires_transpose = True
                    elif "k_proj.bias" in name:
                        component_tag_layer = TAG_COMP_ATTN_K
                        coord_x = 21
                    elif "v_proj.weight" in name:
                        component_tag_layer = TAG_COMP_ATTN_V
                        coord_x = 30
                        requires_transpose = True
                    elif "v_proj.bias" in name:
                        component_tag_layer = TAG_COMP_ATTN_V
                        coord_x = 31
                    elif "o_proj.weight" in name:
                        component_tag_layer = TAG_COMP_ATTN_O
                        coord_x = 40
                        requires_transpose = True
                elif "mlp" in name:
                    if "gate_proj.weight" in name:
                        component_tag_layer = TAG_COMP_FFN_GATE
                        coord_x = 50
                        requires_transpose = True
                    elif "up_proj.weight" in name:
                        component_tag_layer = TAG_COMP_FFN_UP
                        coord_x = 60
                        requires_transpose = True
                    elif "down_proj.weight" in name:
                        component_tag_layer = TAG_COMP_FFN_DOWN
                        coord_x = 70
                        requires_transpose = True
                elif "input_layernorm.weight" in name:
                    component_tag_layer = TAG_COMP_LAYERNORM
                    coord_x = 1
                elif "post_attention_layernorm.weight" in name:
                    component_tag_layer = TAG_COMP_LAYERNORM
                    coord_x = 2

                if component_tag_layer:
                    tags.append(component_tag_layer)
                elif not is_weight and not is_bias:
                    # NOTE(review): an unrecognised *.weight / *.bias inside a
                    # layer gets no warning and keeps coord_x = 0 - confirm
                    # that this is intended.
                    print(f"  WARN: Unrecognized comp in L{layer_idx}: {name}")
                    coord_x = 99
            except Exception as parse_e:
                print(f"  Error parsing layer for {name}: {parse_e}")
                conversion_errors += 1
                continue
        else:
            print(f"  WARN: Param unmatched: {name}")
            layer_idx = -1
            coord_x = 999

        knowledge_coord = TensorCoordinate(layer=layer_idx, group=group_idx, nest=current_nest, x=coord_x)

        # Step 4: quantize or cast, then transpose where required.
        quantization_scale = None
        current_precision_tag = default_precision_tag
        data_before_save = None

        if name == "model.embed_tokens.weight" or name == "lm_head.weight":
            # param_data_fp32 is always float32 (see step 1), so these two
            # tensors are always quantized; no non-float fallback is needed.
            try:
                abs_max = np.max(np.abs(param_data_fp32))
                scale = 1.0
                if abs_max >= 1e-9:
                    scale = abs_max / 127.0
                scale = max(scale, 1e-9)  # prevent division by zero
                quantized_data = np.round(param_data_fp32 / scale).astype(np.int8)
                data_before_save = quantized_data
                dtype_to_pass = np.int8
                quantization_scale = float(scale)
                current_precision_tag = TAG_PREC_INT8
                metadata_extra_to_pass = {"quantization_scale": quantization_scale}
                # Only the LM head is transposed, and only AFTER quantization.
                if name == "lm_head.weight":
                    print("  Transposing quantized LM Head weights...")
                    data_before_save = data_before_save.T
            except Exception as quant_e:
                print(f"  ERROR quantizing {name}: {quant_e}")
                conversion_errors += 1
                continue
        else:
            try:
                data_before_save = param_data_fp32.astype(target_np_dtype)
                dtype_to_pass = data_before_save.dtype
                current_precision_tag = storage_precision_tag
                metadata_extra_to_pass = None
                if requires_transpose:
                    print(f"  Transposing {name} weights...")
                    data_before_save = data_before_save.T
            except Exception as cast_e:
                print(f"  ERROR casting/transposing {name}: {cast_e}")
                conversion_errors += 1
                continue

        # Final payload and the shape it has after any transposition.
        knowledge_data_to_pass = data_before_save
        final_shape_to_save = knowledge_data_to_pass.shape if knowledge_data_to_pass is not None else None

        # Step 5: finalise the tag list (exactly one precision tag, no duplicates).
        final_tags = list(tags)
        if current_precision_tag != default_precision_tag and default_precision_tag in final_tags:
            final_tags.remove(default_precision_tag)
        if current_precision_tag:
            final_tags.append(current_precision_tag)
        final_tags = sorted(list(set(final_tags)))

        print(f"  Final Tags: {final_tags}")
        print(f"  Coordinate: {knowledge_coord}")
        print(f"  Data to save: dtype={dtype_to_pass}, shape={final_shape_to_save}")
        if metadata_extra_to_pass:
            print(f"  Extra Metadata: {metadata_extra_to_pass}")

        # Step 6: build the tensor structure.
        create_result = vec.create_tensor(
            coord=knowledge_coord,
            tensor_type="knowledge",
            knowledge_data=knowledge_data_to_pass,  # possibly transposed data
            tags=final_tags,
            dtype=dtype_to_pass,
            shape=final_shape_to_save,  # shape after transposition
            name_id=name_id,
            metadata_extra=metadata_extra_to_pass,
            status="active"
        )

        # Step 7: persist the tensor and remember its id.
        knowledge_id = vec.save_tensor(create_result)

        if knowledge_id:
            knowledge_map[name] = knowledge_id
            param_count += 1
        else:
            conversion_errors += 1
            print(f"  ERROR saving tensor for {name}")

    except Exception as create_save_e:
        print(f"  ERROR during create/save for {name}: {create_save_e}")
        traceback.print_exc()
        conversion_errors += 1
    finally:
        # Drop the float32 copy as soon as possible to keep peak memory low.
        if param_data_fp32 is not None:
            del param_data_fp32

# --- End of loop ---

print(f"\n--- Finished saving {param_count} knowledge tensors to {vec.db.db_root_path if vec.db else 'N/A'} ---")
if conversion_errors > 0:
    print(f"!!! WARNING: {conversion_errors} errors occurred during knowledge conversion !!!")

# --- Save the name <-> id map ---
name_map_file = DB_PATH / f"{model_NAME}_name_id_map.pkl"
try:
    map_data_to_save = {
        "name_to_id": ORIGINAL_NAME_TO_ID_MAP,
        "id_to_name": ID_TO_ORIGINAL_NAME_MAP,
        "next_id": NEXT_NAME_ID
    }
    with open(name_map_file, 'wb') as f:
        pickle.dump(map_data_to_save, f)
    print(f"\nName <-> ID map saved to {name_map_file}")
except Exception as e:
    # NOTE(review): unlike the knowledge-map save below, a failure here is not
    # added to conversion_errors.
    print(f"  Error saving name ID map: {e}")

# --- Save the knowledge map (for Cell 5.5) ---
# The file name is defined in Cell 4.5, but it is redefined here for robustness
knowledge_map_filename = f"{model_NAME}_knowledge_map.pkl"
knowledge_map_filepath = DB_PATH / knowledge_map_filename
try:
    print(f"\n--- Saving Knowledge Map (for Cell 5.5) ---")
    with open(knowledge_map_filepath, 'wb') as f:
        pickle.dump(knowledge_map, f)
    print(f"  Knowledge map saved to {knowledge_map_filepath}")
except Exception as e:
    print(f"  Error saving knowledge map: {e}")
    # Important: if the map was not saved, Cell 6 cannot load it later
    # Handling for this error can be added here if needed
    conversion_errors += 1 # Count this as a conversion error

print(f"\n'knowledge_map' created with {len(knowledge_map)} entries for Cell 5.5.")

# --- Cleanup ---
# (Unchanged)
if 'torch' in locals() and hasattr(torch, 'cuda') and torch.cuda.is_available():
     torch.cuda.empty_cache()
gc.collect()
print("\nMemory cleanup attempted.")
print("DB connection remains open for Cell 5.5/6.")

# --- End of Cell 5 ---
end_cell5_time = time.time()
print(f"--- Cell 5 Finished in {end_cell5_time - start_cell5_time:.2f} seconds ---")

In [None]:
# === Cell 5.5: Save Intermediate Data for Cell 6 ===
# Pickles the knowledge map, the model config, the model name and the resolved DB path
# into one file under DB_PATH.
# NOTE(review): the Cell 6 in this notebook loads "<model>_knowledge_map.pkl" directly and
# does not appear to read this bundle — confirm whether this cell is still needed.

import pickle
from pathlib import Path
import os

print("\n--- Running Cell 5.5: Saving Intermediate Data ---")

# --- Check that the variables required from earlier cells exist ---
# At cell top level locals() is the notebook namespace, so these tests detect cells that were not run.
if 'knowledge_map' not in locals() or not isinstance(knowledge_map, dict):
    raise NameError("Variable 'knowledge_map' not found or invalid. Ensure Cell 5 ran successfully.")
if 'model' not in locals() or model is None:
    # At minimum the model config is needed (for num_layers)
    raise NameError("Variable 'model' (or model.config) not found. Ensure Cell 4 ran successfully.")
if 'model_NAME' not in locals() or not model_NAME:
     raise NameError("Variable 'model_NAME' not defined. Check Cell 1.")
if 'DB_PATH' not in locals() or not isinstance(DB_PATH, Path):
     raise NameError("Variable 'DB_PATH' not defined or invalid. Check Cell 1.")

# --- Data to save ---
# Only the config is stored, not the whole model, to save space
cell6_input_data = {
    'knowledge_map': knowledge_map,
    'model_config': model.config, # Store the config
    'model_name': model_NAME,
    'db_path': str(DB_PATH.resolve()) # Store the DB path as a string
}

# --- File name and saving ---
intermediate_filename = f"{model_NAME}_cell6_input_data.pkl"
intermediate_filepath = DB_PATH / intermediate_filename

try:
    # Make sure the DB_PATH directory exists
    DB_PATH.mkdir(parents=True, exist_ok=True)

    print(f"Saving intermediate data to: {intermediate_filepath}")
    with open(intermediate_filepath, 'wb') as f:
        pickle.dump(cell6_input_data, f, pickle.HIGHEST_PROTOCOL)
    print("Intermediate data saved successfully.")
    print(f"  Knowledge map entries: {len(knowledge_map)}")
    print(f"  Model Config Type: {type(model.config)}")

except Exception as e:
    # Best-effort: the failure is reported but execution continues.
    print(f"---!!! ERROR saving intermediate data: {e} !!!---")
    # A `raise` can be added here if aborting on failure is critical
else:
    # Runs only when the save above raised nothing.
    print("--- Cell 5.5 Finished ---")

# --- Free the model from memory (if it is no longer needed before a restart) ---
# Uncomment to release memory once the intermediate data has been saved
# import gc
# if 'model' in locals(): del model
# if 'torch' in locals() and hasattr(torch, 'cuda') and torch.cuda.is_available(): torch.cuda.empty_cache()
# gc.collect()
# print("Cleaned up model from memory (optional).")

In [19]:
# Irreversibly deletes the data/db/g500 directory.
# NOTE(review): presumably the processor-tensor group written by Cell 6 (GROUP_IDX_QWEN_PROCESSOR
# is annotated "# 500" there) — confirm before running, and run it only before re-executing Cell 6.
!rm -rf data/db/g500

In [None]:
# === Исправленный Код для Ячейки 6 (Высокоуровневые OP + Эталонные выходы) ===
# Создает процессоры Veector с использованием новых OP-кодов
# и одновременно сохраняет эталонные выходы HF модели (float32)

import time
import pickle
import numpy as np
import traceback
import os
import gc
from pathlib import Path
from functools import partial
from typing import Dict, List, Any, Optional, Tuple, Union

# --- Необходимые библиотеки ---
try:
    import torch
    from torch import nn
    from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
    print("Torch and Transformers imported successfully.")
except ImportError as e:
    print(f"FATAL ERROR: Missing essential libraries (torch, transformers): {e}")
    raise

# --- Veector project imports ---
# Imports the project modules, checks their versions and defines the OP codes used below.
# Any failure is reported and re-raised so the cell stops immediately.
try:
    from core import Veector, CORE_VERSION
    from tensors import (
        TENSORS_VERSION, TensorCoordinate, create_tensor, MetadataTuple,
        validate_tensor_tuple, validate_tensor, DTYPE_MAPPING, get_tensor_hash,
        TAG_TYPE_PROCESSOR, TAG_FUNC_EMBED_LOOKUP, TAG_FUNC_ATTENTION,
        TAG_FUNC_FFN, TAG_FUNC_LINEAR, TAG_COMP_LAYERNORM, TAG_MODEL_DEEPSEEK,
        tag_layer, GROUP_IDX_QWEN_PROCESSOR, GROUP_IDX_QWEN_KNOWLEDGE,
        TAG_COMP_EMBEDDING, TAG_COMP_WEIGHTS, TAG_COMP_BIAS, TAG_COMP_ATTN_Q,
        TAG_COMP_ATTN_K, TAG_COMP_ATTN_V, TAG_COMP_ATTN_O, TAG_COMP_FFN_GATE,
        TAG_COMP_FFN_UP, TAG_COMP_FFN_DOWN, TAG_COMP_LM_HEAD,
        TAG_PREC_FLOAT32, TAG_PREC_FLOAT16, TAG_PREC_BFLOAT16, TAG_PREC_INT8
    )
    from operations import OPERATIONS_VERSION
    from veectordb import VeectorDB, VEECTORDB_VERSION

    def _version_tuple(version) -> tuple:
        """Turn a version such as "0.7.10" into (0, 7, 10) for numeric comparison.

        Comparing the raw strings is lexicographic: "0.7.9" < "0.7.10" is False and
        "0.7.10" < "0.7.6" is True, which gives wrong results once a component
        reaches two digits.
        """
        import re
        return tuple(int(part) for part in re.findall(r"\d+", str(version)))

    print(f"Using Core: {CORE_VERSION}, Tensors: {TENSORS_VERSION}, Ops: {OPERATIONS_VERSION}, DB: {VEECTORDB_VERSION}")
    # Core / operations versions only warn; tensors / veectordb versions are hard requirements.
    if _version_tuple(CORE_VERSION) < (0, 7, 10): print("WARN: Expected core v0.7.10+ for high-level op registration.")
    if _version_tuple(OPERATIONS_VERSION) < (0, 8, 9): print("WARN: Expected operations v0.8.9+")
    if _version_tuple(TENSORS_VERSION) < (0, 7, 6): raise ImportError("Tensors version too old")
    if _version_tuple(VEECTORDB_VERSION) < (0, 9, 7): raise ImportError("VeectorDB version too old")

    print("Veector components imported successfully.")

    # --- Local definition of the OP codes (including the new ones) ---
    OP_SUM=[0,0,0]; OP_SUBTRACT=[0,0,1]; OP_ADD=[0,0,2]; OP_MULTIPLY=[0,1,0]
    OP_DIVIDE=[0,1,1]; OP_SQRT=[0,2,0]; OP_POWER=[0,2,1]; OP_ABS=[0,3,0]
    OP_MOD=[0,5,0]; OP_FLOOR=[0,6,0]; OP_CEIL=[0,6,1]; OP_SIN=[1,0,0]
    OP_COS=[1,0,1]; OP_TAN=[1,1,0]; OP_COT=[1,1,1]; OP_ASIN=[1,2,0]
    OP_ACOS=[1,2,1]; OP_ATAN=[1,3,0]; OP_GREATER=[2,0,0]; OP_EQUAL=[2,0,1]
    OP_AND=[2,1,0]; OP_OR=[2,1,1]; OP_NOT=[2,2,0]; OP_XOR=[2,3,0]
    OP_NAND=[2,4,0]; OP_NOR=[2,4,1]; OP_IF=[3,0,0]; OP_LOOP_MULT=[4,0,0]
    OP_CHOICE=[7,0,0]; OP_RAND_UNIFORM=[5,1,0]; OP_RAND_NORMAL=[5,1,1]
    OP_MEDIAN=[5,2,0]; OP_PRINT=[8,0,0]; OP_IDENTITY=[9,0,0]
    OP_TRIGGER_REASON=[10,0,0]; OP_DFS=[15,0,0]; OP_MEAN=[16,0,0]
    OP_STDDEV=[16,1,0]; OP_RELU=[18,0,0]; OP_SIGMOID=[18,1,0]
    OP_SOFTMAX=[18,2,0]; OP_LEAKY_RELU=[18,3,0]; OP_SILU=[18,4,0]
    OP_GELU=[40,5,0]; OP_EXP_SMOOTHING=[19,0,0]; OP_NORMALIZE_01=[20,0,0]
    OP_INTERPOLATE=[20,1,0]; OP_LAYER_NORM=[40,1,0]; OP_BATCH_NORM=[40,4,0]
    OP_DROPOUT=[40,3,0]; OP_GET_Q_ROT=[40,7,1]; OP_GET_K_ROT=[40,7,2]
    OP_MATRIX_MULTIPLY=[30,0,0]; OP_DETERMINANT=[30,1,0]; OP_EIGENVALUES=[30,2,0]
    OP_CONVOLUTION=[30,3,0]; OP_TRANSPOSE=[30,4,0]; OP_INVERSE=[30,5,0]
    OP_TRACE=[30,6,0]; OP_ATTENTION_MULTIHEAD=[40,2,0]; OP_EMBEDDING_LOOKUP=[40,6,0]
    OP_APPLY_ROPE=[40,7,0]; OP_RESHAPE_HEADS=[40,9,0]; OP_REPEAT_KV_HEADS=[40,9,1]
    OP_SCALED_DOT_PROD_ATTN=[40,9,2]; OP_MERGE_HEADS=[40,9,3]; OP_ADD_BIAS=[0,0,3]
    OP_UPDATE_KV_CACHE = [40, 10, 0]; OP_CREATE_CAUSAL_MASK = [40, 10, 1]
    # The aliases below bind a second name to the SAME list object as the code they alias.
    OP_RESIDUAL_ADD=OP_ADD; OP_LINEAR=OP_MATRIX_MULTIPLY; OP_FINAL_NORM=OP_LAYER_NORM
    OP_LINEAR_HEAD=OP_LINEAR; OP_QUANTUM_HADAMARD=[50,0,0]; OP_QUANTUM_PAULI_X=[50,0,1]
    OP_QUANTUM_CNOT=[50,1,0]; OP_QUANTUM_MEASURE=[50,2,0]; OP_QUANTUM_SUPERPOS=[50,3,0]
    OP_QUANTUM_ENTANGLE=[50,4,0]; META_OP_CATEGORY=99; OP_STORE=[99,0,0]
    OP_LOAD=[99,0,1]; OP_LOAD_INITIAL_INPUT=[99,0,3]; OP_DEBUG_CONTEXT=[99,1,0]
    OP_MAKE_TUPLE = [99, 2, 0]
    # New high-level OP codes
    OP_QWEN2_RMSNORM = [300, 0, 0]
    OP_QWEN2_ATTENTION = [300, 1, 0]
    OP_QWEN2_MLP = [300, 2, 0]
    # --- End of OP codes ---

except ImportError as e:
    print(f"FATAL ERROR: Failed to import Veector components: {e}")
    raise
except Exception as e_other:
    print(f"FATAL ERROR during Veector imports: {e_other}")
    raise

# --- Configuration ---
DB_PATH = Path("./data/db") # Path to the Veector DB
MODEL_SOURCE = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" # Hub identifier or local path
TOKENIZER_SOURCE = MODEL_SOURCE # Where to look for the tokenizer
REFERENCE_OUTPUT_FILENAME = "hf_reference_outputs_fp32.pkl" # File name for the reference outputs
PROMPT_FOR_REFERENCE = "Hello, how are you?" # Standard prompt used to generate the references

# --- Load helper data ---
# The knowledge map is required; the name <-> ID map is loaded only if its file exists.
# Both are pickles: load them only from a DB directory you produced yourself, because
# unpickling an untrusted file can execute arbitrary code.
print("\n--- Loading Helper Data (Knowledge Map, Name ID Map) ---")
knowledge_map = None
name_id_map = None
# File names are keyed by the last path component of MODEL_SOURCE.
map_model_name = MODEL_SOURCE.split('/')[-1]
knowledge_map_filepath = DB_PATH / f"{map_model_name}_knowledge_map.pkl"
name_map_filepath = DB_PATH / f"{map_model_name}_name_id_map.pkl"

try:
    with open(knowledge_map_filepath, 'rb') as f:
        knowledge_map = pickle.load(f)
    print(f"Loaded knowledge map ({len(knowledge_map)} entries) from {knowledge_map_filepath}")
    if name_map_filepath.is_file():
        with open(name_map_filepath, 'rb') as f:
            name_id_map = pickle.load(f) # Load it if present
        print(f"Loaded name ID map from {name_map_filepath}")
    else:
        print(f"Warning: Name ID map file not found at {name_map_filepath}")
except FileNotFoundError as e:
    print(f"FATAL ERROR: Required map file not found: {e}. Run previous notebook cells.")
    raise
except Exception as e:
    print(f"FATAL ERROR loading map files: {e}")
    raise
if not knowledge_map: raise ValueError("Knowledge map is empty or failed to load.")

def find_knowledge_id(hf_param_name: str) -> Optional[str]:
    """Return the knowledge-tensor ID recorded for a Hugging Face parameter name.

    The lookup goes through the module-level ``knowledge_map`` loaded earlier in
    this cell; ``None`` is returned when the name has no entry.
    """
    knowledge_id = knowledge_map.get(hf_param_name)
    return knowledge_id

# --- Load the reference model (FP32) and the tokenizer ---
print("\n--- Loading Reference HF Model (Float32) and Tokenizer ---")
tokenizer = None
model_fp32 = None
model_config = None
num_layers = 0
num_attention_heads = 0
num_key_value_heads = 0
hidden_size = 0
head_dim = 0
rms_norm_eps = 1e-6 # Placeholder default; replaced by the value from the config below

try:
    print(f"Loading Tokenizer from: {TOKENIZER_SOURCE}")
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_SOURCE, trust_remote_code=True)
    print(f"Tokenizer class: {tokenizer.__class__.__name__}")

    print(f"Loading HF Model {MODEL_SOURCE} with float32...")
    model_fp32 = AutoModelForCausalLM.from_pretrained(MODEL_SOURCE, torch_dtype=torch.float32, trust_remote_code=True)
    model_fp32.eval()
    print(f"HF Model loaded to device: {model_fp32.device}")

    # Pull the architecture parameters that are baked into the processor definitions below.
    model_config = model_fp32.config
    num_layers = model_config.num_hidden_layers
    num_attention_heads = model_config.num_attention_heads
    # Falls back to the attention-head count when the config has no num_key_value_heads.
    num_key_value_heads = getattr(model_config, 'num_key_value_heads', num_attention_heads)
    hidden_size = model_config.hidden_size
    if num_attention_heads > 0:
        head_dim = hidden_size // num_attention_heads
    else:
        raise ValueError("num_attention_heads is zero")
    rms_norm_eps = model_config.rms_norm_eps # Take epsilon from the config
    print(f"Model Config: L={num_layers}, H={num_attention_heads}, KVH={num_key_value_heads}, HDim={head_dim}, Epsilon={rms_norm_eps}")

except Exception as e:
    print(f"FATAL ERROR loading HF model/tokenizer: {e}")
    traceback.print_exc()
    raise

# --- Prepare the input IDs for the reference run ---
print("\n--- Preparing Input IDs for Reference Run ---")
input_ids_torch = None
try:
    messages = [{"role": "user", "content": PROMPT_FOR_REFERENCE}]
    input_ids_np = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=True, return_tensors="np")
    # Add a leading axis when the template returned a 1-D array.
    if input_ids_np.ndim == 1: input_ids_np = np.expand_dims(input_ids_np, axis=0)
    input_ids_torch = torch.tensor(input_ids_np).to(model_fp32.device)
    print(f"Reference Input IDs shape: {input_ids_torch.shape}")
    print(f"Reference Decoded Input: '{tokenizer.decode(input_ids_torch[0].cpu().numpy())}'")
except Exception as e:
    print(f"FATAL ERROR preparing reference input: {e}")
    raise

# --- Forward-hook factory and capture storage ---
hf_reference_outputs = {}
hook_handles = []
def get_hook(name):
    """Build a forward hook that records a module's output under ``name``.

    The returned hook stores the output (the first element when the output is
    a tuple) in ``hf_reference_outputs[name]`` as a detached float32 NumPy
    array on the CPU, and prints one line per capture.
    """
    def hook_fn(module, input, output):
        if isinstance(output, tuple):
            captured = output[0]
        else:
            captured = output
        print(f"  [HOOK] Captured output for: {name}")
        as_numpy = captured.detach().cpu().numpy()
        hf_reference_outputs[name] = as_numpy.astype(np.float32)
    return hook_fn

# --- Register forward hooks on every module whose output is kept as a reference ---
print("\n--- Registering Hooks for Reference Run ---")
try:
    hook_handles.append(model_fp32.model.embed_tokens.register_forward_hook(get_hook("embed_tokens")))
    for i in range(num_layers):
        hook_handles.append(model_fp32.model.layers[i].register_forward_hook(get_hook(f"layer_{i}_output")))
        hook_handles.append(model_fp32.model.layers[i].input_layernorm.register_forward_hook(get_hook(f"L{i}_input_layernorm")))
        hook_handles.append(model_fp32.model.layers[i].self_attn.register_forward_hook(get_hook(f"L{i}_self_attn")))
        hook_handles.append(model_fp32.model.layers[i].post_attention_layernorm.register_forward_hook(get_hook(f"L{i}_post_attn_layernorm")))
        hook_handles.append(model_fp32.model.layers[i].mlp.register_forward_hook(get_hook(f"L{i}_mlp")))
    hook_handles.append(model_fp32.model.norm.register_forward_hook(get_hook("final_norm")))
    hook_handles.append(model_fp32.lm_head.register_forward_hook(get_hook("lm_head")))
    print(f"Registered {len(hook_handles)} hooks.")
except Exception as e:
    print(f"FATAL ERROR registering hooks: {e}")
    raise

# --- Forward pass of the HF model to collect the reference values ---
print("\n--- Running HF Model Forward Pass (Float32) to Capture Reference Outputs ---")
try:
    with torch.no_grad():
        hf_model_output = model_fp32(input_ids_torch, use_cache=False)
    print("HF forward pass complete.")
except Exception as e:
    print(f"FATAL ERROR during HF forward pass: {e}")
    traceback.print_exc()
    # Re-raise: a pass that failed part-way leaves only a partial set of captures, which
    # must not be saved as the reference. Cleanup below still runs via `finally`.
    raise
finally:
    for handle in hook_handles: handle.remove()
    print("Hooks removed.")
    # The model is no longer needed once the captures are stored as NumPy arrays.
    del model_fp32; gc.collect()
    if 'torch' in locals() and hasattr(torch, 'cuda'): torch.cuda.empty_cache()
    print("Cleaned up reference model from memory.")
if not hf_reference_outputs:
    print("FATAL ERROR: No reference outputs were captured. Cannot proceed.")
    # Raise instead of exit(): in a notebook exit() shuts the kernel down rather than failing the cell.
    raise RuntimeError("No reference outputs were captured. Cannot proceed.")

# --- Initialise Veector for saving the processors ---
print("\n--- Initializing Veector for Saving Processors ---")
vec = None
try:
    vec = Veector(db_dir=DB_PATH)
    print(f"Veector core v{CORE_VERSION} initialized for saving.")
except Exception as e:
    # Nothing below can work without the Veector instance, so report and abort the cell.
    print(f"FATAL: Veector initialization failed: {e}")
    raise

# --- Define and save the Veector processors (using the high-level OPs) ---
print("\n--- Defining and Saving Veector Processor Tensors (using High-Level OPs) ---")
processor_errors = 0
processor_map: Dict[str, str] = {}


def _processor_map_key(name: str) -> str:
    """Derive the processor_map key from a processor's display name ("" if unrecognised)."""
    if "Embedding" in name: return "embedding"
    if "Final Norm" in name: return "final_norm"
    if "LM Head" in name: return "lm_head"
    for marker, prefix in (("Attention Processor L", "attn"), ("FFN Processor L", "ffn")):
        if marker in name:
            try:
                # The layer index is whatever follows the last "L" in the name.
                return f"{prefix}_{int(name.split('L')[-1])}"
            except ValueError:
                # Non-numeric suffix: report "no key" instead of hiding every exception type.
                return ""
    return ""


def create_and_save_processor(name: str, coord: "TensorCoordinate", tags: List[int], interface: Dict, ops_sequences: Dict):
    """Create a processor tensor through ``vec``, save it and record its ID.

    Args:
        name: Display name; also decides the key used in ``processor_map``
            ("embedding", "final_norm", "lm_head", "attn_<i>" or "ffn_<i>").
        coord: Coordinate passed to ``vec.create_tensor``.
        tags: Tag list passed to ``vec.create_tensor``.
        interface: Interface dict passed to ``vec.create_tensor``.
        ops_sequences: Operation sequences passed to ``vec.create_tensor``.

    Returns:
        The value returned by ``vec.save_tensor``, or ``None`` when creation,
        validation or saving raised.

    Side effects:
        Adds the ID to the module-level ``processor_map`` when a key can be
        derived from ``name``, and increments ``processor_errors`` when saving
        returns a falsy ID or any exception occurs. Exceptions are printed with
        a traceback and never propagate to the caller.
    """
    global processor_errors, processor_map, vec
    proc_id = None
    try:
        print(f"  Defining Processor: {name} at {coord}")
        tensor_structure = vec.create_tensor(coord=coord, tensor_type="processor", tags=tags, interface=interface, ops_sequences=ops_sequences, status="active", name_id=-1)
        if not validate_tensor(tensor_structure):
            raise ValueError(f"Invalid list structure created for {name}")
        proc_id = vec.save_tensor(tensor_structure)
        if proc_id:
            map_key = _processor_map_key(name)
            if map_key:
                processor_map[map_key] = proc_id
                print(f"    SUCCESS: Saved {name} with ID: {proc_id} (Key: {map_key})")
            else:
                # The tensor is saved but not mapped; the entry-count check at the end of the cell catches this.
                print(f"    WARN: Saved {name} with ID: {proc_id}, but could not determine map key.")
        else:
            processor_errors += 1
            print(f"    ERROR saving {name}")
    except Exception as e:
        print(f"    ERROR during definition/saving of {name}: {e}")
        traceback.print_exc()
        processor_errors += 1
    return proc_id

# --- Parameters shared by the processors ---
processor_group_idx = GROUP_IDX_QWEN_PROCESSOR # 500
model_tag = TAG_MODEL_DEEPSEEK # 12
# Precision tag placed on the knowledge tensors requested by the layer and final-norm processors.
prec_tag_weights = TAG_PREC_FLOAT16

# --- 1. Embedding Processor (kept low-level) ---
# Uses the plain OP_EMBEDDING_LOOKUP op; its knowledge tensor is requested with the INT8 precision tag.
try:
    coord = TensorCoordinate(layer=-1, group=processor_group_idx, nest=0, x=0)
    tags = [TAG_TYPE_PROCESSOR, TAG_FUNC_EMBED_LOOKUP, model_tag]
    param_name = "embedding_matrix"
    kn_tags = [TAG_COMP_EMBEDDING, model_tag, TAG_COMP_WEIGHTS, TAG_PREC_INT8]
    kid = find_knowledge_id("model.embed_tokens.weight")
    if not kid: raise ValueError("Embedding knowledge tensor ID not found in map.")
    interface = { "inputs": [{"name":"token_ids", "dtype":"int64"}], "outputs": [{"name":"hidden_states", "dtype":"float16"}], "knowledge_needed": [{"param_name": param_name, "tags": kn_tags, "knowledge_id": kid}] }
    ops_sequences = {'default': [[OP_EMBEDDING_LOOKUP, {"embedding_matrix": param_name}]]}
    create_and_save_processor("Embedding Processor", coord, tags, interface, ops_sequences)
# Any failure (including the missing-ID ValueError above) is counted and the cell continues.
except Exception as e: print(f"Error defining Embedding Processor: {e}"); processor_errors += 1

# --- 2. Transformer layers (using the new high-level OPs) ---
print(f"\n--- Defining Transformer Layer Processors (0 to {num_layers-1}) using High-Level OPs ---")


def _collect_knowledge_needs(kn_defs, block_label, layer_idx):
    """Resolve knowledge definitions into interface "knowledge_needed" entries.

    Each definition is a dict with "p" (parameter name), "t" (tag list), "f"
    (Hugging Face parameter name, used as the knowledge-map key) and an optional
    "opt" flag. Returns ``(needs, missing_essential)``: the entries whose ID was
    found, and whether any non-optional definition had no ID. Optional
    definitions without an ID are skipped silently; missing essential ones are
    reported with ``block_label`` and ``layer_idx`` in the message.
    """
    needs = []
    missing_essential = False
    for kdef in kn_defs:
        kid = find_knowledge_id(kdef["f"])
        is_opt = kdef.get("opt", False)
        if kid:
            needs.append({"param_name": kdef["p"], "tags": kdef["t"], "knowledge_id": kid, "optional": is_opt})
        elif not is_opt:
            missing_essential = True
            print(f"ERROR: Missing essential knowledge for {block_label} L{layer_idx}: {kdef['p']} ({kdef['f']})")
    return needs, missing_essential


# Every layer yields two processors: attention (x=0) and FFN (x=1), both at the layer's coordinate.
for layer_idx in range(num_layers):
    layer_tag = tag_layer(layer_idx)
    print(f"  Processing Layer {layer_idx}...")

    # --- 2.A Attention Processor ---
    try:
        coord_attn = TensorCoordinate(layer=layer_idx, group=processor_group_idx, nest=0, x=0)
        tags_attn = [TAG_TYPE_PROCESSOR, TAG_FUNC_ATTENTION, layer_tag, model_tag]
        kn_defs_attn = [
            {"p":"norm_weight_input", "t":[TAG_COMP_LAYERNORM, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights], "f":f"model.layers.{layer_idx}.input_layernorm.weight"},
            {"p":"q_weights",   "t":[TAG_COMP_ATTN_Q, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights], "f":f"model.layers.{layer_idx}.self_attn.q_proj.weight"},
            {"p":"q_bias",      "t":[TAG_COMP_ATTN_Q, layer_tag, model_tag, TAG_COMP_BIAS, prec_tag_weights],    "f":f"model.layers.{layer_idx}.self_attn.q_proj.bias", "opt": True},
            {"p":"k_weights",   "t":[TAG_COMP_ATTN_K, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights], "f":f"model.layers.{layer_idx}.self_attn.k_proj.weight"},
            {"p":"k_bias",      "t":[TAG_COMP_ATTN_K, layer_tag, model_tag, TAG_COMP_BIAS, prec_tag_weights],    "f":f"model.layers.{layer_idx}.self_attn.k_proj.bias", "opt": True},
            {"p":"v_weights",   "t":[TAG_COMP_ATTN_V, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights], "f":f"model.layers.{layer_idx}.self_attn.v_proj.weight"},
            {"p":"v_bias",      "t":[TAG_COMP_ATTN_V, layer_tag, model_tag, TAG_COMP_BIAS, prec_tag_weights],    "f":f"model.layers.{layer_idx}.self_attn.v_proj.bias", "opt": True},
            {"p":"o_weights",   "t":[TAG_COMP_ATTN_O, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights], "f":f"model.layers.{layer_idx}.self_attn.o_proj.weight"},
        ]
        knowledge_needs_attn, missing_essential = _collect_knowledge_needs(kn_defs_attn, "Attn", layer_idx)

        if not missing_essential:
            interface_attn = {
                "inputs": [ {"name": "hidden_state_in"}, {"name": "residual_input"}, {"name": "position_ids"}, {"name": "past_key", "optional": True}, {"name": "past_value", "optional": True}, {"name": "start_pos", "dtype": "int", "optional": True}, {"name": "total_seq_len", "dtype": "int", "optional": True} ],
                "outputs": [{"name": "attn_block_output"}], # Attention output + first residual
                "knowledge_needed": knowledge_needs_attn
            }
            ops_sequences_attn = {'default': [
                [OP_STORE, 'residual_attn'],
                [OP_QWEN2_RMSNORM, {"norm_weight": "norm_weight_input", "eps": rms_norm_eps}],
                # Assumption: OP_QWEN2_ATTENTION takes the normalised input and returns the O-projection output.
                # The cache update happens inside the op or via step_context (to be confirmed when the op is implemented).
                [OP_QWEN2_ATTENTION, {
                    "q_weights": "q_weights", "k_weights": "k_weights", "v_weights": "v_weights", "o_weights": "o_weights",
                    "q_bias": "q_bias", "k_bias": "k_bias", "v_bias": "v_bias",
                    "position_ids": "position_ids", "past_key": "past_key", "past_value": "past_value",
                    "start_pos": "start_pos", "total_seq_len": "total_seq_len",
                    "num_heads": num_attention_heads, "num_kv_heads": num_key_value_heads, "head_dim": head_dim,
                    "layer_idx": layer_idx
                }],
                [OP_ADD, {"input_a": "residual_attn", "input_b": "_"}] # First residual add
            ]}
            create_and_save_processor(f"Attention Processor L{layer_idx}", coord_attn, tags_attn, interface_attn, ops_sequences_attn)
        else:
            processor_errors += 1
    except Exception as e:
        print(f"Error defining Attn L{layer_idx}: {e}")
        processor_errors += 1

    # --- 2.B FFN Processor ---
    try:
        coord_ffn = TensorCoordinate(layer=layer_idx, group=processor_group_idx, nest=0, x=1)
        tags_ffn = [TAG_TYPE_PROCESSOR, TAG_FUNC_FFN, layer_tag, model_tag]
        kn_defs_ffn = [
            {"p": "norm_weight_post_attn", "t": [TAG_COMP_LAYERNORM, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights], "f": f"model.layers.{layer_idx}.post_attention_layernorm.weight"},
            {"p": "gate_weights", "t": [TAG_COMP_FFN_GATE, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights],  "f": f"model.layers.{layer_idx}.mlp.gate_proj.weight"},
            {"p": "up_weights",   "t": [TAG_COMP_FFN_UP, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights],    "f": f"model.layers.{layer_idx}.mlp.up_proj.weight"},
            {"p": "down_weights", "t": [TAG_COMP_FFN_DOWN, layer_tag, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights],  "f": f"model.layers.{layer_idx}.mlp.down_proj.weight"},
        ]
        knowledge_needs_ffn, missing_essential = _collect_knowledge_needs(kn_defs_ffn, "FFN", layer_idx)

        if not missing_essential:
            interface_ffn = {
                "inputs": [{"name":"attn_block_output"}, {"name":"residual_input"}], # residual_input is the attention block output, used for the second residual
                "outputs": [{"name":"layer_output"}],
                "knowledge_needed": knowledge_needs_ffn
            }
            ops_sequences_ffn = {'default': [
                [OP_STORE, 'residual_ffn'], # Keep the attention block output for the second residual
                [OP_QWEN2_RMSNORM, {"norm_weight": "norm_weight_post_attn", "eps": rms_norm_eps}],
                # Assumption: OP_QWEN2_MLP takes the normalised input and returns the down-projection output.
                [OP_QWEN2_MLP, {
                    "gate_weights": "gate_weights", "up_weights": "up_weights", "down_weights": "down_weights"
                }],
                [OP_ADD, {"input_a": "residual_ffn", "input_b": "_"}] # Second residual add
            ]}
            create_and_save_processor(f"FFN Processor L{layer_idx}", coord_ffn, tags_ffn, interface_ffn, ops_sequences_ffn)
        else:
            processor_errors += 1
    except Exception as e:
        print(f"Error defining FFN L{layer_idx}: {e}")
        processor_errors += 1


# --- 3. Final Norm Processor ---
# A single OP_QWEN2_RMSNORM over the final hidden state, using the "model.norm.weight" knowledge tensor.
try:
    coord = TensorCoordinate(layer=-1, group=processor_group_idx, nest=0, x=1)
    tags = [TAG_TYPE_PROCESSOR, TAG_COMP_LAYERNORM, model_tag]
    kn_tags = [TAG_COMP_LAYERNORM, model_tag, TAG_COMP_WEIGHTS, prec_tag_weights]
    pattern = "model.norm.weight"
    kid = find_knowledge_id(pattern)
    if not kid: raise ValueError("Final Norm knowledge tensor ID not found in map.")
    knowledge_needs = [{"param_name": "norm_weight", "tags": kn_tags, "knowledge_id": kid}]
    interface = {"inputs": [{"name":"final_hidden_state"}], "outputs": [{"name":"final_normed_state"}], "knowledge_needed": knowledge_needs}
    ops_sequences = {'default': [[OP_QWEN2_RMSNORM, {"norm_weight": "norm_weight", "eps": rms_norm_eps}]]}
    create_and_save_processor("Final Norm Processor", coord, tags, interface, ops_sequences)
# Any failure (including the missing-ID ValueError above) is counted and the cell continues.
except Exception as e: print(f"Error defining Final Norm Processor: {e}"); processor_errors += 1


# --- 4. LM Head Processor (kept low-level) ---
# Uses OP_LINEAR_HEAD (an alias of OP_MATRIX_MULTIPLY defined above); its knowledge tensor is
# requested with the INT8 precision tag rather than prec_tag_weights.
try:
    coord = TensorCoordinate(layer=-1, group=processor_group_idx, nest=0, x=2)
    tags = [TAG_TYPE_PROCESSOR, TAG_FUNC_LINEAR, model_tag]
    kn_tags = [TAG_COMP_LM_HEAD, model_tag, TAG_COMP_WEIGHTS, TAG_PREC_INT8]
    pattern = "lm_head.weight"
    kid = find_knowledge_id(pattern)
    if not kid: raise ValueError("LM Head knowledge tensor ID not found in map.")
    knowledge_needs = [{"param_name": "lm_head_weights", "tags": kn_tags, "knowledge_id": kid}]
    interface = {"inputs": [{"name":"final_normed_state"}], "outputs": [{"name":"logits"}], "knowledge_needed": knowledge_needs}
    ops_sequences = {'default': [[OP_LINEAR_HEAD, {"weights": "lm_head_weights"}]]}
    create_and_save_processor("LM Head Processor", coord, tags, interface, ops_sequences)
# Same best-effort handling as the processors above.
except Exception as e: print(f"Error defining LM Head Processor: {e}"); processor_errors += 1


# --- Save the processor map and the reference outputs ---
print(f"\n--- Finalizing Cell 6 ({processor_errors} errors during processor creation) ---")

# The processor map is written only when every processor was created and mapped.
processor_map_filepath = DB_PATH / f"{map_model_name}_proc_map.pkl"
try:
    if processor_errors == 0:
        # Embedding + final norm + LM head, plus one attention and one FFN processor per layer.
        expected_proc_count = 3 + 2 * num_layers
        if len(processor_map) == expected_proc_count:
            with open(processor_map_filepath, 'wb') as f:
                pickle.dump(processor_map, f)
            print(f"Processor map saved to {processor_map_filepath} ({len(processor_map)} entries)")
        else:
            print(f"WARN: Processor map has incorrect entry count ({len(processor_map)} vs {expected_proc_count}). NOT SAVED.")
            processor_errors += 1
    else:
        print(f"Processor map NOT saved due to {processor_errors} errors.")
except Exception as e:
    print(f"Error saving processor map: {e}")
    processor_errors += 1

# The reference outputs go to REFERENCE_OUTPUT_FILENAME relative to the working directory (not DB_PATH).
ref_output_path = Path(REFERENCE_OUTPUT_FILENAME)
try:
    if hf_reference_outputs:
        with open(ref_output_path, 'wb') as f:
            pickle.dump(hf_reference_outputs, f, pickle.HIGHEST_PROTOCOL)
        print(f"Reference HF outputs saved to {ref_output_path} ({len(hf_reference_outputs)} entries)")
    else:
        print(f"WARN: No reference HF outputs were captured, file '{ref_output_path}' not saved.")
        processor_errors += 1
except Exception as e:
    print(f"Error saving reference outputs: {e}")
    processor_errors += 1

# --- Cleanup ---
if 'vec' in locals() and vec and hasattr(vec, 'db') and vec.db:
    print("\nClosing Veector DB connection...")
    vec.db.close()
gc.collect()
print("\nMemory cleanup attempted.")

if processor_errors == 0:
    print("\n--- Cell 6 Finished Successfully ---")
else:
    print(f"\n--- Cell 6 Finished with {processor_errors} ERRORS ---")



In [None]:
# === Skript sravnenija Veector(fp16) s sohranennymi vyhodami HF v10 ===
# Cel': Sravnit' promezhutochnye vyhody Veector (fp16) s zaranee sohranennymi etalonnymi HF (fp32)

import time
import pickle
import numpy as np
import traceback
import os
from pathlib import Path
from functools import partial

# --- Required third-party libraries ---
try:
    import torch  # original note: needed for data types in some places
    from transformers import AutoTokenizer, AutoConfig  # only the tokenizer and the config are loaded
    print("Torch and Transformers imported successfully.")
except ImportError as e:
    print(f"FATAL ERROR: Missing essential libraries (torch, transformers): {e}")
    print("Please install them: pip install torch transformers accelerate")
    # Re-raise instead of calling exit(): inside a notebook kernel exit() asks
    # the kernel to shut down (discarding all state) rather than cleanly
    # aborting this cell. Re-raising stops the cell and keeps the kernel alive.
    raise

# --- Veector project imports ---
def _version_tuple(version):
    """Turn a dotted version string into a tuple of ints for ordering.

    Plain string comparison orders versions lexicographically, so "0.7.10"
    would sort before "0.7.9". Comparing integer tuples gives the intended
    numeric ordering.

    Args:
        version: Version value such as "0.7.9" (an optional leading "v" is
            ignored). Non-string values are converted with ``str``.

    Returns:
        Tuple with one int per dot-separated component; a component without
        leading digits counts as 0, and trailing non-digit text in a component
        (e.g. "9rc1") is ignored.
    """
    components = []
    for piece in str(version).strip().lstrip("vV").split("."):
        digits = ""
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        components.append(int(digits) if digits else 0)
    return tuple(components)


try:
    # Make sure the path to the project files is correct, for example:
    # import sys
    # sys.path.append('/content/src')

    from core import Veector, CORE_VERSION
    from tensors import TensorCoordinate, TENSORS_VERSION, GROUP_IDX_QWEN_KNOWLEDGE
    from operations import OPERATIONS_VERSION
    from veectordb import VeectorDB, VEECTORDB_VERSION

    print(f"Using Core: {CORE_VERSION}, Tensors: {TENSORS_VERSION}, Ops: {OPERATIONS_VERSION}, DB: {VEECTORDB_VERSION}")
    # Numeric (not lexicographic) comparison so that e.g. 0.7.10 counts as newer than 0.7.9.
    if _version_tuple(CORE_VERSION) < _version_tuple("0.7.9"):
        print("WARN: Expected core v0.7.9+ for knowledge group fix logging.")
    if _version_tuple(OPERATIONS_VERSION) < _version_tuple("0.8.9"):
        print("WARN: Expected operations v0.8.9+ for SDPA stability fix.")

    print("Veector components imported successfully.")
except ImportError as e:
    print(f"FATAL ERROR: Failed to import Veector components: {e}")
    print("Ensure core.py, tensors.py, operations.py, veectordb.py are accessible.")
    # Re-raise rather than exit(): in a notebook exit() requests a kernel
    # shutdown instead of simply stopping this cell.
    raise
except Exception as e_other:
    print(f"FATAL ERROR during Veector imports: {e_other}")
    raise

# --- Configuration ---
# Model identity: one Hugging Face repo id supplies both the tokenizer and the config.
MODEL_SOURCE = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
TOKENIZER_SOURCE = MODEL_SOURCE

# Storage locations.
DB_PATH = Path("./data/db")  # path to the Veector DB in Colab
REFERENCE_OUTPUT_FILENAME = "hf_reference_outputs_fp32.pkl"  # file holding the reference outputs

# Run parameters.
PROMPT = "Hello, how are you?"  # same prompt as was used when the references were created
NEST_LEVEL = 1  # Veector will run in float16
KNOWLEDGE_GROUP_ID = GROUP_IDX_QWEN_KNOWLEDGE  # original note says this equals 100 -- TODO confirm in tensors.py
FALLBACK_MAX_SEQ_LEN = 2048  # used when the model config has no max_position_embeddings

# Tolerances passed to np.allclose when comparing the outputs.
COMPARISON_TOLERANCE_ATOL = 5e-3  # absolute tolerance
COMPARISON_TOLERANCE_RTOL = 1e-3  # relative tolerance

# --- Load the tokenizer, the model config and Veector ---
# Defaults are assigned first so that every name exists even if loading fails.
print("\n--- Loading Tokenizer, Config, and Veector ---")
tokenizer = None
vec = None
processor_map = None
model_config = None
num_layers = 0
num_kv_heads = 0
head_dim = 0
max_seq_len = FALLBACK_MAX_SEQ_LEN

try:
    print(f"Loading Tokenizer from: {TOKENIZER_SOURCE}")
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_SOURCE, trust_remote_code=True)
    print(f"Tokenizer class: {tokenizer.__class__.__name__}")

    # The config is loaded on its own; the model weights are not loaded here.
    print(f"Loading Config from: {MODEL_SOURCE}")
    model_config = AutoConfig.from_pretrained(MODEL_SOURCE, trust_remote_code=True)
    num_layers = model_config.num_hidden_layers
    # Configs without a separate KV-head count fall back to the attention-head count.
    num_kv_heads = getattr(model_config, 'num_key_value_heads', model_config.num_attention_heads)
    head_dim = model_config.hidden_size // model_config.num_attention_heads
    max_seq_len = getattr(model_config, 'max_position_embeddings', FALLBACK_MAX_SEQ_LEN)
    print(f"Config loaded: L={num_layers}, KVH={num_kv_heads}, HDim={head_dim}, MaxSeqLen={max_seq_len}")

    print(f"Initializing Veector from DB: {DB_PATH.resolve()}")
    vec = Veector(db_dir=DB_PATH)
    # The map file is named after the last component of the model repo id.
    map_model_name = MODEL_SOURCE.split('/')[-1]
    proc_map_file = DB_PATH / f"{map_model_name}_proc_map.pkl"
    # NOTE: pickle.load can execute arbitrary code from the file; only load a
    # processor map that was written by this notebook's conversion cell.
    with open(proc_map_file, 'rb') as f:
        processor_map = pickle.load(f)
    print(f"Veector initialized, loaded processor map ({len(processor_map)} entries).")

    # --- DIAGNOSTIC CHECK: try loading the embedding processor directly ---
    # Best effort only: problems are reported but do not abort the cell.
    try:
        embedding_proc_id = processor_map.get("embedding")
        if embedding_proc_id:
            print(f"Attempting to load Embedding Processor (ID: {embedding_proc_id}) directly...")
            embedding_structure = vec.load_tensor(embedding_proc_id, load_knowledge=False)
            if embedding_structure and vec.validate_tensor(embedding_structure):
                print("  SUCCESS: Embedding processor structure loaded and validated OK.")
                # Part of the structure can be printed for inspection:
                # from tensors import get_tensor_interface, get_processor_ops_sequences
                # print("    Interface:", get_tensor_interface(embedding_structure))
                # print("    Ops Sequence:", get_processor_ops_sequences(embedding_structure))
            elif embedding_structure:
                print("  ERROR: Embedding processor structure loaded BUT FAILED validation.")
            else:
                print(f"  ERROR: Failed to load tensor structure for ID: {embedding_proc_id}")
        else:
            print("  ERROR: 'embedding' key not found in processor_map.")
    except Exception as diag_e:
        print(f"  ERROR during diagnostic check: {diag_e}")
    # --- END DIAGNOSTIC CHECK ---

except Exception as e:
    print(f"FATAL ERROR during loading: {e}")
    # Re-raise rather than exit(): in a notebook exit() requests a kernel
    # shutdown instead of simply stopping this cell. The re-raised exception
    # carries the full traceback, so it is not printed separately here.
    raise

# --- Load the reference outputs ---
# Reads the pickled dict of reference HF outputs that the comparison below uses.
print(f"\n--- Loading Reference HF Outputs from {REFERENCE_OUTPUT_FILENAME} ---")
hf_outputs = None
try:
    ref_output_path = Path(REFERENCE_OUTPUT_FILENAME)
    if not ref_output_path.is_file():
        raise FileNotFoundError(f"Reference output file not found: {ref_output_path.resolve()}. Please run the script to generate reference outputs first.")
    # NOTE: pickle.load can execute arbitrary code from the file; only load a
    # reference file that was produced by this notebook.
    with open(ref_output_path, 'rb') as f:
        hf_outputs = pickle.load(f)
    if not isinstance(hf_outputs, dict):
        raise TypeError("Loaded reference data is not a dictionary.")
    print(f"Successfully loaded {len(hf_outputs)} reference outputs.")
    # print(f"Available keys: {list(hf_outputs.keys())}")
except Exception as e:
    print(f"FATAL ERROR loading reference outputs: {e}")
    # Re-raise rather than exit(): in a notebook exit() requests a kernel
    # shutdown instead of simply stopping this cell.
    raise


# --- Prepare the input data ---
# Tokenizes PROMPT through the tokenizer's chat template into a NumPy id array.
print("\n--- Preparing Input IDs ---")
prompt_input_ids_np = None
input_seq_len = 0
try:
    messages = [{"role": "user", "content": PROMPT}]
    print("Applying chat template...")
    prompt_input_ids_np = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="np"
    )
    # Add a leading axis when the tokenizer returns a flat 1-D id vector, so
    # that axis 1 can be read as the sequence length below.
    if prompt_input_ids_np.ndim == 1:
        prompt_input_ids_np = np.expand_dims(prompt_input_ids_np, axis=0)

    input_seq_len = prompt_input_ids_np.shape[1]

    print(f"Input IDs shape: {prompt_input_ids_np.shape}")
    print(f"Input Sequence Length: {input_seq_len}")
    print(f"Decoded Input: '{tokenizer.decode(prompt_input_ids_np[0])}'")
except Exception as e:
    print(f"FATAL ERROR preparing input: {e}")
    # Re-raise rather than exit(): in a notebook exit() requests a kernel
    # shutdown instead of simply stopping this cell.
    raise


# --- Run the Veector processors and collect their outputs ---
# Pipeline: embedding -> (attention, FFN) per layer -> final norm -> LM head.
# Each stage's output is stored in `veector_outputs` under the key that the
# comparison section looks up. The first failure sets `error_occurred` and
# stops the pipeline.
print("\n--- Running Veector Processors (float16) ---")
veector_outputs = {}
last_hidden_state = None
error_occurred = False


def _run_veector_processor(label, processor_id, context):
    """Run one Veector processor and return its result dict, or None on failure.

    Args:
        label: Stage name used in the log lines (e.g. "Attn L3").
        processor_id: Processor id taken from ``processor_map``.
        context: Context dict handed to ``vec.compute``.

    Returns:
        The dict returned by ``vec.compute`` when its "status" is "completed"
        and its "data" entry is not None. Otherwise None, after printing the
        reason (the processor's reported error or the missing data).
    """
    result = vec.compute(processor_id, context=context)
    if result.get("status") != "completed":
        print(f"    ERROR in {label}: {result.get('provenance', {}).get('error')}")
        return None
    if result.get("data") is None:
        print(f"    ERROR: {label} returned None in 'data' field.")
        return None
    return result


batch_size = prompt_input_ids_np.shape[0]
cache_dtype = np.float16
initial_cache_shape = (batch_size, num_kv_heads, max_seq_len, head_dim)
print(f"Initializing zero KV cache for Veector with shape: {initial_cache_shape}")
# Allocate each buffer with np.zeros instead of copying a zero template:
# .copy() writes every byte of every buffer, while freshly zeroed allocations
# are typically backed by the OS lazily, which matters when max_seq_len is large.
current_kv_cache_list = [
    (np.zeros(initial_cache_shape, dtype=cache_dtype),
     np.zeros(initial_cache_shape, dtype=cache_dtype))
    for _ in range(num_layers)
]

try:
    # 1. Embedding
    print("  Running Veector Embedding...")
    embed_result = _run_veector_processor(
        "Embedding",
        processor_map["embedding"],
        {
            "input_data": prompt_input_ids_np,
            "required_nest": NEST_LEVEL,  # = 1 (float16)
            "target_knowledge_group": KNOWLEDGE_GROUP_ID,
        },
    )
    if embed_result is None:
        error_occurred = True
    else:
        last_hidden_state = embed_result["data"]
        veector_outputs["embed_tokens"] = last_hidden_state
        print("    Embedding OK.")

    # 2. Layers (attention followed by FFN, in order)
    current_input = last_hidden_state
    if not error_occurred:
        # The whole prompt is processed in one pass starting at position 0, so
        # the position ids are identical for every layer and are built once.
        current_position_ids = np.arange(0, input_seq_len, dtype=np.int64).reshape(1, input_seq_len)
        for i in range(num_layers):
            print(f"  Running Veector Layer {i}...")
            # current_input cannot be None here: the helper only returns
            # results whose 'data' entry is not None.
            layer_past_key, layer_past_value = current_kv_cache_list[i]
            attn_result = _run_veector_processor(
                f"Attn L{i}",
                processor_map[f"attn_{i}"],
                {
                    "input_data": current_input,
                    "residual_input": current_input,
                    "required_nest": NEST_LEVEL,
                    "target_knowledge_group": KNOWLEDGE_GROUP_ID,
                    "position_ids": current_position_ids,
                    "total_seq_len": input_seq_len,
                    "past_key": layer_past_key,
                    "past_value": layer_past_value,
                    "start_pos": 0,
                },
            )
            if attn_result is None:
                error_occurred = True
                break
            attn_hidden_state = attn_result["data"]

            # The returned caches are only checked for presence; they are not
            # fed back because this script runs a single forward pass.
            result_step_context = attn_result.get("step_context") or {}
            returned_k = result_step_context.get('k_cache_out')
            returned_v = result_step_context.get('v_cache_out')
            if returned_k is None or returned_v is None:
                print(f"    WARN: K/V cache not found in step_context for Attn L{i}.")
            print(f"    Attn L{i} OK.")

            ffn_result = _run_veector_processor(
                f"FFN L{i}",
                processor_map[f"ffn_{i}"],
                {
                    "input_data": attn_hidden_state,
                    "residual_input": attn_hidden_state,
                    "required_nest": NEST_LEVEL,
                    "target_knowledge_group": KNOWLEDGE_GROUP_ID,
                },
            )
            if ffn_result is None:
                error_occurred = True
                break
            layer_output = ffn_result["data"]
            veector_outputs[f"layer_{i}_output"] = layer_output
            current_input = layer_output
            print(f"    FFN L{i} OK.")

    # 3. Final Norm
    if not error_occurred and current_input is not None:
        print("  Running Veector Final Norm...")
        norm_result = _run_veector_processor(
            "Final Norm",
            processor_map["final_norm"],
            {
                "input_data": current_input,
                "required_nest": NEST_LEVEL,
                "target_knowledge_group": KNOWLEDGE_GROUP_ID,
            },
        )
        if norm_result is None:
            error_occurred = True
        else:
            final_norm_output = norm_result["data"]
            veector_outputs["final_norm"] = final_norm_output
            last_hidden_state = final_norm_output
            print("    Final Norm OK.")

    # 4. LM Head
    if not error_occurred and last_hidden_state is not None:
        print("  Running Veector LM Head...")
        lm_head_result = _run_veector_processor(
            "LM Head",
            processor_map["lm_head"],
            {
                "input_data": last_hidden_state,
                "required_nest": NEST_LEVEL,
                "target_knowledge_group": KNOWLEDGE_GROUP_ID,
            },
        )
        if lm_head_result is None:
            error_occurred = True
        else:
            lm_head_output = lm_head_result["data"]
            veector_outputs["lm_head"] = lm_head_output
            print("    LM Head OK.")

except Exception as e:
    print(f"FATAL ERROR during Veector execution: {e}")
    traceback.print_exc()
    error_occurred = True
finally:
    # Always release the DB handle, whether or not the run succeeded.
    if vec and hasattr(vec, 'db') and vec.db:
        try:
            vec.db.close()
            print("Veector DB connection closed.")
        except Exception as db_close_e:
            print(f"Error closing Veector DB: {db_close_e}")

# --- Compare the results ---
# Walks the stages in pipeline order and stops at the first stage whose Veector
# output is missing, has a different shape, or is not numerically close to the
# reference. A verdict is printed only when a comparison actually ran.
print("\n--- Comparing Outputs (Veector fp16 vs HF fp32) ---")
first_difference_found = False

# Keys in pipeline order: embedding, every layer output, final norm, LM head.
comparison_keys = ["embed_tokens"]
comparison_keys.extend(f"layer_{i}_output" for i in range(num_layers))
comparison_keys.extend(["final_norm", "lm_head"])

if not hf_outputs:
    print("Comparison skipped because reference HF outputs were not loaded.")
elif error_occurred:
    print("Comparison skipped due to errors during Veector execution.")
else:
    for key in comparison_keys:
        print(f"Comparing: {key}")
        hf_out = hf_outputs.get(key)
        vec_out = veector_outputs.get(key)

        if hf_out is None or vec_out is None:
            print(f"  ERROR: Output missing for {key} (HF: {'OK' if hf_out is not None else 'MISSING'}, Veector: {'OK' if vec_out is not None else 'MISSING'})")
            if key not in hf_outputs: print(f"    Key '{key}' not found in loaded reference file '{REFERENCE_OUTPUT_FILENAME}'.")
            if key not in veector_outputs: print(f"    Key '{key}' not found in Veector outputs.")
            first_difference_found = True
            break

        print(f"  HF Shape (fp32): {hf_out.shape}, dtype: {hf_out.dtype}")
        print(f"  Veector Shape (fp16): {vec_out.shape}, dtype: {vec_out.dtype}")

        if hf_out.shape != vec_out.shape:
            print(f"  ERROR: Shape mismatch for {key}!")
            first_difference_found = True
            break

        try:
            # Coerce both sides to float32 explicitly instead of assuming the
            # reference is already float32.
            hf_out_f32 = np.asarray(hf_out, dtype=np.float32)
            vec_out_f32 = np.asarray(vec_out, dtype=np.float32)

            # np.allclose tests |a - b| <= atol + rtol * |b|, i.e. the second
            # argument is the reference, so the HF output goes second.
            are_close = np.allclose(
                vec_out_f32,
                hf_out_f32,
                atol=COMPARISON_TOLERANCE_ATOL,
                rtol=COMPARISON_TOLERANCE_RTOL
            )
            print(f"  Result: {'CLOSE' if are_close else '!!! DIFFERENT !!!'}")

            if not are_close:
                diff = np.abs(hf_out_f32 - vec_out_f32)
                max_diff = np.max(diff)
                mean_diff = np.mean(diff)
                print(f"    Max Abs Difference:  {max_diff:.6f}")
                print(f"    Mean Abs Difference: {mean_diff:.6f}")
                print(f"    HF Sample (fp32):      {hf_out.flatten()[:5]}")
                print(f"    Veector Sample (fp16): {vec_out.flatten()[:5]}")
                first_difference_found = True
                break

        except Exception as cmp_e:
            print(f"  ERROR during comparison for {key}: {cmp_e}")
            first_difference_found = True
            break

    # Verdict only when a comparison ran; the skip branches above have already
    # explained why nothing was compared.
    if first_difference_found:
        print("\nFAILURE: Differences found. Check the output above for the first mismatch.")
    else:
        print("\nSUCCESS: All compared outputs are close!")

print("\n--- Comparison Script Finished ---")



In [None]:
# Archive the "data" directory (it contains the Veector DB under ./data/db) for download.
import shutil

# make_archive returns the name of the file it wrote. Using that value keeps
# zip_name in sync with the archive instead of hard-coding the name twice.
zip_name = shutil.make_archive("model_DeepSeek-r1-distill-1.5b", "zip", "data")

In [None]:
# Upload the archive to Google Drive.
# Relies on `drive` and `os` from the notebook's import cell, and on `shutil`
# and `zip_name` from the archive cell above.
drive.mount('/content/drive', force_remount=True)
destination_path = "/content/drive/My Drive/models/"
# shutil.copy does not create missing directories, so make sure the target
# folder exists before copying into it.
os.makedirs(destination_path, exist_ok=True)
shutil.copy(zip_name, destination_path)
print(f"🟢 [LOG] ✅ Архив загружен на Google Drive: {destination_path}")