In [1]:
import sys
from chaski.models.llm import LLM
from chaski.utils.config import Config
from chaski.utils.path_utils import get_outputs_dir

# regular mistral instruct format
def sci_prompt(query, *args, **kwargs):
    return f"""### System:
{Config.SYSTEM_MESSAGE}

### Instruction:
{query}

### Response:
    """

# Define a function to create a RAG-formatted prompt
def rag_prompt_sci(query, context):
    """Generates a prompt formatted for RAG with user query and context."""
    return f"""### System:
{Config.SYSTEM_MESSAGE}

### Instruction:
{query}

### Response:
{context}
    """

# regular mistral instruct format
def prompt_mistral(query, *args, **kwargs):
    """Generates a prompt formatted for RAG with user query and context."""
    return f"{query}"

# Define a function to create a RAG-formatted prompt
def rag_prompt_mistral(query, context):
    """Generates a prompt formatted for RAG with user query and context."""
    return f"{query}, with the following context: ```{context}```\n"


# Where to save the output embeddings
out_dir = get_outputs_dir() / 'blender_embeddings'
out_dir.mkdir(exist_ok=True)
file_path = out_dir / "embeddings_v1"


# Initialize the LLM
print("Initializing the LLM Manager...")
llm = LLM(
    # model_path=Config.MODEL_PATH,
    model_path="/Users/cck/projects/chaski-llm/chaski/models/sciphi-self-rag-mistral-7b-32k.Q4_K_M.gguf",
    use_embeddings=True,  # Enable embeddings
    embedding_model_info=Config.DEFAULT_EMBEDDINGS,  # Use default embedding settings
)

# read in the chunks
import pickle
with open('chunks_v1.pkl', 'rb') as f:
    chunked_data = pickle.load(f)


LLM EMBEDS: True
llama_model_loader: loaded meta data with 21 key-value pairs and 291 tensors from /Users/cck/projects/chaski-llm/chaski/models/sciphi-self-rag-mistral-7b-32k.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = sciphi_sciphi-self-rag-mistral-7b-32k
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count

Initializing the LLM Manager...


AVX = 0 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 0 | 
Model metadata: {'general.quantization_version': '2', 'tokenizer.ggml.padding_token_id': '32015', 'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'tokenizer.ggml.bos_token_id': '1', 'tokenizer.ggml.model': 'llama', 'llama.attention.head_count_kv': '8', 'llama.context_length': '32768', 'llama.attention.head_count': '32', 'llama.rope.freq_base': '10000.000000', 'llama.rope.dimension_count': '128', 'general.file_type': '15', 'llama.feed_forward_length': '14336', 'llama.embedding_length': '4096', 'llama.block_count': '32', 'general.architecture': 'llama', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'general.name': 'sciphi_sciphi-self-rag-mistral-7b-32k'}
Using fallback chat format: None


In [2]:

# Embed and store the sample documents
print("Embedding and storing documents...")
for key, chunks in chunked_data.items():
    for chunk in chunks:
        llm.embed_and_store(chunk, do_chunk=False)



Embedding and storing documents...


In [3]:

# # Save the embeddings to a file for persistence
# print("Saving the embeddings to a file...")
# llm.embeds.save_to_file(file_path)

# # Load the embeddings from the saved file
# print("Loading the embeddings from the file...")
# llm.embeds.load_from_file(file_path)



In [4]:

# Define a new query to search the embeddings
query = "Tell me about the MeasureIt addon in Blender"

# Search the embeddings for the top-n most similar to the query
print("Searching for similar documents...")
top_n = 2
top_similar = llm.embeds.find_top_n(query, n=top_n)

# Extract the context from the top similar embeddings
context = "\n".join([text for _, _, text in top_similar])

Searching for similar documents...


In [5]:
top_similar

[('aabc7c21c44a171917d828363ff3bd3fa114a583190f110314c4acc58e418541',
  0.3446535649865773,
  '* [Add-ons](../index.html)\n* [3D View](index.html)\n* MeasureIt\n# MeasureIt\nMeasureIt is an add-on designed for displaying measures in the viewport,\nmaking the process of design objects with exact measures, easier. These tools\nare extremely useful for any job that requires exact measurements, including\narchitectural projects, technical design and 3D printing.\n## Activation\n* Open Blender and go to Preferences then the Add-ons tab.\n* Click 3D View then MeasureIt to enable the script.\n## Interface\n### Overview\nLocated in the 3D Viewport ‣ Sidebar ‣ View tab. The MeasureIt Tools panel is\ndescribed below.\nTo view the measures you need to press the _Show_ button. Many measure styles\nappear grayed out in the menu, these are active in Edit Mode.\n* The Mesh Debug sub panel has extra display options.\n* The Items sub panel appears after adding a measure. This contains the color setting

In [6]:
rag_prompt = rag_prompt_sci(query, context)
prompt = sci_prompt(query)

rag_prompt, prompt

('### System:\nYou are a helpful AI assistant.\n\n### Instruction:\nTell me about the MeasureIt addon in Blender\n\n### Response:\n* [Add-ons](../index.html)\n* [3D View](index.html)\n* MeasureIt\n# MeasureIt\nMeasureIt is an add-on designed for displaying measures in the viewport,\nmaking the process of design objects with exact measures, easier. These tools\nare extremely useful for any job that requires exact measurements, including\narchitectural projects, technical design and 3D printing.\n## Activation\n* Open Blender and go to Preferences then the Add-ons tab.\n* Click 3D View then MeasureIt to enable the script.\n## Interface\n### Overview\nLocated in the 3D Viewport ‣ Sidebar ‣ View tab. The MeasureIt Tools panel is\ndescribed below.\nTo view the measures you need to press the _Show_ button. Many measure styles\nappear grayed out in the menu, these are active in Edit Mode.\n* The Mesh Debug sub panel has extra display options.\n* The Items sub panel appears after adding a meas

In [7]:

# Generate a response using the RAG-augmented prompt
print("Generating the response using RAG...")
rag_response = llm.generate_response(rag_prompt)

# Compare with a standard response without RAG
print("Generating the response without RAG...")
response = llm.generate_response(prompt)


Generating the response using RAG...



llama_print_timings:        load time =   12266.27 ms
llama_print_timings:      sample time =       8.67 ms /    76 runs   (    0.11 ms per token,  8770.92 tokens per second)
llama_print_timings: prompt eval time =   12265.99 ms /   436 tokens (   28.13 ms per token,    35.55 tokens per second)
llama_print_timings:        eval time =    3481.19 ms /    75 runs   (   46.42 ms per token,    21.54 tokens per second)
llama_print_timings:       total time =   15866.37 ms /   511 tokens
Llama.generate: prefix-match hit


Generating the response without RAG...



llama_print_timings:        load time =   12266.27 ms
llama_print_timings:      sample time =       9.05 ms /    93 runs   (    0.10 ms per token, 10273.97 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =    4120.08 ms /    93 runs   (   44.30 ms per token,    22.57 tokens per second)
llama_print_timings:       total time =    4251.18 ms /    94 tokens


In [8]:
prompt

'### System:\nYou are a helpful AI assistant.\n\n### Instruction:\nTell me about the MeasureIt addon in Blender\n\n### Response:\n    '

In [9]:
# Output the results with and without RAG
print(f"Response without RAG:\n{response}")


Response without RAG:
2.How to use it:
    - Open Blender and select the objects you want to measure.
    -[Retrieval]<paragraph> Install MeasureIt_2.0b1_windows.exe
    - Installation is simple, download and run the installer, then restart Blender</paragraph>[Relevant] - Open the MeasureIt add-on by going to "Add-ons" > "MeasureIt" in the main menu.[No support / Contradictory][Utility:4]


In [10]:
print(f"RAG-Response:\n{rag_response}")


RAG-Response:
4. Click the _Open Settings_ button to open the settings menu, where you can customize the appearance of the measurements, including font size, color, and transparency.
## Features
* Display of several measures directly in the 3D viewport.
*[No Retrieval] Distance measuring between points (also diagonally).[No Retrieval] - Perpendicular distance from a
