In [14]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import SimpleDirectoryReader
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core import Settings
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from transformers import AutoTokenizer
from llama_index.core import set_global_tokenizer
from llama_index.core.text_splitter import CodeSplitter
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.packs.code_hierarchy import (
    CodeHierarchyAgentPack,
    CodeHierarchyNodeParser,
)
from pathlib import Path
from bs4 import BeautifulSoup
import psycopg 
from llama_index.core import PromptTemplate
import os
from dotenv import load_dotenv
import nest_asyncio

In [2]:
# Load the dotenv file, then read the Postgres credentials back from the
# environment. os.getenv yields None for any variable that is not set.
load_dotenv("/setup/on.env")
pg_user, pg_db, pg_pwd = (
    os.getenv(key)
    for key in ("POSTGRES_USER", "POSTGRES_DB", "POSTGRES_PASSWORD")
)

In [3]:
# Tokenizer of the Qwen2.5-Coder model; its encode function is registered as
# the global llama_index tokenizer.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-14B-Instruct")
set_global_tokenizer(tokenizer.encode)

# Embedding model stored on the shared Settings object.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

In [4]:
def drop(name):
    """Drop the Postgres table called ``name`` if it exists.

    Connects to the ``postgres`` host with the module-level ``pg_db``,
    ``pg_user`` and ``pg_pwd`` values.

    Args:
        name: Table name, optionally schema-qualified (``schema.table``).
            Each dotted part is quoted as an SQL identifier instead of being
            pasted into the statement text, so the name is matched
            case-sensitively and cannot inject additional SQL.
    """
    # Function-scope import keeps this cell self-contained.
    from psycopg import sql

    statement = sql.SQL("drop table if exists {}").format(
        sql.Identifier(*name.split("."))
    )
    # Keyword parameters avoid building a conninfo string by hand, which
    # breaks when a credential contains spaces or quotes.
    with psycopg.connect(
        host="postgres", dbname=pg_db, user=pg_user, password=pg_pwd
    ) as conn:
        with conn.cursor() as cur:
            cur.execute(statement)
        conn.commit()


# Remove any existing "data_code" table so the notebook starts from a clean
# slate.
# NOTE(review): presumably this is the table backing the PGVectorStore that is
# configured with table_name="code" — confirm the naming.
drop("data_code")

In [5]:
# HNSW index settings handed to the vector store as hnsw_kwargs.
hnsw_settings = dict(
    hnsw_m=14,
    hnsw_ef_construction=72,
    hnsw_ef_search=52,
    hnsw_dist_method="vector_cosine_ops",
)

# Postgres-backed vector store, using the credentials read from the
# environment.
# NOTE(review): embed_dim presumably has to match the output size of
# Settings.embed_model — confirm.
vector_store = PGVectorStore.from_params(
    host="postgres",
    port=5432,
    database=pg_db,
    user=pg_user,
    password=pg_pwd,
    table_name="code",
    embed_dim=768,
    hnsw_kwargs=hnsw_settings,
)

# Storage context that routes index writes to the vector store above.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [6]:
# File extensions to load, keyed by the language name given to CodeSplitter.
language_ext = {"python": [".py"], "lua": [".lua"], "c": [".c", ".h"]}

# One VectorStoreIndex per language, keyed by language name.
# NOTE(review): every index is built on the same storage_context, so they all
# write to one vector store — confirm that per-language retrievers are really
# separated as intended.
indexes = {}
for language, extensions in language_ext.items():
    # Load every matching file under the repository, recording its path as
    # metadata.
    documents = SimpleDirectoryReader(
        input_dir="/notebooks/data/crow-repository/",
        recursive=True,
        required_exts=extensions,
        file_metadata=lambda path: {"filepath": path},
    ).load_data()

    # The same splitter configuration chunks the documents and is registered
    # on the index as its transformation.
    splitter = CodeSplitter(language=language, max_chars=1024, chunk_lines=20)
    split_nodes = splitter.get_nodes_from_documents(documents)

    index = VectorStoreIndex(
        split_nodes,
        storage_context=storage_context,
        show_progress=True,
        embed_model=Settings.embed_model,
        transformations=[splitter],
    )
    indexes[language] = index

Generating embeddings:   0%|          | 0/49 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/431 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/616 [00:00<?, ?it/s]

In [7]:
# Display the index objects in insertion order.
list(indexes.values())

[<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7fd7146408f0>,
 <llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7fd889be4ef0>,
 <llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7fd6f458f8f0>]

In [8]:
def completion_to_prompt(completion):
    """Wrap a bare completion string in the ``<|im_start|>``/``<|im_end|>`` template.

    The text becomes the user turn, preceded by an empty system turn and
    followed by an opened assistant turn.
    """
    segments = (
        "<|im_start|>system\n<|im_end|>\n",
        "<|im_start|>user\n",
        f"{completion}",
        "<|im_end|>\n",
        "<|im_start|>assistant\n",
    )
    return "".join(segments)

def messages_to_prompt(messages):
    """Render chat messages into one ``<|im_start|>``/``<|im_end|>`` prompt.

    Each message with role ``system``, ``user`` or ``assistant`` becomes a
    closed turn; messages with any other role are skipped. If the rendered
    prompt does not begin with a system turn, a complete default system turn
    is prepended. The prompt always ends with an opened assistant turn so the
    model continues as the assistant.

    Args:
        messages: Iterable of objects exposing ``role`` and ``content``.

    Returns:
        The prompt string.
    """
    default_system = (
        "You are Qwen, created by Alibaba Cloud. You are a helpful assistant. "
        "You answer user queries about the crow software library using "
        "retrieved source code data from the library repository."
    )
    known_roles = ("system", "user", "assistant")

    turns = []
    for message in messages:
        # Compare against the literal names and interpolate the literal, not
        # the role object, so the tag text never depends on how the role
        # object formats itself.
        for role in known_roles:
            if message.role == role:
                turns.append(f"<|im_start|>{role}\n{message.content}<|im_end|>\n")
                break
    prompt = "".join(turns)

    # Guarantee a well-formed (closed) system turn at the start.
    if not prompt.startswith("<|im_start|>system"):
        prompt = f"<|im_start|>system\n{default_system}<|im_end|>\n" + prompt

    # Open the assistant turn the model is expected to complete.
    return prompt + "<|im_start|>assistant\n"

# Download location of the quantised Qwen2.5-Coder GGUF weights.
qwen_gguf_url = "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q6_K.gguf"

# Sampling options handed to LlamaCPP as generate_kwargs.
sampling_options = {"repeat_penalty": 1.15, "top_k": 0, "top_p": 0.5, "min_p": 0.1}

# Model-loading options handed to LlamaCPP as model_kwargs.
# NOTE(review): n_gpu_layers=-1 presumably offloads every layer to the GPU —
# confirm.
loader_options = {"n_gpu_layers": -1}

llm = LlamaCPP(
    model_url=qwen_gguf_url,
    temperature=0.1,
    max_new_tokens=1024,
    context_window=12384,
    generate_kwargs=sampling_options,
    model_kwargs=loader_options,
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Register the model on the shared Settings object.
Settings.llm = llm

ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    yes
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4060 Ti, compute capability 8.9, VMM: yes
llama_load_model_from_file: using device CUDA0 (NVIDIA GeForce RTX 4060 Ti) - 14287 MiB free
llama_model_loader: loaded meta data with 38 key-value pairs and 579 tensors from /llamaindex_cache/models/Qwen2.5-Coder-14B-Instruct-Q6_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen2
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Qwen2.5 Coder 14B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv

In [15]:
# Apply nest_asyncio up front; the retriever below is configured with
# use_async=True and is driven from inside the notebook.
nest_asyncio.apply()

# One retriever per language index, fused into a single retriever.
language_retrievers = [index.as_retriever() for index in indexes.values()]
retriever = QueryFusionRetriever(
    language_retrievers,
    similarity_top_k=2,
    num_queries=4,
    use_async=True,
    verbose=True,
)

# Query engine built on top of the fused retriever.
query_engine = RetrieverQueryEngine.from_args(retriever)

In [16]:
# Question sent to the query engine.
question = (
    "Using the provided context, answer the following query: "
    "How can I use the lua ASL library to generate a melody? "
    "Can you provide an example?"
)
response = query_engine.query(question)

Llama.generate: 81 prefix-match hit, remaining 1 prompt tokens to eval
llama_perf_context_print:        load time =     657.18 ms
llama_perf_context_print: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /    39 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =    2697.47 ms /    40 tokens


Generated queries:
1. How to use lua ASL library for melody generation
2. Example of generating a melody using the lua ASL library
3. Lua ASL library tutorial: Melody creation


Llama.generate: 8 prefix-match hit, remaining 800 prompt tokens to eval
llama_perf_context_print:        load time =     657.18 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   800 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   401 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   26424.08 ms /  1201 tokens


In [17]:
# Print the query engine's response.
print(response)

To generate a melody using the lua ASL library, you can use functions like `note`, `lfo`, `oscillate`, and `pulse`. Here's an example of how to create a simple melody:

```lua
-- Define the notes for the melody
local melody_notes = {60, 62, 64, 65, 67, 69}

-- Create a sequence of notes with durations
local melody_sequence = {}
for _, note in ipairs(melody_notes) do
    table.insert(melody_sequence, note(1/8)) -- Play each note for an eighth beat
end

-- Combine the notes into a single ASL descriptor
local melody_descriptor = asl.sequence(unpack(melody_sequence))

-- Apply an LFO to modulate the pitch of the melody
local lfo_descriptor = lfo(2, 10, 'sine') -- Create an LFO with a frequency of 2 Hz and amplitude of 10

-- Combine the melody descriptor with the LFO descriptor
local final_melody_descriptor = asl.multiply(melody_descriptor, lfo_descriptor)

-- Play the final melody on output 1
output[1](final_melody_descriptor)
```

In this example, we first define a sequence of notes for 