In [15]:
import logging
import sys

# Route INFO-and-above log records to stdout so they appear in the cell output.
# basicConfig() already installs one stdout StreamHandler on the root logger;
# attaching a second one by hand makes every record print twice, and re-running
# the cell would stack yet another handler each time. force=True removes any
# handlers already on the root logger first, so this cell is safe to re-run.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [4]:
from llama_index.embeddings import HuggingFaceEmbedding

# Hugging Face model id of the embedding model used for indexing and querying.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
from llama_index.llms import LlamaCPP

# GGUF build (Q4_K_M quantization) of Zephyr 7B beta, fetched from Hugging Face.
model_url = "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_M.gguf"

llm = LlamaCPP(
    # --- model location ---
    # model_path may point at a pre-downloaded file instead of using model_url
    model_url=model_url,
    model_path=None,
    # --- generation settings ---
    temperature=0.1,
    max_new_tokens=512,
    # Set a little below 4096 to leave some wiggle room.
    # NOTE(review): the original comment cited llama2's 4096-token window, but
    # model_url points at a Zephyr model - confirm the window for this model.
    context_window=3900,
    # --- pass-through kwargs ---
    # generate_kwargs are passed to __call__(); model_kwargs to __init__()
    generate_kwargs={},
    # n_gpu_layers: set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 30},
    verbose=True,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [5]:
from llama_index import ServiceContext

# Bundle the local LLM and the embedding model into one ServiceContext that is
# handed to the index constructors.
service_context = ServiceContext.from_defaults(
    embed_model=embed_model,
    llm=llm,
)

In [6]:
import chromadb

from llama_index import SimpleDirectoryReader, VectorStoreIndex
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext


input_dir = "./ainu_papers/"
# Use the last path component as the collection name. Stripping the trailing
# slash first makes this work for both "./ainu_papers/" and "./ainu_papers".
collection_name = input_dir.rstrip("/").split("/")[-1]

# Persistent Chroma database stored under ./db.
db = chromadb.PersistentClient(path="./db")

# Fetch the collection, creating it if it does not exist yet. This replaces a
# bare `except:` around get_collection(), which treated *any* failure
# (including KeyboardInterrupt) as "collection missing".
chroma_collection = db.get_or_create_collection(collection_name)

# An empty collection must be populated. That covers a freshly created one and
# also one left empty by an earlier run that was interrupted before indexing
# finished; previously that case was never re-indexed.
new = chroma_collection.count() == 0

vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

if new:
    # First run (or recovery): read the documents and embed them into Chroma.
    documents = SimpleDirectoryReader(input_dir=input_dir).load_data()
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents, storage_context=storage_context, service_context=service_context
    )
else:
    # Later runs: build the index on top of the vectors already stored.
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        service_context=service_context,
    )

In [7]:
# Create a query engine on top of the index built or loaded in the previous cell.
query_engine = index.as_query_engine()

In [8]:
# Send one question to the query engine and print the response it returns.
question = "Why Nickel is chosen as material for the experimentation?"
response = query_engine.query(question)
print(response)