In [20]:
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, PromptTemplate, set_global_service_context
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.ollama import Ollama
from langchain.embeddings import HuggingFaceEmbeddings
import torch


def build_llm_model(
    doc_folder: str,
    model_name: str = "meta-llama/Meta-Llama-3-8B-Instruct",
    embed_model_name: str = "sentence-transformers/all-mpnet-base-v2",
    chunk_size: int = 1024,
) -> VectorStoreIndex:
    """Build a vector store index over the documents found in ``doc_folder``.

    Despite its name, this function does not return the LLM: it creates a
    Hugging Face LLM and an embedding model, bundles them in a
    ``ServiceContext`` and returns the ``VectorStoreIndex`` built with it.

    Args:
        doc_folder: Directory handed to ``SimpleDirectoryReader``.
        model_name: Hugging Face identifier used for both the model and its
            tokenizer.
        embed_model_name: Identifier of the embedding model.
        chunk_size: ``chunk_size`` passed to ``ServiceContext.from_defaults``.

    Returns:
        The index created by ``VectorStoreIndex.from_documents``.
    """
    # Load documents
    documents = SimpleDirectoryReader(doc_folder).load_data()

    # Define system prompt and query wrapper prompt
    system_prompt = """You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."""
    # NOTE(review): these role markers may not match the chat format the
    # default model expects -- confirm against the model card.
    query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")

    # Create an LLM instance
    llm = HuggingFaceLLM(
        context_window=4096,
        max_new_tokens=256,
        generate_kwargs={"temperature": 0.0, "do_sample": False},
        system_prompt=system_prompt,
        query_wrapper_prompt=query_wrapper_prompt,
        tokenizer_name=model_name,
        model_name=model_name,
        device_map="auto",
        # float16 weights with 8-bit loading, to reduce memory usage on CUDA.
        # NOTE: `load_in_8bit` triggers a deprecation warning that asks for a
        # `BitsAndBytesConfig` passed via `quantization_config` instead.
        model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
    )

    # Create an embedding model
    embed_model = HuggingFaceEmbeddings(model_name=embed_model_name)

    # Bundle the LLM, the embedding model and the chunking configuration.
    # NOTE(review): the recorded output echoes this call inside a warning --
    # presumably a deprecation notice for ServiceContext; confirm.
    service_context = ServiceContext.from_defaults(
        chunk_size=chunk_size,
        llm=llm,
        embed_model=embed_model,
    )

    # Create a vector store index
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context
    )

    return index

# Build the index from the documents stored under /data.
index = build_llm_model(doc_folder="/data")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
  service_context=ServiceContext.from_defaults(


In [22]:
# Ask one question against the index and print the generated answer.
question = 'How does NOMAD support the transformation of materials-science data into knowledge and understanding'
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 According to the provided context, NOMAD supports the transformation of materials-science data into knowledge and understanding by providing a platform for formalizing data acquisition, organizing and sharing data, homogenizing and normalizing data for analysis, and integrating with analysis tools. This enables researchers to put their data into machine and human comprehensible representations, making it FAIR (Findable, Accessible, Interoperable, and Reusable). Additionally, NOMAD provides an API and libraries for accessing and analyzing the NOMAD Archive data via state-of-the-art AI tools, allowing for the application of sophisticated AI algorithms to materials-science data. The NOMAD AI Toolkit also offers a user-friendly infrastructure for applying the latest AI developments and popular machine-learning methods to materials-science data, facilitating the deployment of AI-powered methodologies in the field. By providing a platform for data sharing, analysis, and publication, NOMAD s