<a href="https://colab.research.google.com/github/jayaraman1112g/ai_snippets/blob/main/llamainde_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU llama-index
!pip install -qU transformers
!pip install -qU sentence-transformers

In [None]:
%pip install -qU llama-index-embeddings-huggingface
%pip install -qU llama-index-llms-huggingface

# Read the document

In [None]:
import os

# Allocator option for PyTorch's CUDA memory manager. It is exported here,
# before torch is imported further down in the notebook.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex

# Read the sample PDF; `documents` is the list returned by the reader and is
# the input to every indexing step later in the notebook.
pdf_reader = SimpleDirectoryReader(input_files=["sample_data/DISEASE.pdf"])
documents = pdf_reader.load_data()

# Setup the LLM

In [None]:
import torch
from llama_index.llms.huggingface import HuggingFaceLLM

# One Hugging Face checkpoint provides both the tokenizer and the model weights.
LLAMA_CHECKPOINT = "meta-llama/Llama-2-7b-chat-hf"

system_prompt = """
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""

llm = HuggingFaceLLM(
    model_name=LLAMA_CHECKPOINT,
    tokenizer_name=LLAMA_CHECKPOINT,
    system_prompt=system_prompt,
    context_window=4096,
    max_new_tokens=256,
    # Sampling is switched off, so generation is deterministic.
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    device_map="auto",
    # Load the weights in half precision (float16) to reduce memory use.
    # Note: this is 16-bit loading, not 8-bit quantisation.
    model_kwargs={"torch_dtype": torch.float16},
)

# Setup the embedding

In [None]:
from google.colab import userdata
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Hugging Face access token, read from the Colab secret named HF_TOKEN.
hf_token = userdata.get("HF_TOKEN")

# Embedding model used for both indexing and query-time retrieval.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
    token=hf_token,
)

# Build the index with VectorStoreIndex

In [None]:
# ServiceContext is deprecated since llama-index 0.10 and raises in later
# releases (which `pip install -U` pulls in). The supported replacement is the
# global Settings object: every index, retriever and query engine created
# afterwards picks up these defaults.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 1024

# Legacy placeholder: keeps any remaining `service_context=service_context`
# call site from raising NameError. With None, llama-index 0.10 falls back to
# Settings; later releases are expected to ignore the argument.
service_context = None

# Chunk the documents, embed them with Settings.embed_model, and hold the
# vectors in the default in-memory store.
index = VectorStoreIndex.from_documents(documents)

# Obtain a retriever and a query engine from the index

In [None]:
from llama_index.core.response.notebook_utils import display_source_node

# Retrieval only: fetch the chunks matched to the question and render each
# one, without asking the LLM to write an answer.
retriever = index.as_retriever()
nodes = retriever.retrieve("What is diphtheria?")
for source_node in nodes:
    display_source_node(source_node)

In [None]:
# Full question answering: retrieve the relevant chunks, then let the LLM
# write an answer from them.
qe = index.as_query_engine()

# The result is a response object, not a node list, so it gets its own name
# instead of overwriting `nodes`. The query uses the same correctly spelled
# term as the retriever example.
response = qe.query("What is diphtheria?")
print(response)

# Set up the vector store

In [None]:
import locale


def _utf8_preferred_encoding(do_setlocale=True):
    """Report UTF-8 as the preferred encoding, whatever the process locale is.

    Mirrors the signature of the standard ``locale.getpreferredencoding`` so
    that callers passing the optional ``do_setlocale`` argument (for example
    ``locale.getpreferredencoding(False)``) keep working; the argument is
    accepted and ignored.
    """
    return "UTF-8"


# NOTE(review): presumably the usual Colab workaround for shell commands that
# fail with "A UTF-8 locale is required" - confirm it is still needed.
locale.getpreferredencoding = _utf8_preferred_encoding
!pip install -qU chromadb

In [None]:
import chromadb

# In-memory Chroma client with a scratch collection for the raw document text.
client = chromadb.Client()
db = client.get_or_create_collection("test_llamaindex")
print(len(documents))

# Chroma stores plain strings, so send each document's text rather than the
# llama-index Document object. One batched call with list-valued ids keeps
# `documents` and `ids` aligned. Skip the call when nothing was loaded.
if documents:
    db.add(
        documents=[doc.text for doc in documents],
        ids=[str(position) for position in range(len(documents))],
    )


# pd.DataFrame(db.peek(0))

In [None]:
# Qdrant vector-store integration for llama-index; it provides the
# llama_index.vector_stores.qdrant module imported in the next cell.
%pip install -qU llama-index-vector-stores-qdrant

In [None]:
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore

# Qdrant client created with the ":memory:" location, wrapped as a
# llama-index vector store under the collection name "test_store".
client = qdrant_client.QdrantClient(location=":memory:")
vector_store = QdrantVectorStore(collection_name="test_store", client=client)

# Setup ingestion pipeline

In [None]:
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter

# Transformations are listed in the order split -> title extraction -> embed;
# the pipeline is given the Qdrant-backed vector store as its destination.
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=200, chunk_overlap=10),
        TitleExtractor(llm=llm),
        embed_model,
    ],
    vector_store=vector_store,
)


In [None]:
import nest_asyncio

# NOTE(review): presumably needed because the notebook kernel already runs an
# asyncio event loop and the pipeline uses asyncio internally - confirm.
nest_asyncio.apply()

# Run every transformation over the loaded documents; the result is kept in `nodes`.
nodes = pipeline.run(documents=documents)

In [None]:
# `service_context=` is deprecated since llama-index 0.10 and ServiceContext
# no longer works in later releases, so the models are supplied directly.
# The query engine resolves its LLM from the global Settings; point it at the
# local model instead of the library default.
Settings.llm = llm

# Wrap the already-populated vector store. The embedding model is passed
# explicitly so queries are embedded with the model used during ingestion.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model,
)

In [None]:
# Ask the same question against the index backed by the ingestion pipeline's
# vector store. The query term is spelled "diphtheria", matching the
# retriever example earlier in the notebook.
qe = index.as_query_engine()
print(qe.query("What is diphtheria?"))