In [None]:
# The OpenAI quota for this key is exhausted, which is why the cells below were not executed.

# The key is read from the process environment, so OPENAI_API_KEY must be set
# before the kernel starts.
# NOTE(review): the original note mentions a `variable.env` file, but nothing in
# this cell loads one - confirm how the variable is expected to be exported.
import os
import warnings

api_key = os.environ.get('OPENAI_API_KEY')
if not api_key:
    # Surface the missing key here instead of as an obscure authentication
    # error in a later cell; a warning keeps the notebook runnable.
    warnings.warn("OPENAI_API_KEY is not set; cells that call the OpenAI API will fail.")

In [2]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

# Read every file found in the local `data` directory into a list of documents.
document = SimpleDirectoryReader(input_dir=r'data').load_data()

In [None]:
# Build the vector index from the loaded documents, showing a progress bar while it runs.
index = VectorStoreIndex.from_documents(
    documents=document,
    show_progress=True,
)

In [None]:
# Create a query engine from the index using the default options (no arguments are passed).
query_engine = index.as_query_engine()

In [None]:
# Keep the whole Response object instead of only its `.response` text: the
# pprint_response cell further down is called with show_source=True and needs
# the object, not a plain string.
# NOTE(review): the query text is an empty placeholder - replace it with a real question.
response = query_engine.query("")

In [None]:
# Show the result of the query cell above as this cell's output.
response

## pprint Response

In [None]:
# Pretty-print the response together with its sources (show_source=True). The sources
# help illustrate the three query components: retriever, node postprocessors and
# response synthesizer.
# NOTE(review): `response` presumably has to be the full Response object here, not just
# its text - confirm against the query cell that assigns it.
from llama_index.core.response.pprint_utils import pprint_response
pprint_response(response, show_source=True)

# Retriever

In [None]:
# The retriever fetches the most similar nodes from the index; `similarity_top_k`
# sets how many are returned (3 here).
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

retriever = VectorIndexRetriever(index=index, similarity_top_k=3)

In [None]:
# Build a query engine around the retriever configured in the previous cell.
query_engine = RetrieverQueryEngine(retriever=retriever)

In [None]:
# A query engine is not callable; questions are submitted through `.query(...)`.
# NOTE(review): the query text is an empty placeholder, like the other query
# cells in this notebook - replace it with a real question.
query_engine.query("")

# SimilarityPostprocessor

In [None]:
# The similarity postprocessor applies a threshold on the similarity score of the
# retrieved nodes (cutoff 0.75 here).
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

retriever = VectorIndexRetriever(index=index, similarity_top_k=3)
s_processor = SimilarityPostprocessor(similarity_cutoff=0.75)

# Combine retrieval and score filtering in a single query engine.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[s_processor],
)

In [None]:
# Run a query through the engine built in the previous cell and keep the result.
# NOTE(review): the query text is an empty placeholder - replace it with a real question.
response = query_engine.query("")

# Persisting Index

In [None]:
# Load the documents and embed them again; this is the index whose embeddings
# are written to disk in the next cell.
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

document = SimpleDirectoryReader(input_dir=r'data').load_data()
index = VectorStoreIndex.from_documents(
    documents=document,
    show_progress=True,
)

In [None]:
# Write the index to disk. The directory has to be the same one that
# StorageContext.from_defaults reads from when the index is loaded again, and a
# raw string keeps the backslash from being treated as an escape sequence.
# NOTE(review): the backslash separator is Windows-style - confirm the target platform.
index.storage_context.persist(persist_dir=r'storage\embeddings')

# Reading the index back from the folder

In [None]:
from llama_index.core import StorageContext, load_index_from_storage

# Point a storage context at the persisted folder, then rebuild the index from it.
storage_context = StorageContext.from_defaults(
    persist_dir=r"storage\embeddings",
)
index = load_index_from_storage(storage_context=storage_context)

### How to count tokens when creating and querying a LlamaIndex index

In [25]:
import tiktoken
from llama_index.core import ServiceContext
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler

In [None]:
# Tokenize with the encoding tiktoken associates with the embedding model.
encode = tiktoken.encoding_for_model("text-embedding-ada-002").encode
token_counter = TokenCountingHandler(tokenizer=encode, verbose=True)

# Register the counter as a callback and hand the manager to a service context
# so it can be passed along when the index is built.
# NOTE(review): ServiceContext is reportedly deprecated in newer llama-index
# releases - confirm the installed version still supports it.
callback_manager = CallbackManager([token_counter])
service_context = ServiceContext.from_defaults(callback_manager=callback_manager)

In [None]:
# Build the index from the loaded documents with the token-counting service
# context attached. A VectorIndexRetriever wraps an existing index and cannot be
# created from documents, and the following cell needs `index.as_query_engine()`.
index = VectorStoreIndex.from_documents(
    document,
    show_progress=True,
    service_context=service_context,
)

In [None]:
# Create a query engine from the current `index` with default options.
query_engine = index.as_query_engine()

In [None]:
# A query engine is not callable; questions are submitted through `.query(...)`.
# NOTE(review): the query text is an empty placeholder - replace it with a real question.
query_engine.query('')

# How to use LLMs with LlamaIndex

In [4]:
from llama_index.llms.openai import OpenAI

# gpt-3.5-turbo with temperature 0 and at most 250 tokens per answer.
llm = OpenAI(
    model='gpt-3.5-turbo',
    temperature=0,
    max_tokens=250,
)

In [None]:
# Send a single completion prompt to the LLM and keep the returned object.
response = llm.complete('What is API?')

In [None]:
# Show the `text` attribute of the completion result.
response.text 

In [None]:
# Show the `raw` attribute of the completion result (metadata about the call).
# NOTE(review): presumably the unprocessed provider payload - confirm.
response.raw

# Chat Models

In [None]:
# The system message assigns the persona the model should play; the user message
# carries the actual request.

from llama_index.core.llms import ChatMessage

message = [
    ChatMessage(role='system', content='Talk like a 5 year old funny and cute girl who always answer in joke.'),
    # The question is what the persona must answer, so it is sent with the
    # 'user' role rather than as a second system instruction.
    ChatMessage(role='user', content="tell me about your math's Teacher"),
]

response = llm.chat(messages=message)

In [None]:
# Print the chat result returned by `llm.chat`.
print(response)

# Open-source LLMs from Hugging Face

In [None]:
# Running this cell downloads the model weights (about 10 GB according to the
# original note - unverified), which is the practical drawback of open-source models.
# NOTE(review): no model name is passed, so the library's default model is used - confirm.
from llama_index.llms.huggingface import HuggingFaceLLM

llm = HuggingFaceLLM(
    device_map='auto',
    context_window=500,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7},
)

# Prompt

In [None]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

document = SimpleDirectoryReader('data').load_data()
# Use from_documents, as the earlier cells do: the bare constructor's first
# positional argument is a list of nodes, so passing documents there would
# bypass the document-to-node splitting step.
index = VectorStoreIndex.from_documents(document)

In [None]:
# Ask a question with the default prompt. The result is stored under the same
# name that is printed; the original assigned to a misspelled variable and
# printed a stale value instead.
response = index.as_query_engine().query('Who have more experience?')
print(response)

In [None]:
# Create a custom question-answering PromptTemplate with LlamaIndex.
# `{context_str}` and `{query_str}` are the placeholders used for the retrieved
# context and the user's question.
from llama_index.core.prompts import PromptTemplate
string = (
    "You're a Human Respourse Asssistance of a Company.\n"
    # Newline added so the context starts on its own line instead of being
    # glued to the end of the instruction sentence.
    "Your task is to find the field asked by the HR from the given context\n"
    "{context_str}\n"
    # Newline added so the divider is not fused with the following sentence.
    "---------------------\n"
    "use the context information and answer the below query\n"
    "answer the question : {query_str}\n"
    "if you are not getting the answer from the context just return N/A"
)

text_qa_template = PromptTemplate(string)

In [None]:
# Print the PromptTemplate object to inspect it.
print(text_qa_template)

In [None]:
# Build a query engine that uses the custom QA template, then ask the question.
qa_engine = index.as_query_engine(text_qa_template=text_qa_template)
response = qa_engine.query('How many students resumes do we have?')

In [None]:
# Print the answer produced with the custom QA template.
print(response)