In [21]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.core import Settings

In [22]:
# SECURITY: a literal Google API key was previously written in this cell and
# has been redacted. If this file was ever shared or committed, treat that key
# as compromised and rotate it.
# Provide the key from outside the notebook (for example, export
# GOOGLE_API_KEY before starting the kernel) instead of pasting it into a cell.
# NOTE(review): presumably the Gemini client reads GOOGLE_API_KEY from the
# environment, since the disabled code below sets that variable — confirm.
# import os

# GOOGLE_API_KEY = "<redacted>"
# os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [23]:
# Global LlamaIndex defaults: register the embedding model first, then the LLM.
embedding_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embedding_model

gemini_llm = Gemini(model_name="models/gemini-pro")
Settings.llm = gemini_llm

In [24]:
from llama_index.core import SimpleDirectoryReader

# Load the single source PDF; the relative path is resolved against the
# kernel's current working directory.
pdf_reader = SimpleDirectoryReader(input_files=["llama2.pdf"])
documents = pdf_reader.load_data()

In [25]:
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter

# Sentence-based chunker configured with chunk_size=512 and chunk_overlap=10.
text_splitter = SentenceSplitter(
    chunk_overlap=10,
    chunk_size=512,
)

# Also register the same splitter as the global default.
Settings.text_splitter = text_splitter

In [26]:
from llama_index.core import VectorStoreIndex

# Build the vector index over the loaded documents, with the sentence
# splitter as the only transformation applied.
index = VectorStoreIndex.from_documents(
    documents,
    transformations=[text_splitter],
)

In [27]:
# Persist the index's storage context to disk.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable directory before sharing the notebook.
persist_dir = "/cherry/projects/rag_llama_index/research/index_blogs"
index.storage_context.persist(persist_dir=persist_dir)

In [28]:
from llama_index.core import StorageContext, load_index_from_storage

# Open a storage context on the persisted directory, then rebind `index`
# to the index loaded from that storage.
storage_context = StorageContext.from_defaults(
    persist_dir="/cherry/projects/rag_llama_index/research/index_blogs"
)
index = load_index_from_storage(storage_context)

In [29]:
# Prompt text for the question-answering step. The final line carries two
# placeholders, {question} and {context}. Because this is not a raw string,
# the "\n" sequences on that line become real newline characters.
template = """
You are a knowledgeable and precise assistant specialized in question-answering tasks, 
particularly from academic and research-based sources. 
Your goal is to provide accurate, concise, and contextually relevant answers based on the given information.

Instructions:

Comprehension and Accuracy: Carefully read and comprehend the provided context from the research paper to ensure accuracy in your response.
Conciseness: Deliver the answer in no more than three sentences, ensuring it is concise and directly addresses the question.
Truthfulness: If the context does not provide enough information to answer the question, clearly state, "I don't know."
Contextual Relevance: Ensure your answer is well-supported by the retrieved context and does not include any information beyond what is provided.

Remember if no context is provided please say you don't know the answer
Here is the question and context for you to work with:

\nQuestion: {question} \nContext: {context} \nAnswer:"""

In [30]:
from llama_index.core.prompts import PromptTemplate

# Pair the keys "query_str"/"context_str" with this template's own
# placeholder names, "question" and "context".
placeholder_names = {"query_str": "question", "context_str": "context"}
prompt_tmplt = PromptTemplate(
    template=template,
    template_var_mappings=placeholder_names,
)

In [31]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

# Retrieval stage: vector retriever over `index` with similarity_top_k=10.
retriever = VectorIndexRetriever(similarity_top_k=10, index=index)

# Synthesis stage: response synthesizer created with no explicit arguments.
response_synthesizer = get_response_synthesizer()

# Wire both stages into a single query engine.
query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    retriever=retriever,
)

# Replace the synthesizer's text-QA prompt with the custom template.
qa_prompt_override = {"response_synthesizer:text_qa_template": prompt_tmplt}
query_engine.update_prompts(qa_prompt_override)

In [32]:
## Input
# Run the question through the query engine and print the answer.
question = "What are differet variants of LLama?"
response = query_engine.query(question)
print(response)

Llama 2 comes in a range of parameter sizes—7B, 13B, and 70B—as well as pretrained and fine-tuned variations.


In [33]:
## Input
# Run the question through the query engine and print the answer.
question = "What are the hyperparamters used for training the model?"
response = query_engine.query(question)
print(response)

The hyperparameters used for training the model are: AdamW optimizer with β1 = 0.9, β2 = 0.95, eps = 10−5, a cosine learning rate schedule with warmup of 2000 steps, and decay final learning rate down to 10% of the peak learning rate, a weight decay of 0.1, and gradient clipping of 1.0.


In [34]:
## Input
# Run the question through the query engine and print the answer.
question = "Can you please comment on the Carbon Footprint of Pretraining. "
response = query_engine.query(question)
print(response)

The total carbon emissions for training the Llama 2 family of models were estimated to be 539 tCO2eq. 100% of these emissions were offset by Meta's sustainability program.
