In [6]:
import os

import load_dotenv

# Relative location of the .env file this notebook reads its environment from.
# NOTE(review): presumably this file holds the Gemini API key - confirm.
ENV_FILE = "../../All_LLM_tutorial/.env"

# Kept as the cell's last expression so the call's return value is displayed.
load_dotenv.load_dotenv(ENV_FILE)

True

In [8]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.gemini import Gemini

# Global LlamaIndex defaults: the index and query engine built later in this
# notebook pick up the embedding model and LLM from `Settings` unless they are
# overridden explicitly.

# Embedding model used for vectorising both the document chunks and the queries.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# LLM used for answer synthesis. `model_name` is a deprecated alias on the
# Gemini constructor; `model` is the supported keyword for the same value.
# NOTE(review): newer releases of this integration deprecate the Gemini class
# itself in favour of llama-index-llms-google-genai - consider migrating.
Settings.llm = Gemini(model="models/gemini-2.0-flash")

  Settings.llm = Gemini(model_name="models/gemini-2.0-flash")


In [9]:
from llama_index.core import SimpleDirectoryReader

# Build a reader restricted to the single PDF, then parse it.
# `documents` is consumed by the index-building cell.
pdf_reader = SimpleDirectoryReader(input_files=['./articles/2503.19786v1.pdf'])
documents = pdf_reader.load_data()

In [11]:
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter

# Chunking configuration shared by the rest of the notebook.
text_splitter = SentenceSplitter(
    chunk_size=512,
    chunk_overlap=10,
)

# Also register the splitter as the global default.
Settings.text_splitter = text_splitter

In [12]:
from llama_index.core import VectorStoreIndex

# Chunk the loaded documents with the configured splitter and build a vector
# index from them.
index = VectorStoreIndex.from_documents(
    documents,
    transformations=[text_splitter],
)

In [15]:
# Write the index's storage context to ./embeddings so it can be reloaded from
# disk later.
index.storage_context.persist(persist_dir="./embeddings")

In [16]:
from llama_index.core import StorageContext, load_index_from_storage

# Point a storage context at the persisted directory ...
storage_context = StorageContext.from_defaults(persist_dir="./embeddings")

# ... and rebuild the index from it. This rebinds `index` to the reloaded copy.
index = load_index_from_storage(storage_context=storage_context)

Loading llama_index.core.storage.kvstore.simple_kvstore from ./embeddings/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./embeddings/index_store.json.


In [17]:
from llama_index.core.prompts import PromptTemplate

# Question-answering prompt. It uses the placeholders {question} and {context};
# the mapping passed to PromptTemplate below ties them to the `query_str` and
# `context_str` keys.
template = """
You are a knowledgeable and precise assistant specialized in question-answering tasks, 
particularly from academic and research-based sources. 
Your goal is to provide accurate, concise, and contextually relevant answers based on the given information.

Instructions:

Comprehension and Accuracy: Carefully read and comprehend the provided context from the research paper to ensure accuracy in your response.
Conciseness: Deliver the answer in no more than three sentences, ensuring it is concise and directly addresses the question.
Truthfulness: If the context does not provide enough information to answer the question, clearly state, "I don't know."
Contextual Relevance: Ensure your answer is well-supported by the retrieved context and does not include any information beyond what is provided.

Remember if no context is provided please say you don't know the answer
Here is the question and context for you to work with:

\nQuestion: {question} \nContext: {context} \nAnswer:"""

# Wrap the raw string, renaming the template variables via the mapping.
prompt_tmpl = PromptTemplate(
    template=template,
    template_var_mappings={
        "query_str": "question",
        "context_str": "context",
    },
)


In [19]:
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

# Retrieval stage: fetch the 10 most similar chunks from the index.
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)

# Synthesis stage: default response synthesizer.
response_synthesizer = get_response_synthesizer()

# Wire retrieval and synthesis into a single query engine.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)

# Swap the synthesizer's text-QA prompt for the custom template.
query_engine.update_prompts(
    {
        "response_synthesizer:text_qa_template": prompt_tmpl,
    }
)

In [21]:
## Input
# Query (Korean): "A brief description of GEMMA3".
# The model name was previously misspelled as "GEMM3"; the query text is what
# gets embedded for retrieval, so the name is spelled out correctly here.
response = query_engine.query("GEMMA3에 대한 간단한 설명")
print(response)

Based on the provided text, Gemma 3 is the latest addition to the Gemma family of open language models, designed for text, image, and code. It focuses on adding image understanding and long context while improving multilinguality and STEM-related abilities. The model sizes and architectures are designed to be compatible with standard hardware.


In [23]:
# Query (Korean): roughly "number of params for training GEMMA3".
question = "GEMMA3을 학습시키는데 param 수"
response = query_engine.query(question)
print(response)

Gemma 3은 10억에서 270억 개의 파라미터로 구성되어 있습니다. Gemma 3 4B 모델은 417M의 임베딩 파라미터와 675M의 비임베딩 파라미터를 가지고 있습니다. Gemma 3 27B 모델은 417M의 임베딩 파라미터와 25,600M의 비임베딩 파라미터를 가지고 있습니다.

