# 인공지능 모델 로드

In [1]:
from langchain.llms import LlamaCpp

# Load the quantised Llama-2 7B chat weights (GGUF file) through llama.cpp.
model = LlamaCpp(
    model_path="../llama-2-7b-chat.Q5_K_M.gguf",
    temperature=0.0,
    top_p=1,
    # Upper bound on generated tokens. This used to be 8192, twice the
    # context window configured below, so it could never be honoured: the
    # prompt and the completion share the same n_ctx tokens. 1024 leaves
    # room for the retrieved context inside the 4096-token window.
    # NOTE(review): tune this if answers are being cut short.
    max_tokens=1024,
    verbose=True,
    # Maximum context length the model can process at once; the loader log
    # reports llama.context_length = 4096 for this file.
    n_ctx=4096,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from ../llama-2-7b-chat.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32             

# vectorDB 데이터 로드

In [42]:
import chromadb
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding function handed to the vector store: the BAAI/bge-small-en model,
# run on the CPU, with normalized embeddings requested at encode time.
# NOTE(review): presumably this has to be the same model that was used when
# the persisted collection was built -- confirm against the ingestion step.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

# Chroma client configuration pointing at the on-disk store directory.
# NOTE(review): "parquet" is passed through unchanged; confirm it is a value
# the installed chromadb release accepts for chroma_db_impl.
client_settings = chromadb.config.Settings(
    persist_directory="./vectorstore/example-embedding/",
    chroma_db_impl="parquet",
)

# Vector store wrapper that later cells query and turn into a retriever.
vector_db = Chroma(
    client_settings=client_settings,
    embedding_function=embeddings,
)

In [45]:
# Probe the vector store with a sample query and keep the scored matches.
# The query uses a typographic apostrophe (’), the same character that
# appears in the stored page text shown in the cell output.
query = "The Customer’s IT organization"
# The cell output shows a list of (Document, score) pairs.
# NOTE(review): the score looks like a distance (the closest match has the
# lowest value) -- confirm against the vector store documentation.
docs = vector_db.similarity_search_with_score(query)

# Bare expression so the notebook displays the result.
docs

[(Document(page_content='Page 7 of 9 \n SAMPLE STATEMENT OF WORK  \nTechnology Assumptions  \n1. The Customer’s IT organization is responsible for workstation compliance to Globex  \nminimum requirements and any  pre-installation activities (if applicable).  \n2. Customer is responsible for the purchase and installation of any third -party', metadata={'source': './PDFS/sample-statement-of-work.pdf', 'page': 6}),
  0.29618027806282043),
 (Document(page_content='The Customer will designate one person to serve  as Project Manager .  The Customer’s \nproject manager  will have authority to approve project -related services and may designate \nother individuals to act as project manager s, subject -matter experts, and /or advisors  during \nthe engagement .  The Customer  will be responsible for the quality and timeliness of work', metadata={'source': './PDFS/sample-statement-of-work.pdf', 'page': 3}),
  0.3413243889808655),
 (Document(page_content='• Network Appliance setup/troubleshooting

# 프롬프트 생성

In [47]:
# from langchain import hub

# prompt = hub.pull("rlm/rag-prompt")
# print(prompt)


from langchain.prompts import PromptTemplate

# Instruction prompt with two placeholders: {context} and {question}.
# Built from adjacent string literals so every newline is explicit; the
# resulting text starts and ends with a newline.
prompt_template = (
    "\n"
    "Give an answer by referring to the context, and include the address within the context in the answer, and clearly number the answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

# Retrieval 생성

In [46]:
from langchain.chains import RetrievalQA

# Extra arguments for the underlying "stuff" documents chain: the custom
# prompt, which expects the retrieved text as {context} and the user query
# as {question}.
chain_type_kwargs = {
    "prompt": PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    ),
}

# Retrieval-augmented QA chain: the retriever fetches documents from the
# vector store and the LLM answers from them.
qa_chain = RetrievalQA.from_chain_type(
    llm=model,
    chain_type="stuff",
    retriever=vector_db.as_retriever(),
    # Fix: chain_type_kwargs was built above but never handed to the chain,
    # so the custom prompt was silently ignored in favour of the default one.
    chain_type_kwargs=chain_type_kwargs,
    # Also return the retrieved documents alongside the answer.
    return_source_documents=True,
    # verbose=True,
)

# 인공지능 모델에게 질문

In [38]:
# question  ="What is a LangChain?"
# result = qa_chain({"query": question})

KeyboardInterrupt: 

In [None]:
# Display the value stored under the "result" key of the chain output.
# NOTE(review): `result` is only assigned by the commented-out qa_chain call
# in the previous cell, so in a fresh kernel this raises NameError until
# that call is re-enabled and run -- confirm the intended workflow.
result["result"]