# RAG langchain

In [1]:
from typing import List
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.retrievers import BaseRetriever

from FlagEmbedding import BGEM3FlagModel
from pinecone import Pinecone, Index

from langchain_core.prompts import PromptTemplate

import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextStreamer
from langchain_huggingface import HuggingFacePipeline

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

2024-09-15 11:43:52.934659: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-15 11:43:52.934712: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-15 11:43:52.934755: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-15 11:43:52.943191: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
from dotenv import load_dotenv
import os

load_dotenv("../.env")

True

In [7]:
class HybridSearchRetriever(BaseRetriever):

    pinecone_index:Index
    embedding_model:BGEM3FlagModel
    alpha:float
    top_k: int
    min_score:float

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[str]:
        """Sync implementations for retriever."""
        user_query_emb = self.embedding_model.encode(query, return_dense=True, return_sparse=True, return_colbert_vecs=False) #dense, sparse 둘 다 반환함
        
        query_dense_vector = user_query_emb['dense_vecs'].tolist()
        user_query_sparse = user_query_emb['lexical_weights']
        query_sparse_vector = {
            'indices': [int(k) for k in user_query_sparse.keys() if k.isdigit()], #isdigit() 안하면 에러뜨더라
            'values': [float(v) for k, v in user_query_sparse.items() if k.isdigit()]
        }

        hdense, hsparse = self._hybrid_score_norm(query_dense_vector, query_sparse_vector, alpha=self.alpha)

        hybrid_query_response = self.pinecone_index.query(
            top_k=self.top_k,
            vector=hdense,
            sparse_vector=hsparse,
            include_metadata=True,
        )
        
        documents = [
            f"{match['metadata']['answer_intro']}\n"
            f"{match['metadata']['answer_body']}\n"
            f"{match['metadata']['answer_conclusion']}"
            for match in hybrid_query_response['matches']
            if match['score'] >= self.min_score
        ]
        return documents
    
    def _hybrid_score_norm(self, dense, sparse, alpha: float):
        """Hybrid score using a convex combination

        alpha * dense + (1 - alpha) * sparse

        Args:
            dense: Array of floats representing
            sparse: a dict of `indices` and `values`
            alpha: scale between 0 and 1
        """
        if alpha < 0 or alpha > 1:
            raise ValueError("Alpha must be between 0 and 1")
        hs = {
            'indices': sparse['indices'],
            'values':  [v * (1 - alpha) for v in sparse['values']]
        }
        return [v * alpha for v in dense], hs

In [8]:
# Connect to the existing Pinecone index
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pinecone_index = pc.Index("health-care")

embedding_model = BGEM3FlagModel('BAAI/bge-m3',use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation

Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

  colbert_state_dict = torch.load(os.path.join(model_dir, 'colbert_linear.pt'), map_location='cpu')
  sparse_state_dict = torch.load(os.path.join(model_dir, 'sparse_linear.pt'), map_location='cpu')


In [9]:
retriever = HybridSearchRetriever(
        pinecone_index=pinecone_index,
        embedding_model=embedding_model,
        alpha=0.95,
        top_k=3,
        min_score=0.55,
    )

In [16]:
def hybrid_score_norm(dense, sparse, alpha: float):
    """Hybrid score using a convex combination

    alpha * dense + (1 - alpha) * sparse

    Args:
        dense: Array of floats representing
        sparse: a dict of `indices` and `values`
        alpha: scale between 0 and 1
    """
    if alpha < 0 or alpha > 1:
        raise ValueError("Alpha must be between 0 and 1")
    hs = {
        'indices': sparse['indices'],
        'values':  [v * (1 - alpha) for v in sparse['values']]
    }
    return [v * alpha for v in dense], hs

def test(query):
    user_query_emb = embedding_model.encode(query, return_dense=True, return_sparse=True, return_colbert_vecs=False) #dense, sparse 둘 다 반환함
            
    query_dense_vector = user_query_emb['dense_vecs'].tolist()
    user_query_sparse = user_query_emb['lexical_weights']
    query_sparse_vector = {
        'indices': [int(k) for k in user_query_sparse.keys() if k.isdigit()], #isdigit() 안하면 에러뜨더라
        'values': [float(v) for k, v in user_query_sparse.items() if k.isdigit()]
    }

    hdense, hsparse = hybrid_score_norm(query_dense_vector, query_sparse_vector, alpha=0.95)

    hybrid_query_response = pinecone_index.query(
        top_k=3,
        vector=hdense,
        sparse_vector=hsparse,
        include_metadata=True,
    )
    print(hybrid_query_response)

In [21]:
test("아데노")

{'matches': [{'id': 'HC-A-02104685',
              'metadata': {'answer_body': '이 질환의 초기 증상은 주로 추위나 스트레스 상황에서 손가락이나 '
                                          '발가락이 창백해지고 감각이 둔화되는 것입니다. 이러한 증상은 '
                                          '일시적으로 발생하며, 전신성 경화증이 동반된 경우 더욱 '
                                          '심해집니다. 레이노현상은 따뜻한 환경에서 더욱 심해질 수 '
                                          '있으므로 환우회나 의사를 방문하여 검사를 받는 것이 좋습니다.',
                           'answer_conclusion': '손발이 차고 수족냉증이 있는 경우 레이노증후군 '
                                                '가능성이 있으므로 의심 증상이 있다면 반드시 검사를 '
                                                '받아 적절한 치료를 받아야 합니다.',
                           'answer_intro': '레이노증후군은 혈관 반응 장애로 인해 손가락이나 발가락에서 '
                                           '창백하거나 비정상적인 혈관의 수축 증상이 나타나는 질환입니다.',
                           'department': '내과',
                           'disease_category': '뇌신경정신질환',
                           'disease_name_kor': '레이노병',
                      

In [10]:
model_id="google/gemma-2-2b-it"

gemma_2_model = AutoModelForCausalLM.from_pretrained(model_id)
gemma_2_tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
streamer = TextStreamer(gemma_2_tokenizer, skip_prompt=True, skip_special_tokens=True)

gen = pipeline(
    task='text-generation',
    model=gemma_2_model,
    tokenizer=gemma_2_tokenizer,
    max_new_tokens=1024,
    streamer=streamer,
    device=0 if torch.cuda.is_available() else -1,
    temperature=.5,
    top_p=0.7,
    repetition_penalty=1.1,
    do_sample=True,
    )

llm = HuggingFacePipeline(pipeline=gen)

In [12]:
template = """
Instructions:
- If the question involves a health-related issue, suggest possible causes and basic steps the user can take for relief, if applicable.
- You should explain in as much detail as possible what you know from the bottom of your heart to the user's questions.
- You can refer to the contents of the documents to create a response.
- Only use information that is directly related to the question.
- If no information is found in the documents, provide an answer based on general knowledge without fabricating details.
- You MUST answer in Korean.


Documents: {documents}

Question: {question}
"""

prev_chat = []

chat = [
    *prev_chat,
     { "role": "user", "content": template}
]

prompt_template = gemma_2_tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

prompt = PromptTemplate(input_variables=["question", "documents"], template=prompt_template)
prompt

PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template="<bos><start_of_turn>user\nInstructions:\n- If the question involves a health-related issue, suggest possible causes and basic steps the user can take for relief, if applicable.\n- You should explain in as much detail as possible what you know from the bottom of your heart to the user's questions.\n- You can refer to the contents of the documents to create a response.\n- Only use information that is directly related to the question.\n- If no information is found in the documents, provide an answer based on general knowledge without fabricating details.\n- You MUST answer in Korean.\n\n\nDocuments: {documents}\n\nQuestion: {question}<end_of_turn>\n<start_of_turn>model\n")

In [13]:
rag_chain = (
    {"documents": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [15]:
answer = rag_chain.invoke("유행성 각결막염(EKC)의 주요 병원체와 임상적인 증상에 대해 설명해주세요.")

유행성 각막염(EKC)은 바이러스에 의해 발생하는 급성 유행성 전염병으로, **파라믹소 바이러스** 과에 속하는 RNA 바이러스입니다.  

**임상적 증상**:

* **발열:** 초기에는 발열이 나타날 수 있습니다.
* **두통:** 두통이 나타날 수 있습니다.
* **근육통:** 근육통이 나타날 수 있습니다.
* **식욕 부진:** 식욕 부진이 나타날 수 있습니다.
* **이하선 비대:** 이하선(귀 밑 샘)이 부어오르는 증상이 나타납니다.
* **통증:** 이하선 부어오름과 함께 통증이 나타납니다.
* **귀 밑 림프절 부종:** 귀 밑의 림프절이 부풀어 오르는 증상이 나타납니다.
* **비충혈, 재채기, 기침, 발열, 부종 등:** 일부 환자에게 나타날 수 있습니다. 



참고: 위 내용은 제공된 문서에서 얻은 정보입니다. 



In [101]:
def stream_response(response, return_output=False):
    answer = ""
    for token in response:
        answer += token
        print(token, end="", flush=True)
    if return_output:
        return answer

END.