In [None]:
""" Building a Prototype RAG.
    1. Data Load
    2. Check Query Engine
    3. Connect Retriever and Generator
"""

In [None]:
"""   1. Data Load """
city_name_path = r'city_short.txt' #change this path

# Read one city per line. Only the text before the first ':' is kept.
# NOTE(review): the file is presumably laid out as "<city> : <description>" — confirm.
# Surrounding whitespace is stripped (rather than blindly dropping the last
# character) so "Berlin: ..." and "Berlin : ..." both yield "Berlin", and
# blank lines are skipped instead of producing an empty city name.
with open(city_name_path, 'r', encoding='utf-8') as file:
    city_names = [
        name
        for name in (line.split(':', 1)[0].strip() for line in file)
        if name
    ]

print(city_names) #100 different cities

# Make sure the city the question is about is part of the list to download.
# NOTE(review): `city_related_to_question` is not assigned in any cell shown
# above; it has to exist in the kernel already — confirm where it is defined.
if city_related_to_question not in city_names:
    city_names.append(city_related_to_question)

# Load the pages for every name in `city_names` through the Wikipedia reader.
reader = WikipediaReader()
documents = reader.load_data(city_names, auto_suggest=False)

# Baseline index, built without an explicit splitter.
index = VectorStoreIndex.from_documents(documents)

# Chunk size and overlap can be varied as shown below.
# Short chunks: 200 / overlap 50.
text_splitter_short = SentenceSplitter(chunk_size=200, chunk_overlap=50)
index_short = VectorStoreIndex.from_documents(
    documents,
    transformations=[text_splitter_short],
)

# Long chunks: 1024 / overlap 200.
text_splitter_long = SentenceSplitter(chunk_size=1024, chunk_overlap=200)
index_long = VectorStoreIndex.from_documents(
    documents,
    transformations=[text_splitter_long],
)

In [None]:
""" 2. Check Query Engine """
import textwrap  # stdlib; needed for the wrapped print below

# Query engine over the baseline `index`; the RAG classes defined in later
# cells call this same `query_engine` object.
query_engine = index.as_query_engine()
response = query_engine.query("What's the arts and culture scene in Berlin?")

# Wrap the answer text at 100 columns for readable cell output.
print(textwrap.fill(str(response), 100))

In [None]:
""" 3. Connect Retriever and Generator """

from openai import OpenAI

# Notebook-level OpenAI client; the RAG classes below call
# `oai_client.chat.completions.create(...)` on it.
# NOTE(review): constructed without arguments, so credentials are presumably
# read from the environment (e.g. OPENAI_API_KEY) — confirm.
oai_client = OpenAI()

class RAG_from_scratch:
    """Minimal retrieve-then-generate pipeline.

    Both steps rely on notebook-level objects that must exist before a method
    is called: `query_engine` (used by `retrieve`) and `oai_client` (used by
    `generate_response`).
    """

    def retrieve(self, query: str) -> object:
        """Retrieval step of RAG.

        Args:
            query: The user question.

        Returns:
            Whatever `query_engine.query(query)` returns, unchanged.
            NOTE(review): this is presumably a response object rather than a
            list (an earlier cell prints such a result via `str(...)`) — confirm.
        """
        return query_engine.query(query)

    def generate_response(
        self,
        query: str,
        context_str: object,
        model: str = "gpt-3.5-turbo",
        temperature: float = 0,
    ) -> str:
        """Generation step of RAG.

        Args:
            query: The user question.
            context_str: Context to show the model; it is interpolated into the
                prompt with normal f-string formatting, so any object works.
            model: Chat model name passed to the API (defaults to the
                previously hard-coded "gpt-3.5-turbo").
            temperature: Sampling temperature passed to the API (default 0).

        Returns:
            The `message.content` of the first choice in the API response.
        """
        prompt = (
            f"We have provided context information below. \n"
            f"---------------------\n"
            f"{context_str}"
            f"\n---------------------\n"
            f"Given this information, please answer the question: {query}"
        )
        completion = oai_client.chat.completions.create(
            model=model,
            temperature=temperature,
            messages=[{"role": "user", "content": prompt}],
        )
        # Take the first choice (choices[0]); message.content is the text
        # the model generated.
        return completion.choices[0].message.content

    def query(self, query: str) -> str:
        """Run the whole pipeline: fetch context, then generate the answer."""
        context_str = self.retrieve(query)
        return self.generate_response(query, context_str)

# Question to send through the pipeline.
city_question = 'Which Korean city has a relationship with Dresden?'

# Build the pipeline and run it; the generated text is stored in `answer`.
rag = RAG_from_scratch()
answer = rag.query(city_question)

In [None]:
# Retriever over the baseline `index`; Refine_RAG.retrieve calls it.
retriever = index.as_retriever()
# Response synthesizer created with response_mode="compact".
# NOTE(review): `synthesizer` is not referenced by any other visible cell —
# confirm whether it is still needed.
synthesizer = get_response_synthesizer(response_mode="compact")

class Refine_RAG:
    """Retrieve-then-generate pipeline that adds a system prompt.

    Relies on notebook-level objects that must exist before a method is
    called: `retriever`, `query_engine` and `oai_client`.
    """

    def retrieve(self, query: str) -> tuple:
        """Fetch context for `query`.

        Returns:
            A 2-tuple `(ret, results)` where `ret` is the value returned by
            `retriever.retrieve(query)` and `results` is the value returned by
            `query_engine.query(query)`.
        """
        ret = retriever.retrieve(query)
        results = query_engine.query(query)
        return ret, results

    def generate_response(
        self,
        query: str,
        context_str: object,
        model: str = "gpt-3.5-turbo",
        temperature: float = 0,
    ) -> str:
        """Generate an answer to `query` from `context_str`.

        Args:
            query: The user question.
            context_str: Context to show the model; interpolated into the
                prompt with normal f-string formatting.
            model: Chat model name passed to the API.
            temperature: Sampling temperature passed to the API.

        Returns:
            The `message.content` of the first choice in the API response.
        """
        # The user message must carry both the context and the question;
        # a placeholder prompt would leave the model with nothing to answer.
        user_prompt = (
            "We have provided context information below. \n"
            "---------------------\n"
            f"{context_str}"
            "\n---------------------\n"
            f"Given this information, please answer the question: {query}"
        )
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant. Answer as concisely as possible.",
            },
            {
                "role": "user",
                "content": user_prompt,
            },
        ]

        response = oai_client.chat.completions.create(
            model=model,
            temperature=temperature,
            messages=messages,
        )

        return response.choices[0].message.content

    def query(self, query: str) -> str:
        """Run the whole pipeline and return the generated answer text."""
        # Only the query-engine result is used as context; the raw retriever
        # output (first tuple element) is not needed here.
        _, context_str = self.retrieve(query)
        return self.generate_response(query, context_str)

# Question to send through the refined pipeline.
sample_question = "City council of Suwon addressed illegal dumping of household waste in what way?"

# Build the pipeline and run it. This rebinds `answer`, replacing the value
# stored by the earlier RAG_from_scratch cell.
refine_rag = Refine_RAG()
answer = refine_rag.query(sample_question)
