## 해결하려는 문제점
**작년에 B03 팀과 같은 주제로 연구한 팀 없어?**
- 두 번 검색을 해야 해결할 수 있는 문제
- Multi-Hop Retriever를 활용

### vectorstore 및 LLM

In [5]:
import streamlit as st
from langchain_openai import ChatOpenAI

# Pull the OpenAI API key from Streamlit's secrets store instead of hard-coding it.
openai_api_key = st.secrets["OPENAI_API_KEY"]

# Chat model used by the query constructor below; temperature is pinned to 0.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name="gpt-4o",
    temperature=0,
)

In [6]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings

In [7]:
# Chroma vector store backed by the on-disk directory "db/chroma_2024_pdfs";
# OpenAI embeddings (same API key as the chat model) are used as the embedding function.
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(openai_api_key=openai_api_key),
    persist_directory="db/chroma_2024_pdfs",
)

In [8]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
# Metadata schema described to the self-query constructor.
# Each row is (field name, description shown to the LLM, declared type); the
# rows are expanded into AttributeInfo objects in exactly this order:
# team code, title, three teammate name/number pairs, four subject flags,
# and the YouTube link.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_desc, type=field_type)
    for field_name, field_desc, field_type in (
        # --- team identity ---
        (
            "Team code",
            "Unique code that the team has. alphabetical uppercase + double digit combination.",
            "string",
        ),
        ("Title", "the topic that the team studied/made", "string"),
        # --- teammate slots 1..3 (name + student number each) ---
        (
            "Teammate #1 name",
            "A team member's name. name is two or three letters of Hangul.",
            "string",
        ),
        (
            "Teammate #1 number",
            "A team member's student number. The student number is four digits.",
            "string",
        ),
        (
            "Teammate #2 name",
            "A team member's name. name is two or three letters of Hangul.",
            "string",
        ),
        (
            "Teammate #2 number",
            "A team member's student number. The student number is four digits.",
            "string",
        ),
        (
            "Teammate #3 name",
            "A team member's name. name is two or three letters of Hangul.",
            "string",
        ),
        (
            "Teammate #3 number",
            "A team member's student number. The student number is four digits.",
            "string",
        ),
        # --- subject flags ---
        ("Physics", "Whether Physics is used. Can be True or False", "boolean"),
        ("Chemistry", "Whether Chemistry is used. Can be True or False", "boolean"),
        ("Biology", "Whether Biology is used. Can be True or False", "boolean"),
        ("EarthScience", "Whether Earth Science is used. Can be True or False", "boolean"),
        # --- media ---
        (
            "Youtube link",
            "A youtube video link from the team. The vido can be played by clicking on the link.",
            "string",
        ),
    )
]

In [9]:
# Few-shot examples for the query constructor prompt.
# Each entry is (user question, expected structured query), where the
# structured query has a free-text "query" and a "filter" expression written
# against the metadata field names.
examples = [
    # Team-code lookup: the constraint goes into an equality filter on
    # "Team code"; the text query is a generic phrase.
    (
        "A23 팀?",
        {
            "query": "작품 설명서",
            "filter": 'eq("Team code", "A23")',
        },
    ),
    # Person lookup: a member may be stored in any of the three teammate
    # slots, so the filter must OR over all three name fields. Omitting a slot
    # would teach the model to miss teams where the person is listed there.
    (
        "이동윤은 뭐했어?",
        {
            "query": "작품 설명서",
            "filter": (
                'or(eq("Teammate #1 name", "이동윤"), '
                'eq("Teammate #2 name", "이동윤"), '
                'eq("Teammate #3 name", "이동윤"))'
            ),
        },
    ),
    # The remaining examples map questions to NO_FILTER and pass the question
    # through unchanged as the text query.
    (
        "환경에 관한 주제로 연구한 팀을 알려줄래?",
        {
            "query": "환경에 관한 주제로 연구한 팀을 알려줄래?",
            "filter": "NO_FILTER",
        },
    ),
    (
        "팀 번호가 B로 시작하는 프로젝트의 주제는 어떤 것이 있어?",
        {
            "query": "팀 번호가 B로 시작하는 프로젝트의 주제는 어떤 것이 있어?",
            "filter": "NO_FILTER",
        },
    ),
    (
        "머신러닝을 사용하지 않은 팀이 있을까?",
        {
            "query": "머신러닝을 사용하지 않은 팀이 있을까?",
            "filter": "NO_FILTER",
        },
    ),
]

In [10]:
from langchain.chains.query_constructor.base import (
    StructuredQueryOutputParser,
    get_query_constructor_prompt,
)

# Build the query-constructor prompt from the document description, the
# metadata schema and the few-shot examples defined above.
# NOTE(review): neither the prompt nor the parser is given allowed
# comparators/operators here, so the model is presumably free to emit ones the
# Chroma translator does not accept — confirm whether they should be restricted.
prompt = get_query_constructor_prompt(
    document_contents="Ocean ICT 대회에 참가한 팀의 작품 설명서.",
    attribute_info=metadata_field_info,
    examples=examples,
)

# Parser that turns the model's text output into a structured query object.
output_parser = StructuredQueryOutputParser.from_components()

# Runnable pipeline: prompt -> chat model -> structured-query parser.
new_query_constructor = prompt | llm | output_parser

In [11]:
from langchain.retrievers.self_query.chroma import ChromaTranslator
# Chroma-specific translator; passed to the retriever below as its
# structured_query_translator.
translator = ChromaTranslator()


### Retriever

In [12]:
# Self-query retriever wired from the custom pieces built above: the few-shot
# query constructor, the Chroma vector store, and the Chroma translator.
self_query_retriever = SelfQueryRetriever(
    vectorstore=vectorstore,
    query_constructor=new_query_constructor,
    structured_query_translator=translator,
)

In [13]:
# Try the retriever with a name-only question ("김환 팀?" ~ "Kim Hwan's team?").
self_query_retriever.invoke('김환 팀?')

: 