# [문제]
- law_2.docx 파일을 읽고, Chroma에 저장
- LLM 질문 -> 답변
- 전세사기피해에 관한 법률 질문만 받기
- 이 외의 질문에는 '답변을 할 수 없습니다'라고 답변하기

In [None]:
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.document_loaders import Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Build the vector index: read law_2.docx, split it into overlapping chunks,
# embed each chunk and persist the result in a local Chroma collection.
loader = Docx2txtLoader('law_2.docx')

# Chunks of up to 1500 characters, with 200 characters shared between
# neighbouring chunks so text cut at a boundary still appears whole in one.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
)

document_list = loader.load_and_split(text_splitter=text_splitter)

# Load environment variables from .env before the OpenAI client is created.
load_dotenv()

embedding = OpenAIEmbeddings(model='text-embedding-3-large')

# Deterministic per-chunk ids: without them every run of this cell stores the
# chunks under fresh random ids, so re-running appends a duplicate copy of the
# whole document to the persisted collection. With stable ids a re-run
# overwrites the existing records instead.
# NOTE(review): if the chunk count shrinks (e.g. after changing chunk_size),
# records with higher indexes from an earlier run stay in the collection.
chunk_ids = [f'law_2-{index}' for index in range(len(document_list))]

database = Chroma.from_documents(
    documents=document_list,
    embedding=embedding,
    ids=chunk_ids,
    persist_directory='./chroma',
    collection_name='chroma-law-2',
)

In [7]:

# Answer a question from the persisted law collection, refusing questions
# whose best-matching chunk is too far from the query.

# Re-open the collection persisted under ./chroma with the same embedding.
database = Chroma(
    collection_name='chroma-law-2',
    persist_directory='./chroma',
    embedding_function=embedding,
)

query = '딸기 가격은?'

# Largest distance still treated as "about the indexed law"; anything above
# it is refused.
THRESHOLD = 1.5

# Each result is a (document, score) pair; the score is printed as a distance
# and compared against THRESHOLD, so smaller means a closer match.
docs_and_scores = database.similarity_search_with_score(query=query, k=1)

# Only report the distance when there is a hit: indexing an empty result list
# would raise IndexError before the refusal branch could run.
if docs_and_scores:
    print("distance =", docs_and_scores[0][1])

if not docs_and_scores or docs_and_scores[0][1] > THRESHOLD:
    print("답변을 할 수 없습니다")
else:
    retrieved_docs = [docs_and_scores[0][0]]

    # Feed the LLM the chunk text itself rather than the repr of a list of
    # Document objects (which adds metadata and escaped newlines to the prompt).
    context_text = '\n\n'.join(doc.page_content for doc in retrieved_docs)

    prompt = '''
    [identity]
    - 당신은 전세사기피해 법률 전문가입니다.
    - [context]를 참고하여 사용자의 질문에 답변해주세요.

    [context]
    {retrieved_docs}

    Question: {query}
    '''

    formatted_prompt = prompt.format(retrieved_docs=context_text, query=query)

    llm = ChatOpenAI()

    ai_message = llm.invoke(formatted_prompt)

    print(ai_message.content)

distance = 1.711654782295227
답변을 할 수 없습니다
