In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_google_genai import GoogleGenerativeAI, ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
import google.generativeai as genai
import os

# Pull variables from a local .env file (if one exists) into the process
# environment before reading the key.
load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Fail fast with an actionable message. Without this guard a missing key
    # surfaces as "TypeError: str expected, not NoneType" from os.environ,
    # and an empty key only fails later on the first API call.
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Define it in the environment or in a .env file "
        "before running this notebook."
    )

os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)

In [2]:
# One splitter shared by both corpora: newline separator, chunk_size=250,
# chunk_overlap=25.
splitter = CharacterTextSplitter(separator="\n", chunk_size=250, chunk_overlap=25)

# "fakta" = facts corpus, "mitos" = myths corpus.
fakta_loader = TextLoader("./data/fakta.txt")
mitos_loader = TextLoader("./data/mitos.txt")

fakta_docs = fakta_loader.load()
mitos_docs = mitos_loader.load()

# Lowercase the text of the first loaded document of each corpus, in place,
# before it is chunked.
for _loaded in (fakta_docs, mitos_docs):
    _first = _loaded[0]
    _first.page_content = _first.page_content.lower()

fakta = splitter.split_documents(fakta_docs)
mitos = splitter.split_documents(mitos_docs)

In [3]:
# A single embedding model is used for both indexes.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# One FAISS index per corpus, built from the chunks produced above.
db_fakta = FAISS.from_documents(fakta, embeddings)
db_mitos = FAISS.from_documents(mitos, embeddings)

# Both question-answering chains share the same chat model; each one is
# wired to the retriever of its own index.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

fakta_retriever = db_fakta.as_retriever()
qa_fakta = RetrievalQA.from_chain_type(llm=llm, retriever=fakta_retriever)

mitos_retriever = db_mitos.as_retriever()
qa_mitos = RetrievalQA.from_chain_type(llm=llm, retriever=mitos_retriever)


In [8]:
def classify_input_with_similarity(input_text, fact_store, myth_store, qa_fakta, qa_mitos, threshold=0.6):
    """Label a statement as fact ("Fakta"), myth ("Mitos") or off-topic.

    The statement is lowercased, then the single best hit (k=1) is requested
    from each store via ``similarity_search_with_relevance_scores``. The store
    with the higher top score decides the label, and the matching QA chain is
    invoked with the lowercased statement to produce the explanation.

    Args:
        input_text: The statement to classify (a string).
        fact_store: Store searched for fact chunks; must provide
            ``similarity_search_with_relevance_scores(query, k=...)`` returning
            a sequence whose items carry the score at index 1.
        myth_store: Store searched for myth chunks; same interface.
        qa_fakta: Chain invoked when the fact store wins; ``invoke(query)``
            must return a mapping with a ``'result'`` key.
        qa_mitos: Chain invoked otherwise; same interface.
        threshold: Minimum top score required for the statement to be
            considered on-topic. Defaults to 0.6.

    Returns:
        ``"Out of Topic"`` when the best score of both stores is below
        ``threshold``; otherwise ``"Fakta\\n<answer>"`` or ``"Mitos\\n<answer>"``.
        A tie between the two scores is labelled ``"Mitos"``.
    """
    input_text = input_text.lower()

    def _top_score(store):
        # A store may return no hits at all (e.g. an empty index); treat that
        # as "no match" instead of failing with IndexError on hits[0].
        hits = store.similarity_search_with_relevance_scores(input_text, k=1)
        return hits[0][1] if hits else float("-inf")

    fakta_score = _top_score(fact_store)
    mitos_score = _top_score(myth_store)

    if max(fakta_score, mitos_score) < threshold:
        return "Out of Topic"

    # Strict comparison: equal scores fall through to the myth branch.
    if fakta_score > mitos_score:
        label, chain = "Fakta", qa_fakta
    else:
        label, chain = "Mitos", qa_mitos

    # NOTE(review): the chain only receives the statement, not the label
    # chosen here, so the generated answer may contradict the label.
    answer = chain.invoke(input_text)
    return f"{label}\n{answer['result']}"

In [9]:
# Example statement to classify.
input_text = "Menggunakan kacamata minus bisa menyebabkan mata lebih lelah"

# threshold is left at the function's default.
classification = classify_input_with_similarity(
    input_text,
    fact_store=db_fakta,
    myth_store=db_mitos,
    qa_fakta=qa_fakta,
    qa_mitos=qa_mitos,
)
print(f"Klasifikasi: {classification}")

Klasifikasi: Mitos
Benar, menggunakan kacamata minus bisa menyebabkan mata lebih lelah. 



In [2]:
# Sanity check: number of chunks in each corpus, as (mitos, fakta).
tuple(len(chunks) for chunks in (mitos, fakta))

(60, 64)