### Python requirements

In [None]:
# Install the notebook's dependencies into the kernel's environment (%pip targets the running kernel).
# NOTE(review): no versions are pinned here — consider pinning them so re-runs are reproducible.
%pip install langchain-nomic langchain_community tiktoken langchainhub langchain-elasticsearch langchain langgraph tavily-python gpt4all langchain-text-splitters bs4

### environments

In [1]:
# Name of the chat model handed to ChatOllama in the retrieval-grader cell.
# NOTE(review): presumably this model must already be available to the local Ollama server — confirm.
local_llm = "llama3"

### indexing

In [4]:
import os

from langchain_community.document_loaders import WebBaseLoader
from langchain_elasticsearch import ElasticsearchStore
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Articles to scrape and index.
urls = [
    "https://medium.com/@WLWisdomPro/i-tried-the-lightning-diet-and-lost-40-pounds-in-30-days-here-s-what-happened-8a035beeef7f",
    "https://readmedium.com/en/https://medium.com/write-a-catalyst/i-lost-10-of-my-body-fat-with-5-small-diet-changes-f9aeefe3793d",
    "https://readmedium.com/en/https://medium.com/smart-quitter/change-1-sneaky-diet-mistake-if-you-dont-want-to-be-fat-over-40-d18a5ecb2620",
]

# Each loader call yields a list of documents for one URL; flatten them into a single list.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split the pages into chunks sized with the tiktoken encoder (chunk_size=250, no overlap).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)
documents = doc_splits

embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# Elasticsearch connection settings are read from the environment so that no
# credentials are stored in the notebook. Set ES_URL, ES_USER and ES_PASSWORD
# before running this cell.
# NOTE(review): any password that was previously committed in this notebook
# should be treated as compromised and rotated.
_missing_es_settings = [
    name for name in ("ES_URL", "ES_USER", "ES_PASSWORD") if not os.environ.get(name)
]
if _missing_es_settings:
    raise RuntimeError(
        "Missing Elasticsearch settings in the environment: "
        + ", ".join(_missing_es_settings)
    )

# Embed the chunks and write them to the Elasticsearch index.
db = ElasticsearchStore.from_documents(
    documents,
    embeddings,
    es_url=os.environ["ES_URL"],
    es_user=os.environ["ES_USER"],
    es_password=os.environ["ES_PASSWORD"],
    index_name="metrics-endpoint.metadata_current_default",
)

retriever = db.as_retriever()

### retrieval grader

In [5]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
import json

# Chat model used for grading. format="json" is requested so that the reply can
# be parsed by JsonOutputParser below; temperature=0 is passed for repeatable grading.
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt asking the model to score how well a document answers a question.
prompt = PromptTemplate(
    template="""You are a retrieval grader. Given the question and the document below, evaluate if the document directly answers the question.
    Question: {question}
    Document: {document}
    Respond in JSON format with fields:
    - relevance: a score from 1 (not relevant) to 5 (highly relevant)
    - explanation: a brief explanation for the score.""",
    input_variables=["question", "document"],
)

# Pipeline: fill the prompt -> call the model -> parse the JSON reply.
retrieval_grader = prompt | llm | JsonOutputParser()

test_question = "What is the best diet to lose calories?"

# Retrieve documents
docs = retriever.invoke(test_question)

if docs:
    # Grade the second hit when there is one; clamp the index so that a
    # single-document result falls back to the first hit instead of raising
    # IndexError.
    doc_txt = docs[min(1, len(docs) - 1)].page_content

    result = retrieval_grader.invoke({"question": test_question, "document": doc_txt})

    print("\nGrader Result:")
    print(json.dumps(result, indent=2))
else:
    print("No documents retrieved.")



Grader Result:
{
  "relevance": 2,
  "explanation": "The document discusses the author's personal experience of losing 10% body fat through five dietary changes, but it does not directly answer the question 'What is the best diet to lose calories?' The article provides general tips and advice on how to achieve weight loss, but it does not provide a specific diet plan or recommendation. Therefore, the relevance score is 2 out of 5."
}
