In [1]:
from dotenv import load_dotenv
# Load key/value pairs from a .env file into the process environment
# (python-dotenv). The bare call is the last expression of the cell, so its
# return value is what the cell displays.
# NOTE(review): presumably this is where the OpenAI / LangChain API keys used
# by later cells come from — confirm against the local .env file.
load_dotenv()

True

In [2]:
import re
import os
import json
import base64
import asyncio
import platform
import requests
import playwright
import numpy as np
import pandas as pd
import datetime as dt

from enum import Enum
from typing import List
from typing import Dict
from typing import Union
from typing import Optional
from typing import TypedDict
from operator import itemgetter
from playwright.async_api import Page
from playwright.async_api import async_playwright

from IPython import display
from IPython.display import HTML
from IPython.display import Image

from langchain import hub
from langgraph.graph import END
from langgraph.graph import StateGraph
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import MessagesPlaceholder
from langchain.prompts import HumanMessagePromptTemplate
from langchain.prompts import SystemMessagePromptTemplate

from langchain_core.messages import BaseMessage
from langchain_core.messages import SystemMessage

from langchain_core.pydantic_v1 import Field
from langchain_core.pydantic_v1 import BaseModel

from langchain_core.messages.ai import AIMessage
from langchain_core.messages.chat import ChatMessage
from langchain_core.messages.tool import ToolMessage
from langchain_core.messages.human import HumanMessage
from langchain_core.messages.system import SystemMessage
from langchain_core.messages.function import FunctionMessage
from langchain_core.prompts.image import ImagePromptTemplate

from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader

from langchain_core.runnables import RunnableLambda
from langchain_core.runnables import RunnableParallel
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.graph import CurveStyle
from langchain_core.runnables.graph import NodeColors
from langchain_core.runnables.graph import MermaidDrawMethod
from langchain_core.runnables import chain as chain_decorator

from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser

from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
# Tracing configuration for LangChain: both variables are written
# unconditionally, replacing any value already present in the environment.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})

In [4]:
# Web pages that make up the source corpus for this notebook.
urls = [
    'https://lilianweng.github.io/posts/2023-06-23-agent/',
    'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/',
    'https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/',
]


In [5]:
# Fetch every page; `docs` ends up with one list of loaded documents per URL.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
# Flatten the per-URL lists into a single flat list of documents.
docs_list = [doc for page_docs in docs for doc in page_docs]

# Split the documents into chunks; sizes are measured by the tiktoken encoder
# (chunk_size=250, no overlap between neighbouring chunks).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250,
    chunk_overlap=0,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks with OpenAI embeddings and store them in the
# 'rag-chroma' Chroma collection.
vectorstore = Chroma.from_documents(
    embedding=OpenAIEmbeddings(),
    collection_name='rag-chroma',
    documents=doc_splits,
)

# Retriever with the vector store's default search settings.
retriever = vectorstore.as_retriever()

In [7]:
# data model
# Schema passed to `llm.with_structured_output(...)` further down in this cell;
# it has a single field, `binary_score`, typed as a plain `str` (so the type
# itself does not restrict the value to "yes"/"no").
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the schema — treat both as prompt text and
# confirm before rewording them.
class GradeDocuments(BaseModel):
    '''Binary score for relevance check on retrieved documents'''
    binary_score: str = Field(description='Documents are relevant to the question, "yes" or "no"')

# Grader model: chat model at temperature 0 whose output is shaped by the
# GradeDocuments schema.
llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo-0125')
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# System instructions, written one literal per line so the trailing space
# before each newline is explicit.
system = (
    'You are a grader assessing relevance of a retrieved document to a user question. \n'
    'If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n'
    'Give a binary score "yes" or "no" score to indicate whether the document is relevant to the question.'
)
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ('system', system),
        ('human', 'Retrieved document: \n\n {document} \n\n User question: {question}'),
    ]
)

# Prompt piped into the structured-output model.
retrieval_grader = grade_prompt | structured_llm_grader

# Smoke test: grade the second retrieved document against a sample question.
# Note that `docs` is rebound here to the retriever's results.
question = 'agent memory'
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke(dict(question=question, document=doc_txt)))

binary_score='yes'
