In [129]:
import os

In [130]:
os.environ["LANGCHAIN_TRACING_V2"]="true" # enables the tracing
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"]="assignment-6" #project name in the LangSmith platform
os.environ['USER_AGENT'] = 'myagent'

In [131]:
from langchain_openai import ChatOpenAI
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import YoutubeLoader



In [132]:
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [133]:
# Only keep post title, headers, and content from the full HTML.
bs4_strainer = bs4.SoupStrainer()
loader = WebBaseLoader(
    web_paths=("https://lftechnology.atlassian.net/wiki/spaces/LEAP/pages/3971055677/Code+of+Conduct?atlOrigin=eyJpIjoiZDJhZjQwNWFjNGUxNDQyM2I3ODViNjUzNTA5ZDVlOTUiLCJwIjoiYyJ9",),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

len(docs[0].page_content)

202

In [134]:
loader = YoutubeLoader.from_youtube_url(
    "https://youtu.be/noU9iUMIbq0?list=PL1VW8Wpejk2y9zhwprLdxVS79n36dWc39",
    add_video_info=True,
    language=["en", "id"],
    translation="en",
)
docs = loader.load()

In [135]:
print(docs[0].page_content)

[Music]  [Music]  hello and welcome back everyone after  that short intermission uh we've got our  first lightning session speaker for  today we've got biplup subedi project  manager here at leapfrog my fellow  colleague so  let me call him on to the speaker's  platform  are you with us  hi kritika hello everyone yeah i'm here  hi biplob  okay so there was a photo of you that uh  in which you were working from a valley  top um  yeah that went viral on routine of nepal  so  what do you think remote working has  brought in terms of not being tied down  to the four walls of an office  yeah like  two things right i feel so much  privileged because i used to be a big  fan of nature and especially the the  natural beauty of nepal and  even  before three four years it was not  possible because  we didn't had good uh electricity  facility the internet internet coverage  was not good right and since last two  three years um thanks to nea  thanks to all the isps we have  we have good electricity

In [136]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

len(all_splits)

6

In [137]:
len(all_splits[0].page_content)
all_splits[0].metadata

{'source': 'noU9iUMIbq0',
 'title': 'Freedom of Remote Working |  Biplab Subedi | Syntax 2021',
 'description': 'Unknown',
 'view_count': 222,
 'thumbnail_url': 'https://i.ytimg.com/vi/noU9iUMIbq0/hq720.jpg',
 'publish_date': '2021-08-26 00:00:00',
 'length': 332,
 'author': 'Leapfrog Technology',
 'start_index': 0}

In [138]:
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [139]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [140]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question."
    "You can only answer questions about the context."
    "The answer must be referenced from the context only." 
    "If you don't know the answer or the answer cannot be found based"
    "on the context then, say that you don't know." 
    "Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [141]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

for chunk in rag_chain.stream('What is Task Decomposition?'):
    print(chunk, end="", flush=True)

Task Decomposition is crucial when it comes to designing an API as it enhances the user experience by breaking down complex tasks into smaller, more manageable subtasks. It helps in simplifying pagination in hierarchical structures, making it easier for users to navigate through the content efficiently. Task Decomposition is a method that can be applied in various contexts to improve the overall usability and functionality of a system or process.

In [143]:
#built in chain for the implementation of above LCEL
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

response = rag_chain.invoke({"input": "how to eat a banana?"})
print(response["answer"])

I don't know.
