In [160]:
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import OllamaLLM

In [161]:
import requests
from bs4 import BeautifulSoup

class Website:
    """Fetch a web page and expose its readable text and outgoing links.

    Attributes:
        url: The address the instance was created with.
        text: Newline-joined text of the page's paragraphs and headings
            (plus the text of linked pages when ``extend_search`` is true).
        links: Absolute URLs found on the page. Always a list, possibly empty.
    """

    # Elements stripped from <body> before text extraction.
    _IRRELEVANT_TAGS = ['script', 'style', 'img', 'input', 'iframe', 'button', 'nav', 'footer']
    # Elements whose text makes up ``text``.
    _TEXT_TAGS = ["p", "h1", "h2", "h3", "h4", "h5", "h6"]
    # href values that do not point at a fetchable page.
    _SKIPPED_PREFIXES = ("mailto:", "tel:", "javascript:", "#")

    def __init__(self, url, *, extend_search=False):
        """Download ``url`` and populate ``text`` and ``links``.

        Args:
            url: Address of the page to scrape.
            extend_search: When true, every link found on the page is fetched
                as well and its text is appended to ``text``.

        Raises:
            requests.RequestException: If the request for ``url`` itself fails.
        """
        self.url = url
        page_text, hrefs = self._get_contents(url)
        self.text = page_text
        # Assigned unconditionally so the attribute exists even for a page
        # without any anchors.
        self.links = self._parse_links(hrefs)

        if extend_search:
            for link in self.links:
                try:
                    linked_text, _ = self._get_contents(link)
                except requests.RequestException as exc:
                    # One unreachable link should not discard everything
                    # that was already collected.
                    print(f"Skipping {link}: {exc}")
                    continue
                # Newline keeps the last line of one page from fusing with
                # the first line of the next.
                self.text += "\n" + linked_text
                print("Text from a link was added")

    @staticmethod
    def _get_contents(link, timeout=10):
        """Download ``link`` and return ``(text, hrefs)``.

        Args:
            link: URL to request.
            timeout: Seconds passed to ``requests.get`` so a stalled server
                cannot block the call indefinitely.

        Returns:
            tuple: ``text`` is the newline-joined text of all paragraph and
            heading elements ("" when the document has no <body>); ``hrefs``
            is the raw ``href`` value of every <a> element (entries may be
            ``None`` or relative).
        """
        response = requests.get(link, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Collect hrefs before pruning: nav/footer/button elements are removed
        # below, and anchors nested inside them would otherwise be lost.
        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]

        if soup.body:
            for irrelevant in soup.body(Website._IRRELEVANT_TAGS):
                irrelevant.decompose()
            text = "\n".join(
                element.get_text(strip=True)
                for element in soup.find_all(Website._TEXT_TAGS)
            )
        else:
            text = ""

        return text, hrefs

    def _parse_links(self, links):
        """Turn raw ``href`` values into de-duplicated absolute URLs.

        Args:
            links: Iterable of ``href`` values (``None`` and "" entries are
                ignored); ``None`` is treated as an empty iterable.

        Returns:
            list: Absolute URLs in first-seen order. ``mailto:``, ``tel:``,
            ``javascript:`` and fragment-only links are dropped, bare
            ``www.`` hosts get an ``https://`` scheme, and everything else is
            resolved against ``self.url``.
        """
        parsed_links = []
        seen = set()
        for href in links or []:
            if not href:
                continue
            href = href.strip()
            if not href or href.lower().startswith(self._SKIPPED_PREFIXES):
                continue
            if href.startswith("www."):
                # Scheme-less host: urljoin would treat it as a relative path.
                absolute = "https://" + href
            else:
                # Handles absolute, root-relative, protocol-relative and
                # path-relative references uniformly.
                absolute = urljoin(self.url, href)
            if absolute not in seen:
                seen.add(absolute)
                parsed_links.append(absolute)
        return parsed_links

In [172]:
# System message for the link-selection step. Literal braces in the JSON
# example are doubled ({{ }}) so the prompt template does not treat them as
# placeholders.
link_system_prompt = """
You are provided with a list of links on a webpage.
You must decide which of these links would be the most relevant to include in a brochure about the company.
Do not include Terms of Service, privacy, or email links.
Respond in valid JSON format exactly as shown in the example below.
Make sure that all keys and string values (including URLs) are enclosed in double quotes.

Example:
{{
    "links": [
        {{ "type": "about page", "url": "https://full.url/goes/here/about" }},
        {{ "type": "careers page", "url": "https://full.url/goes/here/careers" }}
    ]
}}
"""

# Human message for the link-selection step; {links} is the only placeholder.
link_user_prompt = """
    Here is the list of links on the website:
    {links}.
    
    Please decide which of these are relevant web links for a brochure about the company:
"""

def get_links(url):
    """Scrape ``url`` and return its links as input for the link prompt.

    Args:
        url: Address of the page to scrape; passed to ``Website``.

    Returns:
        dict: ``{"links": <newline-separated URLs>}``. The string is empty
        when the page yields no links.
    """
    website = Website(url)
    # Defensive: treat a missing or empty ``links`` attribute as "no links"
    # rather than raising AttributeError.
    page_links = getattr(website, "links", None) or []
    return {"links": "\n".join(page_links)}

# Name of the local Ollama model. It has to be available to the Ollama server
# before any chain runs (e.g. `ollama pull mistral`); otherwise invoking a
# chain fails with "model 'mistral' not found (status code: 404)".
OLLAMA_MODEL = "mistral"

llm = OllamaLLM(model=OLLAMA_MODEL)

# The system message carries the selection rules and the JSON example; the
# human message supplies the scraped links through the {links} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", link_system_prompt),
        ("human", link_user_prompt),
    ]
)

In [173]:
from langchain_core.output_parsers import JsonOutputParser

# Link-selection pipeline:
# URL -> scraped link list -> chat prompt -> local LLM -> parsed JSON.
chain = (
    RunnableLambda(get_links)
    | prompt
    | llm
    | JsonOutputParser()
)
rel_links = chain.invoke('https://www.langchain.com/')

ResponseError: model 'mistral' not found (status code: 404)

In [165]:
def get_text_for_all_links(relevant_links):
    """Fetch every selected page and concatenate the texts into one context string.

    Args:
        relevant_links: Mapping shaped like the link-selection example,
            ``{"links": [{"type": ..., "url": ...}, ...]}``. A missing or
            empty ``"links"`` entry yields an empty string.

    Returns:
        str: One ``"Page: <type>"`` header line followed by the page text for
        each fetched link, with a blank line between pages. Entries without a
        ``"url"`` and pages that cannot be fetched are skipped.
    """
    sections = []
    for link in (relevant_links or {}).get("links") or []:
        # The mapping comes from parsed model output, so tolerate entries
        # that are not dicts or that lack the expected keys.
        url = link.get("url") if isinstance(link, dict) else None
        if not url:
            continue
        try:
            page_text = Website(url).text
        except requests.RequestException as exc:
            # A single unreachable URL should not discard the other pages.
            print(f"Skipping {url}: {exc}")
            continue
        # Explicit newlines keep the header, the body and the next header
        # from running together.
        sections.append(f"Page: {link.get('type', 'unknown')}\n{page_text}\n")
    return "\n".join(sections)
    
# Combined text of every page in rel_links; passed as "content" to the brochure chain.
text = get_text_for_all_links(rel_links)

In [166]:
# System message for the brochure step; {content} receives the scraped page text.
make_brochure_system_template = """
You are an expert at analyzing the contents of several pages from a company website.
Using the provided content, create a short, attractive brochure for the company.
Include details that capture the company's essence solely based on the context provided.
Return your response in Markdown format, using appropriate headings, bullet points, and emphasis.

Context:

{content}
"""

# Human message for the brochure step; {company_name} is the only placeholder.
make_brochure_user_template = """
Create a short brochure for {company_name} company:
"""

# Brochure prompt: writing instructions plus scraped context as the system
# message, the per-company request as the human message.
final_prompt = ChatPromptTemplate.from_messages([
    ("system", make_brochure_system_template),
    ("human", make_brochure_user_template),
])

In [167]:
# Brochure pipeline: fill the prompt, query the model, return plain text.
final_chain = (
    final_prompt
    | llm
    | StrOutputParser()
)

In [170]:
# The keys match the {content} and {company_name} placeholders of the brochure templates.
brochure = final_chain.invoke(
    {
        "content": text,
        "company_name": "LangChain",
    }
)

In [171]:
from IPython.display import Markdown, display
# Render the generated brochure string as formatted Markdown in the cell output.
display(Markdown(brochure))

Here is a potential brochure design for LangChain:

**[Cover Page]**

[Image of a futuristic AI landscape]

Welcome to LangChain
Accelerating GenAI Development

**[Page 1: Introduction]**

At LangChain, we believe that the future of artificial intelligence is here. Our mission is to empower developers and organizations to build, deploy, and maintain reliable and scalable GenAI applications.

With LangChain's cutting-edge products, you can:

* Boost your development speed by up to 10x
* Improve model accuracy and performance
* Reduce engineering intervention by up to 90%
* Enhance collaboration with our open-source community

**[Page 2: Products]**

Our suite of products includes:

* **LangChain**: Our flagship product, powering the world's top GenAI applications.
* **LangGraph**: A powerful graph-based framework for building scalable AI models.
* **LangSmith**: An expert system that accelerates model training and iteration.
* **LangGrapht**: A flexible graph processing engine for complex data pipelines.

**[Page 3: Customer Success Stories]**

Don't just take our word for it. Our customers have seen significant improvements in development speed, accuracy, and reliability:

* Klarna's AI assistant redefined customer support at scale for 85 million active users
* Rakuten Group builds with LangChain and LangSmith to deliver premium products for its business clients
* Replit redefined their AI agent workflows with LangGraph and LangSmith

**[Page 4: Benefits]**

By using LangChain, you'll enjoy:

* **Faster Development**: Build and deploy GenAI applications up to 10x faster.
* **Improved Accuracy**: Boost model accuracy and performance by up to 50%.
* **Reduced Engineering Intervention**: Automate tedious tasks and focus on high-value work.
* **Enhanced Collaboration**: Join our open-source community and tap into expert knowledge.

**[Back Cover]**

Ready to start shipping reliable GenAI apps faster? Try LangChain today!

[Contact Information: Email, Phone Number, Website]

Let's build the future of AI together.