## Builds a brochure for any given website with important highlights of the company


In [151]:
import requests
from bs4 import BeautifulSoup
import ollama
import json
from IPython.display import Markdown

MODEL = 'llama3.2'

In [2]:
class Website:
    """Snapshot of a single web page: its title, visible text and links.

    The page is downloaded and parsed once, in the constructor.

    Attributes set by ``__init__``:
        url: The address that was requested.
        body: Raw response bytes.
        title: Text of the ``<title>`` tag, or "No title found".
        text: Newline-separated text of ``<body>`` with script, style,
            img and input elements removed ("" when there is no body).
        links: Non-empty ``href`` values of every ``<a>`` tag, exactly as
            written in the page (they may be relative).
    """

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract its title, text and links.

        Args:
            url: Page address to fetch.
            timeout: Seconds passed to ``requests.get`` so an unresponsive
                server cannot block the caller forever.

        Raises:
            requests.RequestException: Propagated from ``requests.get``
                (connection failure, timeout, ...).
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``.string`` is None when <title> is empty or contains nested tags;
        # fall back to the placeholder instead of storing None.
        title_tag = soup.title
        if title_tag and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        if soup.body:
            # Drop elements that carry no readable prose before extracting text.
            for irrelevant in soup.body(['script', 'style', 'img', 'input']):
                irrelevant.decompose()
            self.text = soup.body.getText(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and page text formatted for inclusion in a prompt."""
        return f"Website Title: {self.title}\nWebpage Contents:\n{self.text}\n\n"


In [3]:
# System prompt for the link-selection call: describes the task and shows the
# JSON shape the reply must follow. Each fragment ends with an explicit space
# or newline because adjacent string literals are joined with nothing between.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links will be most relevant to include in a briefer about the company, "
    "such as links to an About page, or a Company page, or Careers/jobs pages.\n"
    "You should respond in JSON as in this example:"
)

# The example must itself be valid JSON (no trailing comma), since the model
# is asked to imitate it and the reply is parsed with json.loads.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://company-url/about"},
        {"type": "careers page", "url": "https://company-full-url/jobs"}
    ]
}
"""


In [23]:
def get_links_user_prompt(website):
    """Build the user message asking the model to pick brochure-relevant links.

    Args:
        website: Object exposing ``url`` and ``links`` (e.g. a ``Website``).

    Returns:
        The instruction text followed by the ``links`` list rendered with
        its default string representation.
    """
    # Built from adjacent literals rather than a backslash continuation, so
    # the source indentation does not leak into the prompt text.
    user_prompt = (
        f"Here is the list of links on the website {website.url} - "
        "please decide which of these are relevant web links for a company briefer, "
        "strictly respond only in JSON format with full https URLs. "
        "Do not include Terms or privacy, service or email links."
    )
    user_prompt += f"\nLinks (some might be relevant links): \n{website.links}"
    return user_prompt

In [24]:
# Preview the link-selection prompt for a real site; constructing Website performs a live HTTP request.
print(get_links_user_prompt(Website("https://huggingface.co")))

Here is the list of links on the website https://huggingface.co - please decide which of these are relevant web links for a company briefer, strictly respond only in JSON format with full https URLs.    Do not include Terms or privacy, service or email links.
Links (some might be relevant links): 
['/', '/models', '/datasets', '/spaces', '/docs', '/enterprise', '/pricing', '/login', '/join', '/spaces', '/models', '/tencent/SRPO', '/openbmb/VoxCPM-0.5B', '/Alibaba-NLP/Tongyi-DeepResearch-30B-A3B', '/google/vaultgemma-1b', '/Qwen/Qwen3-Next-80B-A3B-Instruct', '/models', '/spaces/enzostvs/deepsite', '/spaces/zerogpu-aoti/wan2-2-fp8da-aoti-faster', '/spaces/IndexTeam/IndexTTS-2-Demo', '/spaces/multimodalart/wan-2-2-first-last-frame', '/spaces/abdul9999/NoWatermark', '/spaces', '/datasets/HuggingFaceFW/finepdfs', '/datasets/LucasFang/FLUX-Reason-6M', '/datasets/fka/awesome-chatgpt-prompts', '/datasets/InternRobotics/OmniWorld', '/datasets/HuggingFaceM4/FineVision', '/datasets', '/join', '/p

In [90]:
def get_relevant_links(url):
    """Ask the model which links on ``url`` belong in a company brochure.

    Args:
        url: Address of the page whose links are to be classified.

    Returns:
        The model's reply as a string. The system prompt requests a JSON
        object with a ``links`` list; the reply is not parsed or validated
        here.
    """
    website = Website(url)
    response = ollama.chat(
        MODEL,
        messages=[
            {'role': 'system', 'content': link_system_prompt},
            {'role': 'user', 'content': get_links_user_prompt(website)},
        ],
        # Constrain the reply to JSON so that callers can hand it to
        # json.loads without stripping prose or code fences first.
        format='json',
    )
    return response.message['content']

In [98]:
# Ask the model which links on the LangChain site are relevant; the result is the raw reply text.
response = get_relevant_links("https://www.langchain.com")

In [99]:
# Show the reply as returned by the model (a string, not yet parsed as JSON).
print(response)

{
    "links": [
        {"type": "about page", "url": "https://www.langchain.com/about"},
        {"type": "careers page", "url": "https://www.langchain.com/careers"},
        {"type": "pricing page", "url": "https://www.langchain.com/pricing"}
    ]
}


In [130]:
# Gather the landing page plus every page the model selected as relevant.
def get_all_details(url):
    """Return the text of the landing page and of each relevant linked page.

    Args:
        url: Landing-page address of the company website.

    Returns:
        One string: a "Landing page:" section followed by one section per
        selected link, each headed by the link's ``type``.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    # Imported locally so the function does not depend on edits to the
    # notebook's import cell.
    from urllib.parse import urljoin

    sections = ["Landing page:\n", Website(url).get_contents()]
    links_json = json.loads(get_relevant_links(url))

    for link in links_json.get('links', []):
        target = link.get('url')
        if not target:
            # An entry without a URL cannot be fetched; skip it.
            continue
        # Single quotes inside the f-string: reusing the enclosing double
        # quote is a SyntaxError before Python 3.12.
        sections.append(f"\n\n{link.get('type', 'page')}\n")
        # The model may echo a relative href despite being asked for full
        # URLs; urljoin leaves absolute URLs untouched.
        sections.append(Website(urljoin(url, target)).get_contents())

    return "".join(sections)

In [None]:
# Collect the landing-page text plus the text of each model-selected page for LangChain.
link_details = get_all_details("https://www.langchain.com")

In [135]:
# System prompt for the brochure-writing call. Written as adjacent literals
# with explicit trailing spaces so that the sentences join cleanly.
system_prompt_brochure = (
    "You are an assistant that analyses the contents of several different pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown format. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [136]:
def get_brochure_user_prompt(company_name, url, max_chars=5000):
    """Build the user message for the brochure-writing call.

    Args:
        company_name: Name to present to the model.
        url: Landing-page address; its content and that of the relevant
            linked pages is appended via ``get_all_details``.
        max_chars: Upper bound on the prompt length in characters; anything
            beyond it is cut off.

    Returns:
        The prompt text, truncated to ``max_chars`` characters.
    """
    # The newline keeps the company name from running into the next sentence.
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += (
        "Here are the contents of the landing page and other relevant pages; "
        "use this information to build a short brochure of the company in markdown format.\n"
    )
    user_prompt += get_all_details(url)
    return user_prompt[:max_chars]

In [139]:
# Inspect the assembled user prompt for LangChain; the call fetches the site and queries the model for links.
get_brochure_user_prompt("LangChain", "https://www.langchain.com")

"You are looking at a company called: LangChainHere are the contents of the landing page and other relevant pages; use this information to build a short brochure of the company in markdown format.\nLanding page:\nWebsite Title: LangChain\nWebpage Contents:\nProducts\nFrameworks\nLangGraph\nLangChain\nPlatforms\nLangSmith\nLangGraph Platform\nResources\nGuides\nBlog\nCustomer Stories\nLangChain Academy\nCommunity\nEvents\nChangelog\nDocs\nPython\nLangGraph\nLangSmith\nLangChain\nJavaScript\nLangGraph\nLangSmith\nLangChain\nCompany\nAbout\nCareers\nPricing\nGet a demo\nSign up\nProducts\nFrameworks\nLangGraph\nLangChain\nPlatforms\nLangSmith\nLangGraph Platform\nResources\nGuides\nBlog\nCustomer Stories\nLangChain Academy\nCommunity\nEvents\nChangelog\nDocs\nPython\nLangGraph\nLangSmith\nLangChain\nJavaScript\nLangGraph\nLangSmith\nLangChain\nCompany\nAbout\nCareers\nPricing\nGet a demo\nSign up\nThe platform for\nreliable agents.\nTools for every step of the agent development lifecycle 

In [153]:
def create_brochure(company_name, url):
    """Generate a markdown brochure for a company and render it.

    Args:
        company_name: Name to present to the model.
        url: Landing-page address of the company website.

    Returns:
        The brochure as a markdown string, so that callers assigning the
        result receive the text rather than ``None``.
    """
    response = ollama.chat(
        MODEL,
        messages=[
            {'role': 'system', 'content': system_prompt_brochure},
            {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)},
        ],
    )
    result = response.message['content']
    # NOTE(review): `display` is not imported in this file; presumably it is
    # the builtin that the notebook environment injects.
    display(Markdown(result))
    return result

In [154]:
# Generate and render the brochure for LangChain.
brochure = create_brochure("LangChain", "https://www.langchain.com")

**LangChain Brochure**
=====================

**Unlock the Power of Reliable Agents with LangChain**

At LangChain, we're revolutionizing the way applications are built and deployed. Our suite of products is designed to help you unlock powerful AI in production, accelerate agent development, and build faster with templates and a visual agent IDE.

**What is LangChain?**
------------------------

LangChain is a comprehensive platform for building reliable agents that can handle sophisticated tasks with control. Our platform consists of:

* **LangGraph**: A controllable agent orchestration framework with built-in persistence to handle conversational history, memory, and agent-to-agent collaboration.
* **LangSmith**: An evaluation and observability tool that helps you debug poor-performing LLM app runs and evaluate agent performance at scale.
* **LangGraph Platform**: A deployment and management platform for enterprise-grade agents with long-running workflows.

**Our Products**
-----------------

### LangChain Academy

* Learn alongside the 1M+ practitioners in our developer community
* Stay up-to-date with the latest industry trends and best practices

### Copilots

* Unlock new end-user experiences for domain-specific tasks with native co-pilots
* Improve the speed and efficiency of support teams that handle customer requests

### Enterprise GPT

* Give all employees access to information and tools in a compliant manner
* Enable employees to perform their best with LangChain's AI-powered solutions

**Customer Stories**
-------------------

* **Klarna**: Reduced average customer query resolution time by 80% using LangSmith and LangGraph
* **Global Logistics Provider**: Saved 600 hours a day using an automated order system built on LangGraph and LangSmith
* **Trellix**: Cut log parsing from days to minutes using LangGraph and LangSmith

**Join the LangChain Community**
------------------------------

Discover how our products are driving operational efficiency, increasing discovery & personalization, and delivering premium products that generate revenue.

**Get Started with LangChain**
---------------------------

* Request a demo to see our products in action
* Sign up for our newsletter to stay up-to-date with the latest news and updates

**Stay Ahead of the Competition**
---------------------------------

Don't miss out on the opportunity to unlock powerful AI in production with LangChain. Join our community today and start building faster, getting to production quicker, and growing visibility – all with less set up and friction.

Contact Us
----------

Email: [info@langchain.com](mailto:info@langchain.com)
Phone: +1 555 123 4567

Website: langchain.com