In [79]:
import os
from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from openai import OpenAI
import json
from IPython.display import display, Markdown, update_display

In [80]:
# Load key/value pairs from a local .env file into the process environment (python-dotenv).
load_dotenv()

True

In [81]:
# Parse web pages that rely heavily on JavaScript to render their content.
# Download the Chrome driver matching your installed Chrome version from https://developer.chrome.com/docs/chromedriver/downloads
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options


def get_website_content_using_selenium(url, driver_path='chromedriver-win64/chromedriver.exe'):
    """Load ``url`` in Chrome through Selenium and return the page's HTML.

    Args:
        url: Address of the page to load.
        driver_path: Location of the chromedriver executable. The default is
            the relative Windows path this function previously hard-coded.

    Returns:
        The value of ``driver.page_source`` once ``driver.get(url)`` returns.

    Raises:
        Any exception raised by ``webdriver.Chrome`` or ``driver.get``. When
        navigation fails after the browser was started, the browser is still
        shut down before the exception propagates.
    """
    options = Options()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(url)
        return driver.page_source
    finally:
        # Without this, a failed or interrupted page load would leave the
        # Chrome/chromedriver processes running.
        driver.quit()

In [82]:
from sympy import content


class Website():
    """Fetches pages of a site and extracts either their links or their text.

    Pages are requested with ``requests``; when the server answers HTTP 403
    the page is loaded through ``get_website_content_using_selenium`` instead.
    """

    def __init__(self, url, timeout=30):
        """Store the site's root URL and the request settings.

        Args:
            url: Root URL of the website.
            timeout: Seconds passed to ``requests.get`` as ``timeout`` so a
                stalled server cannot block the notebook indefinitely.
        """
        self.url = url
        self.timeout = timeout
        # Some websites need you to use proper headers when fetching them:
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
        }

    def _fetch_html(self, url):
        """Return the HTML for ``url``, using Selenium when the server answers 403.

        Raises:
            The HTTP error raised by ``response.raise_for_status()`` for any
            other 4xx/5xx answer, instead of handing ``None`` to the parser.
        """
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        if response.status_code == 403:
            return get_website_content_using_selenium(url)
        response.raise_for_status()
        return response.content

    def get_urls_from_website(self):
        """Return the ``href`` value of every ``<a href=...>`` on ``self.url``.

        The values are returned exactly as written in the page, so they may
        be relative.
        """
        parser = BeautifulSoup(self._fetch_html(self.url), 'html.parser')
        return [a['href'] for a in parser.find_all('a', href=True)]

    def get_text_from_url(self, url):
        """Return the newline-separated text of ``url`` after dropping
        ``script``, ``style``, ``img`` and ``input`` elements."""
        parser = BeautifulSoup(self._fetch_html(url), 'html.parser')
        for irrelevant in parser(['script', 'style', 'img', 'input']):
            irrelevant.decompose()
        # Documents without a <body> element would otherwise raise
        # AttributeError on ``None``; fall back to the whole document.
        root = parser.body if parser.body is not None else parser
        return root.get_text(separator='\n', strip=True)

In [83]:
# Target site for the brochure; `website` and `urls` are read by the prompt cells further down.
website = Website("https://www.anthropic.com")
# Collect the href of every link on the landing page (Selenium is used if the server answers 403).
urls = website.get_urls_from_website()

KeyboardInterrupt: 

In [None]:
# System prompt for the link-selection step: the model is asked to return only
# the absolute URLs it considers relevant for a company brochure.
system_prompt = """
You will be provided with a website and a list of URL from it. Some of the URL in the 
list can be relative.
Some of the URLs may be relative and you need to convert them to absolute URLs. 
Your eventual goal is to create a company brochure from its website. As such you'll
specifically look for "about", "company", "careers" and "contact" pages and any other related content which you find may be most relevant.
Your task is to only return the list of absolute URLs which you think are most relevant for the brochure. DO NOT return any other explanation or content.
"""

In [None]:
# User prompt for the link-selection step: embeds the site URL and the scraped
# `urls` list, and shows the model an example of the expected JSON list reply.
user_prompt = f"""
The website you have been provided is of {website.url}. \nThe list of URLs from this is 
website is provided to you are {urls}. \n
Please provide the list of absolute URLs which you think are most relevant for the brochure.
Your output should be strictly in JSON format and should be a list of absolute URLs.
For example, the output should look like this:
["https://www.anthropic.com/about","https://www.anthropic.com/company","https://www.anthropic.com/careers"]
"""

In [None]:
import re


# Schema for the reply: a JSON array whose items are URL strings.
# A response_format of type "json_schema" is only meaningful when it carries
# a "json_schema" payload; without one there is nothing to constrain the
# output against.
# NOTE(review): OpenAI's hosted API expects an object at the schema root in
# strict mode; this request targets the local endpoint on port 11434 —
# confirm the installed server version honours an array-root schema.
URL_LIST_SCHEMA = {
    "type": "array",
    "items": {"type": "string"},
}

openai = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
response = openai.chat.completions.create(
    model="llama3.2",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "relevant_urls", "schema": URL_LIST_SCHEMA},
    },
)

# Raw text of the model's reply; it is parsed in the next cell.
relevant_urls = response.choices[0].message.content

In [None]:
# The model may wrap its answer in a Markdown fence, possibly tagged "json",
# so keep only the span from the first "[" to the last "]" before parsing.
# `relevant_urls` itself is left untouched (still the raw reply string) so
# this cell can be re-run without first re-running the request cell.
array_start = relevant_urls.find("[")
array_end = relevant_urls.rfind("]")
if array_start == -1 or array_end < array_start:
    raise ValueError(f"No JSON array found in model reply: {relevant_urls!r}")
relevant_url_list = json.loads(
    relevant_urls[array_start:array_end + 1].replace("\n", "")
)

links_and_text = []
for url in relevant_url_list:
    try:
        text = website.get_text_from_url(url)
    except Exception as error:
        # URLs come from the model and may not exist; report the failure and
        # keep going so one bad link does not discard the pages already fetched.
        print(f"Skipping {url}: {error}")
        continue
    links_and_text.append({"url": url, "text": text})

In [None]:
# Rebinds `system_prompt` for the brochure-writing step; the link-selection prompt assigned earlier is overwritten.
system_prompt = "You are an expert in creating company brochure given the URLs and content of it. You create this brochure and return it in Markdown format."

In [None]:
# Rebinds `user_prompt` for the brochure-writing step.
# The prompt tells the model the data is JSON, so serialise `links_and_text`
# with json.dumps rather than interpolating the Python repr of the list
# (single-quoted keys and Python escapes are not JSON).
user_prompt = f"""
The website you have been provided is of {website.url}.
Below are links extracted of the website and respective extracted text from that links are provided to you in below JSON format:
{json.dumps(links_and_text, ensure_ascii=False, indent=2)}
Using this data, you need to create a creative company brochure in Markdown format.
"""

In [None]:
# Request the whole brochure in a single, non-streamed reply from the local
# OpenAI-compatible endpoint (same base_url, api_key and model as the earlier request).
openai = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
response = openai.chat.completions.create(
    model="llama3.2",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
)

# Text of the first choice, rendered below as Markdown in the cell output.
brochure_markdown = response.choices[0].message.content
display(Markdown(brochure_markdown))

**Anthropic: Empowering Breakthroughs**
=====================================

Welcome to Anthropic, a leading innovator in artificial intelligence and computer science. Our mission is to harness the power of AI to drive breakthroughs in various fields, from healthcare and finance to education and sustainability.

**About Us**
-------------

At Anthropic, we're passionate about pushing the boundaries of what's possible with AI. Our team of expert engineers, researchers, and scientists are dedicated to developing cutting-edge technologies that make a real difference in people's lives.

**Our Vision**
----------------

We envision a future where AI is not just a tool, but a catalyst for positive change. We believe that by working together, we can create a world where technology is used to amplify human potential, drive innovation, and promote social good.

**Our Technologies**
---------------------

* **Natural Language Processing (NLP)**: Our NLP capabilities enable machines to understand and interpret human language with unprecedented accuracy.
* **Computer Vision**: Our computer vision solutions allow machines to interpret and understand visual data from images and videos.
* **Machine Learning**: Our machine learning algorithms power the decisions made by our AI systems.
* **Data Science**: Our data science expertise enables us to extract insights and knowledge from vast amounts of data.

**Our Impact**
----------------

At Anthropic, we're committed to making a positive impact on society. We've developed solutions that:

* **Improve Healthcare Outcomes**: Our AI-powered tools help diagnose diseases more accurately and effectively.
* **Enhance Education Experiences**: Our NLP capabilities enable personalized learning experiences for students of all ages.
* **Drive Sustainability**: Our computer vision solutions help businesses reduce their environmental footprint.

**Our Team**
-------------

Meet our team of talented individuals who are shaping the future of AI:

| Name | Title |
| --- | --- |
| John Doe | CEO |
| Jane Smith | CTO |
| Bob Johnson | Chief Scientist |

**Investments and Partnerships**
------------------------------

Anthropic has received funding from leading investors, including Lightspeed Venture Partners. We're also proud to partner with organizations that share our vision for a more technologically advanced future.

**Join Our Journey**
-------------------

Want to be part of the Anthropic team? Check out our career pages to explore opportunities in AI, engineering, research, and beyond!

[Learn More about Careers at Anthropic](https://www.anthropic.ai/careers/)

[Get Involved with Anthropic's Community](https://www.anthropic.ai/community/)

**Stay Up-to-Date**
------------------

Follow us on social media to stay informed about the latest developments in AI and computer science:

* Twitter: [@Anthropic](https://twitter.com/anthropic)
* LinkedIn: [Anthropic](https://www.linkedin.com/company/anthropic)
* Facebook:[@anthropic.ai](https://facebook.com/anthropicai)

We're excited to have you join our community!

In [84]:
# With streaming...

def _strip_markdown_fence(text):
    """Remove a ```markdown ... ``` fence that wraps the whole reply.

    Only an opening fence at the very start (optionally tagged "markdown")
    and a closing fence at the very end are removed. The word "markdown" and
    any code fences inside the brochure body are left alone.
    """
    body = text.lstrip()
    if not body.startswith("```"):
        return text
    body = body[3:]
    if body.startswith("markdown"):
        body = body[len("markdown"):]
    body = body.lstrip("\n")
    trimmed = body.rstrip()
    if trimmed.endswith("```"):
        body = trimmed[:-3].rstrip()
    return body


openai = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
streamed_response = openai.chat.completions.create(
    model="llama3.2",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ],
    stream=True
)

# `raw_output` accumulates exactly what the model sent; `output` is the
# cleaned text shown to the reader and is recomputed from the raw text on
# every chunk, so cleaning never eats into content received earlier.
raw_output = ""
output = ""
display_handle = display(Markdown(""), display_id=True)
for chunk in streamed_response:
    # Some chunks can arrive with an empty `choices` list; skip them instead
    # of failing on `choices[0]`.
    if not chunk.choices:
        continue
    raw_output += chunk.choices[0].delta.content or ''
    output = _strip_markdown_fence(raw_output)
    update_display(Markdown(output), display_id=display_handle.display_id)

# Anthropic: Empowering AI Innovation

At Anthropic, we're passionate about harnessing the power of artificial intelligence to drive real-world impact. Our team of experts is dedicated to building cutting-edge AI models that can understand and generate human-like language.

## Our Mission
We believe that AI should be used for good, not just for profit. That's why we're committed to developing technologies that promote inclusivity, accessibility, and social responsibility.

## What We Do
Our team of experts is comprised of leading researchers, engineers, and product specialists who share a common goal: to create AI systems that can learn, understand, and interact with humans in meaningful ways.

### Our Technology

* **Language Understanding**: Our AI models are equipped to comprehend the nuances of human language, including context, idioms, and subtleties.
* **Text Generation**: We enable our models to generate high-quality text across various domains, from news articles to creative writing.
* **Conversational AI**: Our technology powers engaging conversations that simulate human-like interactions.

## What Sets Us Apart

* **Inclusive Design**: We prioritize accessibility and inclusivity in every aspect of our products and services.
* **Transparency**: Our models are designed to be explainable, ensuring transparency in decision-making processes.
* **Continuous Learning**: Our AI systems learn from feedback and adapt to changing contexts, reflecting the complexities of human experience.

## Join the Movement
Be part of a community that's redefining what's possible with AI. Collaborate with our team to develop innovative solutions that benefit society as a whole.

### Get in Touch

* Email: [info@anthropic.com](mailto:info@anthropic.com)
* Phone: +1 800 ANTHROPIC (268-6462)
* LinkedIn: linkedin.com/company/anthropic
* GitHub: github.com/anthropic

## Explore Our Resources

* **Blog**: Stay up-to-date on the latest advancements in language understanding and AI innovation.
* **Whitepapers**: Dive deeper into our research papers and case studies on harnessing AI for social impact.
* **Documentation**: Learn more about our APIs, SDKs, and developer resources.

[Learn More](#)

Join us on this journey to empower AI innovation that drives positive change. Connect with us today!