In [0]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown,display,update_display
from openai import OpenAI

In [0]:
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Environment variable names are case-sensitive on Linux/macOS, so look up the
# conventional upper-case name first. The original mixed-case spelling is kept
# as a fallback so an existing .env that used it keeps working.
api_key = os.getenv('OPENAI_API_KEY') or os.getenv('OpenAI_API_KEY')

# Chat model used for both the link-selection and the brochure-writing calls.
model = 'gpt-4o-mini'

# Pass the key explicitly so the value resolved above is the one actually used.
# When it is None the client falls back to its own environment lookup.
openai = OpenAI(api_key=api_key)

In [0]:
class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes set by the constructor:
        url:   the address that was requested.
        body:  the raw response content returned by requests.
        title: text of the <title> tag, or "No title found".
        text:  text of <body> with script/style/img/input elements removed,
               one fragment per line; "" when the page has no <body>.
        links: every non-empty href found on an <a> tag, exactly as written
               in the page (so entries may be relative).
    """

    def __init__(self, url, timeout=10):
        """
        Download and parse the page at `url`.

        Args:
            url: address of the page to fetch.
            timeout: seconds to wait for the server before requests gives up.
                Without a timeout an unresponsive host would block forever.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # `.string` is None when <title> is empty or contains nested tags;
        # fall back to the placeholder rather than storing None, which would
        # later be rendered as the literal text "None" by get_contents().
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        if soup.body:
            # Drop elements that carry no readable prose before extracting text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually have a non-empty href.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as a prompt fragment."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [0]:
# System prompt for the link-selection request made in get_links().
# It gives the model two worked input/output examples and asks for a JSON
# object of the form {"links": [{"type": ..., "url": ...}, ...]}, which is the
# shape get_all_details() reads back.
link_system_prompt = """You are provided with a list of links found on a webpage.
You must decide which links would be most relevant to include in a brochure about the company,
such as links to an About page, Company page, or Careers/Jobs pages.

Respond in JSON format like this:
Example 1:
Input:
[
    "https://example.com",
    "https://example.com/about",
    "https://example.com/contact",
    "https://example.com/careers"
]

Output:
{
    "links": [
        {"type": "about page", "url": "https://example.com/about"},
        {"type": "careers page", "url": "https://example.com/careers"}
    ]
}

Example 2:
Input:
[
    "https://anothercompany.org/home",
    "https://anothercompany.org/team",
    "https://anothercompany.org/jobs",
    "https://anothercompany.org/blog"
]

Output:
{
    "links": [
        {"type": "about page", "url": "https://anothercompany.org/team"},
        {"type": "careers page", "url": "https://anothercompany.org/jobs"}
    ]
}

Now analyze the following list of links:
"""


In [0]:
def get_links_user_prompt(website):
    """
    Build the user message that lists a page's links for the link-selection call.

    Args:
        website: object exposing `url` (str) and `links` (iterable of str).

    Returns:
        A single string: the instructions followed by one link per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join([intro, instructions, heading, link_lines])

In [0]:
def get_links(url):
    """
    Ask the chat model which links on the page at `url` belong in a brochure.

    The page is scraped, its links are sent together with the link-selection
    system prompt, and the model is asked to answer with a JSON object.

    Args:
        url: address of the page whose links should be classified.

    Returns:
        The model's reply decoded from JSON.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    reply = openai.chat.completions.create(
        model=model,
        messages=chat_messages,
        response_format={"type": "json_object"},
    )
    return json.loads(reply.choices[0].message.content)


In [0]:
def get_all_details(url):
    """
    Collect the text of the landing page and of every link the model selected.

    Args:
        url: address of the company's landing page.

    Returns:
        One string containing the landing page contents followed by a
        section per selected link (its type as a heading, then its contents).
    """
    # Local import keeps this function self-contained; urljoin is stdlib.
    from urllib.parse import urljoin

    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)

    # The reply is model-generated JSON, so do not assume its shape: tolerate a
    # missing/empty "links" entry and skip items that carry no usable URL.
    for link in links.get("links") or []:
        if not isinstance(link, dict) or not link.get("url"):
            continue
        result += f"\n\n{link.get('type', 'relevant page')}\n"
        # The model is asked for full URLs but may echo a relative href;
        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the landing page.
        result += Website(urljoin(url, link["url"])).get_contents()
    return result

In [0]:
# System prompt for the brochure-writing request made in create_brochure().
# Written as adjacent string literals so every sentence boundary is visible:
# the earlier backslash continuation joined "markdown." and "Include" with no
# space between them.
get_brochure_system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [0]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: name of the company, quoted in the prompt.
        url: landing page whose contents (and selected sub-pages) are appended.
        max_chars: maximum length of the returned prompt in characters.
            Defaults to 5,000; pass None to disable truncation.

    Returns:
        The prompt text, cut to at most `max_chars` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Slicing with None as the upper bound returns the whole string.
    return user_prompt[:max_chars]

In [0]:
def create_brochure(company_name, url):
    """
    Generate a brochure for a company, render it in the notebook, and return it.

    Args:
        company_name: name of the company the brochure is about.
        url: address of the company's landing page.

    Returns:
        The brochure text produced by the chat model.
    """
    chat_messages = [
        {"role": "system", "content": get_brochure_system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    response = openai.chat.completions.create(model=model, messages=chat_messages)
    brochure_text = response.choices[0].message.content
    display(Markdown(brochure_text))
    return brochure_text


In [0]:
# Generate and render the brochure for the example company. The returned text
# is kept in `brochure` because the translation cell further down reads it.
brochure = create_brochure("Anthropic", "https://anthropic.com")

In [0]:
import ollama
# Model name handed to ollama.chat() by translate(). Note this is a different
# variable from the lower-case `model` used for the OpenAI calls.
MODEL = "llama3.2"

# System prompt for the translation request; message() places it in the
# 'system' turn.
translate_system_prompt = (
    "You are a native Spanish speaker who teaches English at a university. "
    "Your goal is to translate from English to Spanish while preserving the Markdown format, emoji usage, and playful tone. "
    "Keep the original structure exactly. Be creative, natural, and engaging for a Spanish-speaking reader."
)

def translate_user_prompt(brochure):
    """
    Wrap a brochure in the user message for the translation request.

    Args:
        brochure: the brochure text to be translated.

    Returns:
        A prompt with the brochure enclosed in triple double-quotes, preceded
        by a one-line introduction and followed by the translation request.
    """
    opening = "You are looking at a company brochure:"
    quoted_brochure = f'"""{brochure}"""'
    closing = "Your goal is to translate this brochure into Spanish."
    # The three parts are separated by one blank line each.
    return "\n\n".join((opening, quoted_brochure, closing))

In [0]:
def message(brochure):
    """
    Assemble the chat messages for the translation request.

    Args:
        brochure: the brochure text to be translated.

    Returns:
        A two-element list: the system turn, then the user turn.
    """
    system_turn = dict(role='system', content=translate_system_prompt)
    user_turn = dict(role='user', content=translate_user_prompt(brochure))
    return [system_turn, user_turn]

In [0]:
def translate(brochure):
    """
    Translate a brochure with the local Ollama model, render it, and return it.

    Args:
        brochure: the brochure text to be translated.

    Returns:
        The translated text taken from the model's reply.
    """
    # Keyword arguments make it explicit which value is the model name and
    # which is the message list.
    response = ollama.chat(model=MODEL, messages=message(brochure))
    result = response['message']['content']
    display(Markdown(result))
    return result

In [0]:
# Translate the brochure produced by the create_brochure() cell above; that
# cell must have been run first so that `brochure` exists.
translated_text = translate(brochure)
