In [1]:
import os
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown,display
from openai import OpenAI
import json
from typing import List

In [2]:
# Model name passed as `model=` to every chat-completion call in this notebook.
MODEL="llama3.2"
# OpenAI-compatible client pointed at a local endpoint (localhost:11434) rather than the hosted API.
# NOTE(review): the api_key value looks like a placeholder for a local Ollama server, not a real secret — confirm.
openai = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

In [3]:
# message="Bye, I'm Deadpool"
# response=openai.chat.completions.create(model=MODEL,messages=[{"role":"user","content":message}])
# print(response.choices[0].message.content)

In [4]:
# Browser-like User-Agent sent with every page request made by bsWebsite.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class bsWebsite:
    """Snapshot of one web page: its title, visible body text and outgoing links.

    Attributes:
        url: The address that was fetched.
        title: Text of the page's <title>, or "No title" when it is missing or empty.
        text: Newline-separated text of <body> with img/script/input/style removed
            ("" when the page has no <body>).
        links: Every non-empty ``href`` found on an <a> tag, exactly as written in
            the page (relative links are not resolved).
    """

    def __init__(self, url, timeout=10):
        """Download ``url`` and parse it with BeautifulSoup.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds handed to ``requests.get``. requests applies no
                timeout unless one is given, so without it a stalled server
                would block the notebook indefinitely.

        Raises:
            requests.exceptions.RequestException: if the request fails or times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # `.string` is None for an empty <title> or one that wraps other tags;
        # fall back so the prompt never contains the literal text "None".
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title"

        body = soup.body
        if body is not None:
            # Drop elements that carry no readable brochure content.
            for irrelevant in body(["img", "script", "input", "style"]):
                irrelevant.decompose()
            self.text = body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually carry a non-empty href.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and body text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Content:\n{self.text}\n\n"

In [5]:
# ed=bsWebsite("https://edwarddonner.com")
# ed.links

In [6]:
# System prompt for the link-selection call made in get_links: it asks the model to pick the
# links worth including in a brochure and to answer as JSON shaped like
# {"links": [{"type": ..., "url": ...}, ...]}.
# The indentation inside the triple-quoted string is part of the text sent to the model.
link_system_prompt="""You are an expert analyzer who recieves a list of links found on a webpage. Your job is to analyze which links 
                      are relevant to include in a brochure for the website. This includes links like About page, Careers/Jobs page, 
                      Company page, etc.
                      You should respond in JSON as demonstrated below:
                      {
                          "links":[
                              {"type":"About page","url":"https://aboutpage/url/goes/here"},
                              {"type":"Company page","url":"https://companypage/url/goes/here"}
                          ]
                      }
                    """

In [7]:
def link_user_prompt(website):
    """Build the user message that hands a page's links to the link-selection model.

    Args:
        website: Object exposing ``title`` and ``links``; both are interpolated
            into the prompt as-is (``links`` via its ``str()`` form).

    Returns:
        The prompt text, including the indentation of the template below.
    """
    return f"""Here is a list of links found at {website.title}. Provide relevant links only and make sure to
                    discard links like Terms of Services, Privacy Policy, Help, Cookies Preferences, etc.
                    Respond with full https url in JSON format.
                    List of links (some might be relevant):
                    {website.links}
                """

In [8]:
# link_user_prompt(ed)

In [9]:
def _parse_link_reply(raw):
    """Turn the model's raw reply into ``{"links": [...]}`` with well-formed entries only."""
    try:
        parsed = json.loads(raw or "")
    except json.JSONDecodeError:
        # The model answered with something that is not JSON (or nothing at all).
        parsed = {}
    if not isinstance(parsed, dict):
        parsed = {}
    entries = parsed.get("links")
    if not isinstance(entries, list):
        entries = []
    # Callers index each entry by 'type' and 'url', so drop anything lacking them.
    parsed["links"] = [
        entry for entry in entries
        if isinstance(entry, dict) and entry.get("type") and entry.get("url")
    ]
    return parsed


def get_links(url):
    """Ask the model which links on ``url`` belong in a brochure.

    The page is fetched with ``bsWebsite`` and its links are sent to the model
    together with ``link_system_prompt``, requesting a JSON object reply.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        A dict that always has a ``"links"`` key holding a list of dicts, each
        with non-empty ``"type"`` and ``"url"`` values. A reply that is empty,
        not valid JSON, or not shaped as requested yields ``{"links": []}``
        (other top-level keys of a valid JSON object are preserved) instead of
        raising.
    """
    website = bsWebsite(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": link_user_prompt(website)},
        ],
        response_format={"type": "json_object"},
    )
    return _parse_link_reply(response.choices[0].message.content)

In [10]:
# get_links("https://huggingface.co")

In [11]:
def get_all_details(url):
    """Collect the landing page plus every model-selected page into one text blob.

    Args:
        url: Address of the site's landing page.

    Returns:
        A string starting with "Landing Page:" and the landing page contents,
        followed by one section per selected link (its type, then its contents
        or a note explaining why it could not be fetched).

    Link entries that are not dicts or have no ``'url'`` are skipped, and a
    missing ``'links'`` key is treated as "no links", so one malformed model
    reply cannot abort the whole brochure. A failure fetching the landing page
    itself still propagates.
    """
    sections = ["Landing Page:\n", bsWebsite(url).get_contents()]

    selected = get_links(url)
    entries = selected.get("links") if isinstance(selected, dict) else None

    for link in entries or []:
        if not isinstance(link, dict) or "url" not in link:
            continue
        sections.append(f"\n\n{link.get('type', 'Relevant page')}\n")
        try:
            sections.append(bsWebsite(link['url']).get_contents())
        except Exception as e:
            # Best effort: record the failure in the text and keep going.
            sections.append(f"Cannot retrive from {link['url']} due to error: {e}")
    return "".join(sections)

In [12]:
# get_all_details("https://huggingface.co")

In [13]:
# System prompt for the brochure-writing call made in create_brochure.
# The trailing backslashes continue the string literal across lines, so the leading spaces
# of each continuation line are part of the prompt text.
bro_system_prompt="You are an assistant who's excellent at creating brochures for companies by analyzing\
                   their website's content and relevant pages. You include all the crucial details like\
                   information about the company, what is it about, culture of the company, job or career\
                   opportunities, subscriptions, etc, if it's given. Create the brochure in a way that impresses\
                   prospective customers or investors."

In [14]:
def bro_user_prompt(name,url,max_chars=8_000):
    """Build the user message for the brochure-writing call.

    Args:
        name: Company name interpolated into the instructions.
        url: Landing page address; its content and that of the selected pages
            is gathered with ``get_all_details`` and appended to the instructions.
        max_chars: Maximum length of the returned prompt, in characters. The
            default keeps the previous fixed cap of 8,000; pass ``None`` to
            disable truncation.

    Returns:
        The instructions followed by the gathered content, cut to ``max_chars``.
    """
    instructions=f"""You are looking at a company called {name}.
                  Here is the content from its landing page and other relevant pages.
                  Use this information to build an amazing short brochure for the company.
                  Respond in markdown.
                  Content:\n"""
    # Slicing with None leaves the string whole, so no special case is needed.
    return (instructions+get_all_details(url))[:max_chars]

In [15]:
# bro_user_prompt("Hugging face","https://huggingface.co")

In [16]:
def create_brochure(name,url):
    """Generate a brochure for a company from its website.

    Args:
        name: Company name, forwarded to ``bro_user_prompt``.
        url: Landing page address, forwarded to ``bro_user_prompt``.

    Returns:
        The content of the model's first reply choice.
    """
    conversation=[
        {"role":"system","content":bro_system_prompt},
        {"role":"user","content":bro_user_prompt(name,url)},
    ]
    completion=openai.chat.completions.create(model=MODEL,messages=conversation)
    return completion.choices[0].message.content

In [17]:
# create_brochure("HuggingFace", "https://huggingface.co")

In [18]:
# System prompt for the translation call made in spanish_brochure: instructs the model to
# translate an English brochure into Spanish and reply in markdown.
# The indentation inside the triple-quoted string is part of the text sent to the model.
spain_system_prompt="""You are an exceptional translator who is very proficient in Spanish.
                       You're a given the contents of a brochure in English to analyze and translate to Spanish.
                       Make sure the translation is coherent and makes sense to a Spanish human reader.
                       Respond in markdown."""

In [19]:
def spain_user_prompt(name,brochure):
    """Build the user message for the translation call.

    Args:
        name: Company name interpolated into the instructions.
        brochure: English brochure text, appended after a blank line.

    Returns:
        The instructions followed by the brochure text.
    """
    return f"""You are provided with the English brochure of the company {name}.
                    Make sure to include every piece of information present in the brochure.
                    Do not convert the name of company and other short-forms if any.
                    Respond in markdown. Here is the content of brochure:\n\n{brochure}"""

In [20]:
def spanish_brochure(name,url):
    """Create a brochure for a company and render its Spanish translation.

    Progress and intermediate values are printed along the way. The translated
    text is shown with ``display(Markdown(...))``; nothing is returned.

    Args:
        name: Company name, used for both the brochure and the translation prompt.
        url: Landing page address the brochure is built from.
    """
    print("Creating brochure...")
    english=create_brochure(name,url)
    print("Brochure created!")

    # Fall back to a placeholder when the model produced nothing usable.
    usable=isinstance(english, str) and bool(english.strip())
    if not usable:
        english="[Brochure could not be generated.]"
        print("Brochure was empty!")

    print("Length of brochure: ",len(english))
    # Cap what is sent for translation, then preview the first 500 characters.
    english=english[:5000]
    print(english[:500])

    print("Creating user prompt: ")
    translation_request=spain_user_prompt(name, english)
    print("User prompt: ",translation_request)

    conversation=[
        {"role":"system","content":spain_system_prompt},
        {"role":"user","content":translation_request},
    ]
    completion=openai.chat.completions.create(model=MODEL,messages=conversation)
    display(Markdown(completion.choices[0].message.content))

In [21]:
# Run the whole pipeline for Hugging Face: fetches pages over the network, calls the model,
# prints progress, and renders the Spanish brochure as this cell's output.
spanish_brochure("HuggingFace", "https://huggingface.co")

Creating brochure...
Brochure created!
Length of brochure:  2456
**Hugging Face Brochure**

[Image of Hugging Face logo]

**Welcome to Hugging Face**

The AI community building the future. Explore, collaborate, and discover cutting-edge machine learning models, datasets, and applications on our platform.

**About Us**

Hugging Face is a collaboration platform for the machine learning community. We empower the next generation of machine learning engineers, scientists, and end users to learn, collaborate, and share their work to build 
Creating user prompt: 
User prompt:  You are provided with the English brochure of the company HuggingFace.
                    Make sure to include every piece of information present in the brochure.
                    Do not convert the name of company and other short-forms if any.
                    Respond in markdown. Here is the content of brochure:

**Hugging Face Brochure**

[Image of Hugging Face logo]

**Welcome to Hugging Face**

The AI commun

**Hugging Face Brochure**
=========================

[Imagen de la logotipo de Hugging Face]

**Bienvenido a Hugging Face**

La comunidad del aprendizaje automático construyendo el futuro. Explora, colaborea y descubre modelos avanzados de aprendizaje automático, conjuntos de datos y aplicaciones en nuestra plataforma.

**Sobre Nosotros**

Hugging Face es una plataforma de colaboración para la comunidad de aprendizaje automático. Empoderamos a los ingenieros de aprendizaje automático, científicos y usuarios finales para que aprendan, colaboren y compartan sus trabajos para construir un futuro abierto y ético de inteligencia artificial juntos.

[Imagen de miembros del equipo]

**Nuestra Misión**

* Democratizar la inteligencia artificial a través del procesamiento de lenguaje natural
* Proporcionar un lugar central para compartir, explorar, descubrir y experimentar con software abierto de aprendizaje automático
* Fomentar una comunidad de entusiastas de aprendizaje automático, ingenieros, científicos y usuarios finales

**Lo que Ofrecemos**

* Un rico repositorio de 1M+ modelos y 250k+ conjuntos de datos
* Una plataforma para construir, colaborar y compartir aplicaciones de inteligencia artificial
* Un stack abierto source para rápida prototipificación e iteración
* Una comunidad colaborativa con oportunidades para el crecimiento profesional y la formación

**Productos**

* **Hugging Face**: Nuestra software de procesamiento de lenguaje para resolver y democratizar la inteligencia artificial
* **Gradio**: Una plataforma para construir aplicaciones inmersivas en Python
* **Transformer Hub**: Colección de modelos pre-entrenados de transformadores para aplicaciones de visión y visión utilizando el lenguaje natural

**Comunidad**

Únete a nuestra vibrante comunidad de 50,000+ organizaciones utilizando Hugging Face:

* Comparte tu trabajo e infórmate sobre aplicaciones de inteligencia artificial nuevas
* Descubre nuevos modelos, conjuntos de datos e interfaces de usuario
* Explora aplicaciones de inteligencia artificial y colaborea con otros
* Participa en hackathones, concursos y eventos

**Oportunidades de Carrera**

Estamos apasionados por construir un futuro abierto e inclusivo. Explore nuestras plataformas de empleo de la comunidad Hugging Face:

* **Desarrollo de Software**: Únete al equipo de ingenieros y científicos trabajando en proyectos de aprendizaje automático.
* **Investigación y Desarrollo**: Colabora con nosotros en iniciativas de investigación e información que empujen fronteras del aprendizaje automático

[Imagen de la logotipo de Hugging Face]

**Comienza Ahora!**

Explora nuestra plataforma, contribuye a proyectos de software abiertos e infórmate sobre nuestros recursos y herramientas. Registra tu cuenta ahora para obtener acceso gratuito.

[Botón CTA: Inscribirse Ahora]

---

**Atrás**

* [Home Hugging Face](http://huggingface.co)
* [Blog de Hugging Face](https://blog.huggingface.co)
* [GitHub de Hugging Face](https://github.com/huggingface)