In [None]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [None]:
# Load variables from a local .env file; override=True is passed so values in
# .env take precedence over variables already set in the process environment.
load_dotenv(override=True)
api_key= os.getenv('GEMINI_API_KEY')
if api_key:
    print("api found")
else:
    # Report the missing key here instead of letting it surface later as an
    # opaque client-construction or authentication error.
    print("GEMINI_API_KEY not found - add it to your .env file or environment")

In [None]:
# OpenAI-compatible client aimed at the Gemini endpoint; the key comes from
# the GEMINI_API_KEY value read in the previous cell.
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"
gemini = OpenAI(api_key=api_key, base_url=GEMINI_BASE_URL)

In [None]:
# OLLAMA_BASE_URL = "http://localhost:11434/v1"
# ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')

In [None]:
# Client used by every chat-completion call in the cells below; rebind this
# name to another OpenAI-compatible client to switch providers in one place.
llm_call= gemini

In [None]:
# Sanity check: show whatever fetch_website_links returns for one site as the
# cell output before wiring it into the prompts.
links= fetch_website_links("https://huggingface.co")
links

In [None]:
# System prompt for the link-selection request: asks the model to pick the
# links relevant to a company brochure and to answer in the JSON shape shown
# in the example ({"links": [{"type": ..., "url": ...}, ...]}).
# NOTE(review): the variable is spelled `sytem_prompt`; later cells reference
# it under this exact spelling, so any rename must update them together.
sytem_prompt= """
You are provided with a list of links found on a webpage.
you are able to decide which of the links would be most relevant to include brochure about the company,
such as links to an About page,or a company page, or Career/jobs page.
You should respond in JSON in this example and also dont think that u should give only two links, its just for example that it should look like that u can give as many links which are there but it should be relevant as i mentioned earlier :

{
    "links":[
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [None]:
def get_links_user_prompt(url):
    """Build the user message for the link-selection request.

    The message is a fixed instruction header mentioning `url`, followed by
    the items returned by fetch_website_links(url), one per line.

    Args:
        url: Address of the page whose links should be listed.

    Returns:
        The complete prompt text as a single string.
    """
    header = f"""
    Here is the list of links on the website {url}-
    Please decide which of these are relevant web links for a brochure about the company,
    respond with the full https URL in JSON fomat.
    Do not include Terms of service ,Privacy,email links.

    Links (some might be relative links):

    """
    link_lines = "\n".join(fetch_website_links(url))
    return header + link_lines

In [None]:
# print(get_links_user_prompt("https://huggingface.co"))

In [None]:
# Model name passed as `model=` to every chat-completion request in this notebook.
MODEL= "gemini-2.5-flash"

In [None]:
def select_relevant_link(url):
    """Ask the model which links found on `url` belong in a company brochure.

    Sends the link-selection system prompt together with the prompt built by
    get_links_user_prompt(url), requests a JSON-object response, and returns
    the parsed JSON.

    NOTE(review): a later cell defines select_relevant_link again (adding
    progress prints); on a top-to-bottom run that definition replaces this one.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The decoded JSON value from the model's reply.
    """
    messages = [
        {"role": "system", "content": sytem_prompt},
        {"role": "user", "content": get_links_user_prompt(url)},
    ]
    completion = llm_call.chat.completions.create(
        model=MODEL,
        messages=messages,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [None]:
# select_relevant_link("https://huggingface.co")

In [None]:
def select_relevant_link(url):
    """Ask the model which links found on `url` belong in a company brochure.

    Prints a progress line, sends the link-selection system prompt together
    with the prompt built by get_links_user_prompt(url), requests a
    JSON-object response, and prints how many links were selected.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The decoded JSON object from the model's reply. The "links" key is
        always present and holds a list; it is set to an empty list when the
        reply omitted it or supplied something that is not a list.

    Raises:
        json.JSONDecodeError: If the reply text is not valid JSON.
    """
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response=llm_call.chat.completions.create(model= MODEL, messages= [
        {"role": "system", "content": sytem_prompt},
        {"role": "user", "content": get_links_user_prompt(url) }
    ], response_format={"type": "json_object"}
    )
    result= response.choices[0].message.content
    links= json.loads(result)
    # The prompt asks for {"links": [...]}, but nothing enforces that shape.
    # Normalise it so the count below, and callers indexing ["links"], do not
    # fail with KeyError/TypeError on an off-script reply.
    if not isinstance(links.get("links"), list):
        links["links"] = []
    print(f"Found {len(links['links'])} relevant links")
    return links

In [None]:
# select_relevant_link("https://huggingface.co")

In [None]:
def fetch_page_and_relevant_links(url):
    """Assemble the landing-page text plus the text of each selected link.

    Calls fetch_website_contents(url) for the landing page, then
    select_relevant_link(url), then fetch_website_contents once per entry in
    the returned "links" list, in order.

    Args:
        url: Address of the company's landing page.

    Returns:
        One markdown-style string: a "## Landing page:" section followed by a
        "### Link: <type>" section for every selected link.
    """
    landing = fetch_website_contents(url)
    selection = select_relevant_link(url)
    sections = [f"## Landing page:\n\n{landing}\n## Relevant links:\n"]
    for entry in selection['links']:
        sections.append(f"\n\n### Link: {entry['type']}\n")
        sections.append(fetch_website_contents(entry["url"]))
    return "".join(sections)

In [None]:
# Smoke test: runs the full fetch + link-selection pipeline (this makes a live
# model call through llm_call) and prints the combined text.
print(fetch_page_and_relevant_links("https://huggingface.co"))

In [None]:
# System prompt for the brochure-writing requests (used by create_brochure and
# stream_brochure): asks for a short markdown brochure without code blocks.
brochure_system_prompt= """ 
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors, recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and career/jobs if you have the information,,also give links relevant to it if u have.
"""

In [None]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """Build the user message for the brochure-writing request.

    The message is an instruction header naming the company, followed by the
    text returned by fetch_page_and_relevant_links(url), truncated to at most
    `max_chars` characters.

    Args:
        company_name: Name of the company, interpolated into the header.
        url: Address of the company's landing page.
        max_chars: Maximum length of the returned prompt. Defaults to 5,000.
            Pass None to disable truncation.

    Returns:
        The prompt text, at most `max_chars` characters long.
    """
    user_prompt= f"""
    You are looking at a company called {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.\n\n
    """
    user_prompt += fetch_page_and_relevant_links(url)
    # Truncate rather than append: the previous `+=` here duplicated the first
    # 5,000 characters onto the end, so the prompt grew instead of being capped.
    return user_prompt[:max_chars]
    

In [None]:
# get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

In [None]:
def create_brochure(company_name, url):
    """Generate a brochure in one request and render it as markdown.

    Sends brochure_system_prompt plus the prompt from
    get_brochure_user_prompt(company_name, url) to the model, then displays
    the reply text via display(Markdown(...)).

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Returns:
        None; the brochure is shown as cell output.
    """
    messages = [
        {"role": "system", "content": brochure_system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = llm_call.chat.completions.create(model=MODEL, messages=messages)
    display(Markdown(completion.choices[0].message.content))

In [None]:
# create_brochure("HuggingFace", "https://huggingface.co")

In [None]:
def stream_brochure(company_name, url):
    """Generate a brochure with a streamed request, rendering it as it arrives.

    Sends brochure_system_prompt plus the prompt from
    get_brochure_user_prompt(company_name, url) with stream=True, creates an
    empty markdown display, and re-renders the accumulated text into that
    display after each content chunk.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.

    Returns:
        None; the brochure is shown (and progressively updated) as cell output.
    """
    stream = llm_call.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A streamed chunk can arrive with an empty `choices` list (for
        # example a usage-only chunk); indexing [0] on it would raise
        # IndexError and abort the brochure part-way through.
        if not chunk.choices:
            continue
        # delta.content may be None on some chunks; treat that as no new text.
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)
    

In [None]:
# Final demo: stream a brochure for Hugging Face (makes live web fetches and
# model calls).
stream_brochure("HuggingFace", "https://huggingface.co")