### News article creator in your language

This notebook builds a short news article that gathers the breaking news and latest events from a news website, with a brief summary of each, based on the links found on the page provided. It can also produce the output in the language the user wants.

In [None]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [None]:
# Model and endpoint: Gemini is reached through its OpenAI-compatible API.
MODEL = 'gemini-2.5-flash'
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"

# Pull the key from the environment (populated from a .env file when present).
load_dotenv(override=True)
api_key = os.getenv('GEMINI_API_KEY')

# Cheap sanity check only (presence and length); the key is not verified
# against the service here.
if not api_key or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

gemini = OpenAI(base_url=GEMINI_BASE_URL, api_key=api_key)

In [None]:
# System prompt for the link-selection call. The model is asked to mirror the
# embedded example, so the example itself has to be valid JSON (balanced
# quotes, no trailing comma).
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be useful for making a news article containing
latest news events. You do not need to include irrelevant links such as About or Company pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "article link", "url": "https://full.url/news/national/short-headline.ece"},
        {"type": "category page", "url": "https://full.url/sport"}
    ]
}
"""

In [None]:
def get_links_user_prompt(url):
    """Build the user prompt asking the model to pick news-relevant links from `url`.

    The fixed instructions are followed by every link returned by
    `fetch_website_links(url)`, one link per line.
    """
    instructions = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links which can direct to pages having news content,
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    page_links = fetch_website_links(url)
    return instructions + "\n".join(page_links)

In [None]:
def select_relevant_links(url):
    """Ask the model which links on `url` lead to news content.

    Returns the model's reply parsed with `json.loads`. The system prompt
    requests an object holding a "links" list of {"type", "url"} entries.
    """
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(url)},
    ]
    completion = gemini.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},  # ask for a JSON object reply
    )
    return json.loads(completion.choices[0].message.content)

In [None]:
# def accumulate_relevant_links(url, it, links_final):
#     if(it==2):
#         return
#     links = select_relevant_links(url)
#     for link in links['links']:
#         links_final.append(link['url'])
#         accumulate_relevant_links(link['url'], it+1, links_final)

In [None]:
def fetch_page_and_all_relevant_links(url):
    """Collect the landing page text plus the text of every model-selected link.

    Args:
        url: Address of the news site's landing page.

    Returns:
        A markdown-formatted string: the landing page contents under
        "## Landing Page:", followed by one "### Link: <type>" section per
        selected link containing that page's contents.
    """
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    sections = [f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"]
    # The link list is produced by the model, so the expected keys are not
    # guaranteed: tolerate a missing "links"/"type" and skip entries that are
    # not objects or carry no URL instead of failing the whole run.
    for link in relevant_links.get("links") or []:
        if not isinstance(link, dict) or not link.get("url"):
            continue
        sections.append(f"\n\n### Link: {link.get('type', 'unknown')}\n")
        sections.append(fetch_website_contents(link["url"]))
    # Join once rather than growing the string with repeated +=.
    return "".join(sections)

In [None]:
# System prompt for the article-writing call: one headline plus a short
# summary per news item, answered in markdown.
article_system_prompt = (
    "\n"
    "You are a smart assistant that analyzes the contents of several relevant pages from a news website and\n"
    "creates an article having recent news events. You create the article like this-\n"
    "For every news you have to give a headline and then give a short content regarding that news.\n"
    "Respond in markdown without code blocks.\n"
)

In [None]:
def get_article_user_prompt(url, max_chars=10000):
    """Build the user prompt for the article-writing call.

    Args:
        url: Landing page of the news website.
        max_chars: Maximum length of the returned prompt, counted over the
            instructions and the page contents together. Pass None to
            disable truncation.

    Returns:
        The instructions followed by the contents gathered by
        `fetch_page_and_all_relevant_links(url)`, cut to `max_chars` characters.
    """
    user_prompt = """
You are looking at the contents of newspaper website across different pages.
Here are the contents of its landing page and other relevant pages;
use this information to build a short article in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    # Truncate to keep the request a manageable size (10,000 characters by default).
    return user_prompt[:max_chars]

In [None]:
def create_article(url):
    """Generate a markdown news article from the site at `url`.

    Sends the article system prompt together with the page contents gathered
    by `get_article_user_prompt(url)` to the model and returns the reply text.
    """
    conversation = [
        {"role": "system", "content": article_system_prompt},
        {"role": "user", "content": get_article_user_prompt(url)},
    ]
    completion = gemini.chat.completions.create(model=MODEL, messages=conversation)
    return completion.choices[0].message.content

In [None]:
# Build an article from the BBC home page and render the returned markdown.
result = create_article("https://www.bbc.com/")
display(Markdown(result))

In [None]:
# System prompt for the translation call; create_final_article appends the
# target language to it.
language_system_prompt = (
    "\n"
    "    You are given an article in English. You have to convert it into the specified language preserving all\n"
    "    the content. You can include English terms but provide a\n"
    "    proper context for the term.\n"
    "    \n"
)

In [None]:
# Lead-in for the translation request; the article text is appended after it.
language_user_prompt = (
    "\n"
    "Following is the content. Please convert this into proper output:\n"
    "\n"
)

In [None]:
def create_final_article(url, language):
    """Create a news article from `url` and display it converted into `language`.

    Args:
        url: Landing page of the news website to summarise.
        language: Name of the target language, e.g. "Hindi".

    The converted article is rendered as markdown in the notebook output;
    nothing is returned.
    """
    # Build the source article first so the progress message below refers
    # only to the translation step.
    article = create_article(url)
    # Announce the conversion before the request is sent, so the message
    # appears while the model is working rather than after it has finished.
    print(f"Converting into {language}")
    response = gemini.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": language_system_prompt + f"Language: {language}"},
            {"role": "user", "content": language_user_prompt + article},
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [None]:
# Run the full pipeline: build the article from the BBC home page and show it in Hindi.
create_final_article("https://www.bbc.com/", "Hindi")