# NewsAPI Testing

In [1]:
from newsapi import NewsApiClient
import newsapi as news
from dotenv import load_dotenv
import os

load_dotenv(override=True)

True

In [2]:
# Read the NewsAPI credential from the process environment (load_dotenv() ran in the first cell).
news_api_key = os.environ.get("NEWS_API_KEY")
if news_api_key is None or news_api_key == "":
    # Warn only; later cells that build a client will still run with the missing key.
    print("NEWS_API_KEY environment variable not set.")

# get_top_headlines

In [3]:
# Print the sort-method and category constants exposed by the newsapi package, one per line.
print(news.const.sort_method, news.const.categories, sep="\n")

{'popularity', 'publishedAt', 'relevancy'}
{'science', 'general', 'entertainment', 'technology', 'sports', 'health', 'business'}


In [4]:
# Shared client instance; later exploratory cells reuse this `newsapi` name.
newsapi = NewsApiClient(api_key=news_api_key)

# US, English-language business headlines matching "trump".
top_headlines = newsapi.get_top_headlines(
    q="trump",
    category="business",
    language='en',
    country='us',
)

top_headlines

{'status': 'ok',
 'totalResults': 10,
 'articles': [{'source': {'id': 'axios', 'name': 'Axios'},
   'author': 'Jason Lalljee',
   'title': '"Utterly unqualified": Trump BLS pick gets panned by conservative economists - Axios',
   'description': "Conservative economists cited examples of Trump's BLS pick appearing to misunderstand data he would be responsible for.",
   'url': 'https://www.axios.com/2025/08/12/trump-bls-ej-antoni-economists',
   'urlToImage': 'https://images.axios.com/lvY5i6KrfHxGhwktIdeP8ejh4u8=/233x275:5253x3099/1366x768/2025/08/12/1755026374090.jpeg',
   'publishedAt': '2025-08-13T17:42:57Z',
   'content': 'Driving the news: Trump announced Antoni as the next BLS commissioner a week after firing the previous head, Erika McEntarfer, which he did after jobs data showed cracks in the economy.\r\nThe presiden… [+3458 chars]'},
  {'source': {'id': 'fortune', 'name': 'Fortune'},
   'author': 'Sasha Rogelberg',
   'title': "Switzerland warns its companies that no, they can't

In [5]:
# Keep just the article list from the top-headlines response.
articles = top_headlines["articles"]

articles

[{'source': {'id': 'axios', 'name': 'Axios'},
  'author': 'Jason Lalljee',
  'title': '"Utterly unqualified": Trump BLS pick gets panned by conservative economists - Axios',
  'description': "Conservative economists cited examples of Trump's BLS pick appearing to misunderstand data he would be responsible for.",
  'url': 'https://www.axios.com/2025/08/12/trump-bls-ej-antoni-economists',
  'urlToImage': 'https://images.axios.com/lvY5i6KrfHxGhwktIdeP8ejh4u8=/233x275:5253x3099/1366x768/2025/08/12/1755026374090.jpeg',
  'publishedAt': '2025-08-13T17:42:57Z',
  'content': 'Driving the news: Trump announced Antoni as the next BLS commissioner a week after firing the previous head, Erika McEntarfer, which he did after jobs data showed cracks in the economy.\r\nThe presiden… [+3458 chars]'},
 {'source': {'id': 'fortune', 'name': 'Fortune'},
  'author': 'Sasha Rogelberg',
  'title': "Switzerland warns its companies that no, they can't dodge Trump's tariffs by routing goods through the tiny neig

In [6]:
# Look up the source id of the second article in the list.
source = articles[1]["source"]["id"]

print(source)

fortune


In [7]:
# Keep only the articles whose source id is Fox News or CNN.
cnn_and_fox_articles = [
    article
    for article in articles
    if article["source"]["id"] in ("fox-news", "cnn")
]

cnn_and_fox_articles

[]

In [8]:
from agents import Agent, Runner, trace, function_tool
import asyncio

In [9]:
from typing import List, Optional

@function_tool
def get_headlines(
    sources: Optional[List[str]] = None,
    category: Optional[str] = None,
    query: Optional[str] = None
) -> List[dict]:
    """
    Fetches US, English-language top headlines and optionally narrows them by source.

    Args:
        sources (Optional[List[str]]): News source IDs to keep. The filter is applied
            after the fetch; when omitted or empty, no articles are dropped.
        category (Optional[str]): Category forwarded to the top-headlines request
            (e.g., 'business', 'technology').
        query (Optional[str]): Keyword or phrase forwarded to the top-headlines request.

    Returns:
        List[dict]: The resulting articles, each represented as a dictionary.
    """
    client = NewsApiClient(api_key=news_api_key)
    response = client.get_top_headlines(q=query, category=category, language='en', country='us')
    fetched = response["articles"]

    # No source filter requested: hand back everything the API returned.
    if not sources:
        return fetched

    matching = []
    for item in fetched:
        if item["source"]["id"] in sources:
            matching.append(item)
    return matching
    

In [10]:
# System prompt for the digest-writing agent; it tells the model to always call get_headlines.
instructions = """
You are an excellent writer of an email news digest. You carefully listen to the desires of your customer and write them a personalized news digest including only the types of news that they are interested in.
You always make sure to cite your sources by including the link to the original article and write without bias and in a neat markdown format. 
You have a history of using reliable sources to include only the most up-to-date news.
Always use the get_headlines tool to get up-to-date news headlines.
"""

# First agent variant: only the get_headlines tool is registered.
news_agent = Agent(
    name="News Agent",
    tools=[get_headlines],
    instructions=instructions,
    model="gpt-4.1"
)

In [11]:
from IPython.display import display, Markdown

# Free-text user request passed to the agent as its input.
prompt = """
My name is Arnav and I'm looking to hear about what's new in the health space, specifically news about diseases.
"""

# Run the agent inside a trace named "News Digest" and render its final output as Markdown.
with trace("News Digest"):
    # Top-level await: valid in an IPython/Jupyter cell, not in a plain Python script.
    result = await Runner.run(news_agent, input=prompt)
    display(Markdown(result.final_output))

Hello Arnav,

Thank you for your interest in staying updated on the latest in the health space, especially regarding diseases. At this moment, there are no new headlines available specifically about diseases. News updates can be intermittent, but I am happy to check again or broaden the search if you’d like (for example, to general health news or specific diseases of interest).

Would you like to adjust your preferences or try a wider health news category? Let me know how I can personalize your news digest further!

# get_sources

In [12]:
# Fetch the source catalogue with the client created in the get_top_headlines section.
sources = newsapi.get_sources()

sources

{'status': 'ok',
 'sources': [{'id': 'abc-news',
   'name': 'ABC News',
   'description': 'Your trusted source for breaking news, analysis, exclusive interviews, headlines, and videos at ABCNews.com.',
   'url': 'https://abcnews.go.com',
   'category': 'general',
   'language': 'en',
   'country': 'us'},
  {'id': 'abc-news-au',
   'name': 'ABC News (AU)',
   'description': "Australia's most trusted source of local, national and world news. Comprehensive, independent, in-depth analysis, the latest business, sport, weather and more.",
   'url': 'https://www.abc.net.au/news',
   'category': 'general',
   'language': 'en',
   'country': 'au'},
  {'id': 'aftenposten',
   'name': 'Aftenposten',
   'description': 'Norges ledende nettavis med alltid oppdaterte nyheter innenfor innenriks, utenriks, sport og kultur.',
   'url': 'https://www.aftenposten.no',
   'category': 'general',
   'language': 'no',
   'country': 'no'},
  {'id': 'al-jazeera-english',
   'name': 'Al Jazeera English',
   'desc

In [13]:
# Unwrap the list of source records from the response.
sources_list = sources["sources"]

# Keep the records whose country is "us" and whose language is "en".
american_sources = list(filter(
    lambda entry: entry["country"] == "us" and entry["language"] == "en",
    sources_list,
))

american_sources

[{'id': 'abc-news',
  'name': 'ABC News',
  'description': 'Your trusted source for breaking news, analysis, exclusive interviews, headlines, and videos at ABCNews.com.',
  'url': 'https://abcnews.go.com',
  'category': 'general',
  'language': 'en',
  'country': 'us'},
 {'id': 'al-jazeera-english',
  'name': 'Al Jazeera English',
  'description': 'News, analysis from the Middle East and worldwide, multimedia and interactives, opinions, documentaries, podcasts, long reads and broadcast schedule.',
  'url': 'https://www.aljazeera.com',
  'category': 'general',
  'language': 'en',
  'country': 'us'},
 {'id': 'ars-technica',
  'name': 'Ars Technica',
  'description': "The PC enthusiast's resource. Power users and the tools they love, without computing religion.",
  'url': 'https://arstechnica.com',
  'category': 'technology',
  'language': 'en',
  'country': 'us'},
 {'id': 'associated-press',
  'name': 'Associated Press',
  'description': 'The AP delivers in-depth coverage on the internat

In [14]:
@function_tool
def get_sources() -> List[dict]:
    """
    Lists the NewsAPI sources whose country is 'us' and whose language is 'en'.

    Returns:
        List[dict]: The matching source objects, in the order the API returned them.
    """
    client = NewsApiClient(api_key=news_api_key)
    # A response without a "sources" key is treated as an empty catalogue.
    catalog = client.get_sources().get("sources", [])

    us_english = []
    for entry in catalog:
        if entry.get("country") != "us":
            continue
        if entry.get("language") != "en":
            continue
        us_english.append(entry)
    return us_english
    

In [15]:
# Rebinds `instructions` with a prompt that also tells the model to call get_sources.
instructions = """
You are an excellent writer of an email news digest. You carefully listen to the desires of your customer and write them a personalized news digest including only the types of news that they are interested in.
You always make sure to cite your sources by including the link to the original article and write without bias and in a neat markdown format. 
You have a history of choosing the best sources for the user based on the type of news they like, even if they don't mention any source preferences.

Always use the get_sources tool to get a list of possible news sources you can get news from and information about them.
Always use the get_headlines tool to get up-to-date news headlines.
"""

# Rebinds `news_agent`: same name and model as before, now with both tools registered.
news_agent = Agent(
    name="News Agent",
    tools=[get_headlines, get_sources],
    instructions=instructions,
    model="gpt-4.1"
)

In [16]:
from IPython.display import display, Markdown

# Free-text user request passed to the agent as its input (technology this time).
prompt = """
My name is Arnav and I'm looking to hear about what's new in the technology space.
"""

# Run the agent inside a trace named "News Digest" and render its final output as Markdown.
with trace("News Digest"):
    # Top-level await: valid in an IPython/Jupyter cell, not in a plain Python script.
    result = await Runner.run(news_agent, input=prompt)
    display(Markdown(result.final_output))

Hello Arnav,

Here’s your personalized update on the latest happenings in technology:

---

### Technology News Digest

#### 1. **Upcoming Google Pixel 10 May Feature Qi2 Wireless Charging**
A fresh leak suggests that Google’s next Pixel phone, the Pixel 10, will introduce “Pixelsnap” cases with built-in magnetic rings compatible with Qi2 wireless charging. If confirmed, this would bring Apple MagSafe-like convenience to Android.  
_Read more: [The Verge](https://www.theverge.com/news/758906/pixel-10-qi2-charging-pixelsnap-case-leak)_

#### 2. **Google Gemini Will Learn from Your Chats Unless You Opt Out**
Google’s AI, Gemini, will start learning from your conversations to provide personalized results—unless you change your privacy settings to prevent it. Now might be a good time to review your privacy controls if you use Gemini-powered products.  
_Read more: [Ars Technica](https://arstechnica.com/ai/2025/08/google-gemini-will-now-learn-from-your-chats-unless-you-tell-it-not-to/)_

#### 3. **Pebble Smartwatch Returns as Pebble Time 2**
The beloved Pebble smartwatch brand is officially back. The new Pebble Time 2 has been announced, and the classic brand is once again focused on delivering unique, independent smartwatches.  
_Read more: [TechCrunch](https://techcrunch.com/2025/08/13/pebbles-smartwatch-is-back-pebble-time-2-specs-revealed/)_

---

Would you like updates on any other technology topics, or news from another field? Let me know and I'll refine your digest further!

# get_everything

In [19]:
# Articles from BBC News and The Verge dated 2025-08-13, sorted by relevancy, first page.
# Filters tried during exploration but currently unused: q='AI', domains='bbc.co.uk,techcrunch.com'.
all_articles = newsapi.get_everything(
    sources='bbc-news,the-verge',
    language='en',
    from_param='2025-08-13',
    to='2025-08-13',
    sort_by='relevancy',
    page=1,
)

all_articles

{'status': 'ok',
 'totalResults': 47,
 'articles': [{'source': {'id': 'the-verge', 'name': 'The Verge'},
   'author': 'Andrew J. Hawkins',
   'title': 'Tensor wants to be the first company to sell you a ‘robocar’ —\xa0but who are they?',
   'description': 'A new company is launching today that claims to have developed “the first volume-produced, consumer-ready autonomous vehicle — designed from the ground up for private ownership at scale.” The company is called Tensor, and it describes itself as a “leading AI …',
   'url': 'https://www.theverge.com/news/758605/tensor-autox-autonomous-vehicle-robocar-personal-own-china',
   'urlToImage': 'https://platform.theverge.com/wp-content/uploads/sites/2/2025/08/Tensor_Robocar.jpg?quality=90&strip=all&crop=0%2C10.752607989199%2C100%2C78.494784021602&w=1200',
   'publishedAt': '2025-08-13T15:03:42Z',
   'content': '<ul><li></li><li></li><li></li></ul>\r\nThe company says its based in San Jose, but it appears to be connected to an autonomous vehic

In [18]:
# Search theverge.com for one specific headline, newest first.
article = newsapi.get_everything(
    q='Hidden Door is an AI storytelling game that actually makes sense',
    domains='theverge.com',
    sort_by='publishedAt',
)

article

{'status': 'ok',
 'totalResults': 1,
 'articles': [{'source': {'id': 'the-verge', 'name': 'The Verge'},
   'author': 'Jay Peters',
   'title': 'Hidden Door is an AI storytelling game that actually makes sense',
   'description': "Years before ChatGPT jump-started the generative AI wave, OpenAI technology powered a game called AI Dungeon 2 that essentially let you improvise an open-ended, anything-goes story with an AI narrator. Hidden Door, a new platform that's now in early access, a…",
   'url': 'https://www.theverge.com/games/757816/hidden-door-early-access-ai-story',
   'urlToImage': 'https://platform.theverge.com/wp-content/uploads/sites/2/2025/08/key_art_banner.avif?quality=90&strip=all&crop=0,3.4613147178592,100,93.077370564282',
   'publishedAt': '2025-08-13T01:46:04Z',
   'content': '<ul><li></li><li></li><li></li></ul>\r\nThe platform lets you create stories in familiar universes, but you cant just write your way to an instant win.\r\nThe platform lets you create stories in f

In [None]:
from typing import List, Optional


def get_headlines_v2(
    sources: Optional[List[str]] = None,
    category: Optional[str] = None,
    query: Optional[str] = None,
    from_param: Optional[str] = '2025-08-13',
    to: Optional[str] = '2025-08-13',
) -> List[dict]:
    """
    Returns US top headlines merged with matching articles from the "everything" search.

    Args:
        sources (Optional[List[str]]): A list of news source IDs to filter by. They are
            sent to the everything search as a comma-separated string and are also
            used to filter the merged result.
        category (Optional[str]): The category of news to filter by (e.g., 'business',
            'technology'). Only the top-headlines request receives it.
        query (Optional[str]): A keyword or phrase to search for; sent to both requests.
        from_param (Optional[str]): Oldest publication date for the everything search.
            The default keeps the date this notebook was originally run with.
        to (Optional[str]): Newest publication date for the everything search.
            The default keeps the date this notebook was originally run with.

    Returns:
        List[dict]: A list of articles, each represented as a dictionary, with
        top headlines first and repeated URLs removed.
    """
    newsapi = NewsApiClient(api_key=news_api_key)
    top_headlines = newsapi.get_top_headlines(country='us', language='en', category=category, q=query)
    # Copy so the response's own list is not mutated by the extend below.
    articles = list(top_headlines["articles"])

    # The everything endpoint needs at least a query or a source restriction, so
    # skip it when neither is given instead of letting the request fail.
    if query or sources:
        top_articles = newsapi.get_everything(
            q=query,
            # Source IDs must be comma-separated; joining on "" would fuse them into one bogus ID.
            sources=",".join(sources) if sources else None,
            from_param=from_param,
            to=to,
            language='en',
            sort_by='relevancy',
            page=1,
        )
        articles.extend(top_articles["articles"])

    if sources:
        articles = [article for article in articles if article["source"]["id"] in sources]

    # Both endpoints can return the same story. Article dicts are unhashable, so
    # de-duplicate on the URL instead; articles without a URL are always kept.
    seen_urls = set()
    unique_articles = []
    for article in articles:
        url = article.get("url")
        if url is not None:
            if url in seen_urls:
                continue
            seen_urls.add(url)
        unique_articles.append(article)

    return unique_articles

# Try the combined helper on technology news about AI.
get_headlines_v2(category='technology', query='AI')
    

TypeError: unhashable type: 'dict'