# News API Tests

## Overview

Aggregates results from two news APIs (News API and NewsCatcher) and standardises them into a common article format.

In [1]:
import os
from dotenv import load_dotenv
from newsapi import NewsApiClient
from newscatcherapi import NewsCatcherApiClient
import requests
import pandas as pd

# Load variables from a local .env file (if present) into the process
# environment so the API keys can be read from os.environ further down.
load_dotenv()
print("Import successful.")

Import successful.


In [2]:
# Country used for the headline lookups in both APIs.
country_code = 'AU'

# API keys come from the environment; each is None when its variable is unset.
news_api_key = os.getenv("NEWS_API_KEY")
news_catcher_api_key = os.getenv("NEWS_CATCHER_API_KEY")

## Standardised Mapping

News Article Schema

- title: string
- image: string
- description: string
- url: string
- publishDate: string


In [3]:
# Initialise the News API client with the key read from the environment.
newsapi = NewsApiClient(api_key=news_api_key)

In [5]:
# Search expression sent to News API; reused later by the aggregated query search.
query = 'space OR technology'

# Run the "everything" search for English-language matches and keep only the
# 'articles' list from the response.
all_articles = newsapi.get_everything(language='en', q=query)['articles']
all_articles

[{'source': {'id': 'engadget', 'name': 'Engadget'},
  'author': 'Billy Steele',
  'title': "Arden brings BBQ indoors thanks to 'smoke elimination' technology",
  'description': "As the weather warms up, it's time for aspiring pit masters to dust off their aprons and meat probes before heading outside for some low-and-slow cooking. Pellet grills have become a popular choice for backyard cooks as they offer the flavor of food cooked ov…",
  'url': 'https://www.engadget.com/firstbuild-arden-indoor-smoker-ge-appliances-130051688.html',
  'urlToImage': 'https://s.yimg.com/os/creatr-uploaded-images/2022-03/0b298520-aeca-11ec-b1cd-dbc1ddb53c22',
  'publishedAt': '2022-03-29T13:00:51Z',
  'content': "As the weather warms up, it's time for aspiring pit masters to dust off their aprons and meat probes before heading outside for some low-and-slow cooking. Pellet grills have become a popular choice f… [+5128 chars]"},
 {'source': {'id': 'the-verge', 'name': 'The Verge'},
  'author': 'Loren Grush',

In [6]:
# Top headlines for the configured country. The country code is lower-cased for
# this call and a page size of 100 is requested.
top_articles = newsapi.get_top_headlines(country=country_code.lower(), page_size=100)['articles']
top_articles

[{'source': {'id': None, 'name': 'The Sydney Morning Herald'},
  'author': 'Broede Carmody',
  'title': 'Election 2022 LIVE updates: Anne Ruston named Scott Morrison’s pick for health minister as Coalition, Labor campaigns continue across the country - Sydney Morning Herald',
  'description': 'Scott Morrison has confirmed he wants Social Services Minister Anne Ruston to replace Greg Hunt as health minister should the Coalition win another term of government, Labor leader Anthony Albanese is in Queensland today and the PM is in Sydney.',
  'url': 'https://www.smh.com.au/national/election-2022-live-updates-anne-ruston-named-scott-morrison-s-pick-for-health-minister-as-coalition-labor-campaigns-continue-across-the-country-20220417-p5adyy.html',
  'urlToImage': 'https://static.ffx.io/images/$zoom_0.5519%2C$multiply_0.7554%2C$ratio_1.777778%2C$width_1059%2C$x_0%2C$y_0/t_crop_custom/q_86%2Cf_auto/t_smh_live_no_age_social_wm/85a44809304d797899b45c94db58b03a9a83ac5f',
  'publishedAt': '2022-04

In [7]:
def map_to_standardised_format_news_api(article):
    """Convert one News API article into the standardised article schema.

    Args:
        article: Mapping for a single article. The keys ``title``,
            ``urlToImage``, ``description``, ``url`` and ``publishedAt``
            are read; any other keys are ignored.

    Returns:
        dict: A new dict with the keys ``title``, ``image``, ``description``,
        ``url`` and ``publishDate`` (in that order). Values are copied
        through unchanged.

    Raises:
        KeyError: If ``article`` lacks one of the keys that are read.
    """
    # (standardised key, News API key) pairs, listed in output order.
    field_pairs = (
        ('title', 'title'),
        ('image', 'urlToImage'),
        ('description', 'description'),
        ('url', 'url'),
        ('publishDate', 'publishedAt'),
    )
    return {target: article[source] for target, source in field_pairs}

# Preview the News API top headlines in the standardised schema.
[map_to_standardised_format_news_api(article) for article in top_articles]

[{'title': 'Election 2022 LIVE updates: Anne Ruston named Scott Morrison’s pick for health minister as Coalition, Labor campaigns continue across the country - Sydney Morning Herald',
  'image': 'https://static.ffx.io/images/$zoom_0.5519%2C$multiply_0.7554%2C$ratio_1.777778%2C$width_1059%2C$x_0%2C$y_0/t_crop_custom/q_86%2Cf_auto/t_smh_live_no_age_social_wm/85a44809304d797899b45c94db58b03a9a83ac5f',
  'description': 'Scott Morrison has confirmed he wants Social Services Minister Anne Ruston to replace Greg Hunt as health minister should the Coalition win another term of government, Labor leader Anthony Albanese is in Queensland today and the PM is in Sydney.',
  'url': 'https://www.smh.com.au/national/election-2022-live-updates-anne-ruston-named-scott-morrison-s-pick-for-health-minister-as-coalition-labor-campaigns-continue-across-the-country-20220417-p5adyy.html',
  'publishDate': '2022-04-17T02:17:45Z'},
 {'title': "India's New Covid Vax Candidate Can Withstand 100 Degrees C | Nation 

## News Catcher API

In [4]:
# Initialise the NewsCatcher API client with the key read from the environment.
newscatcherapi = NewsCatcherApiClient(x_api_key=news_catcher_api_key)

In [8]:
# Latest NewsCatcher headlines for the configured country, English only, with a
# page size of 100. Only the 'articles' list of the response is kept.
# Note: this rebinds `all_articles`, replacing the News API search results
# assigned in the earlier cell.
all_articles = newscatcherapi.get_latest_headlines(countries=country_code, lang='en', page_size=100)['articles']
all_articles

[{'title': 'Cruise ships return to Australia after two-year ban as COVID-19 cases remain stubbornly high',
  'author': None,
  'published_date': '2022-04-17 03:00:00',
  'published_date_precision': 'timezone unknown',
  'link': 'https://www.sbs.com.au/news/article/cruise-ships-to-return-after-two-year-ban-while-australias-covid-19-cases-remain-stubbornly-high/zd2pg5ip5',
  'clean_url': 'sbs.com.au',
  'excerpt': "Cruise ships are making a return to Australia's ports after a two-year absence triggered by the COVID-19 pandemic.",
  'summary': "Australian authorities are lifting a ban on cruise liners that's been in place since March 2020 as Australia's COVID-19 cases remain stubbornly high.The country recorded a total of eight COVID-19-related deaths on Saturday, with New South Wales reporting six fatalities, Victoria reporting two and the Northern Territory reporting none.Some states and territories are yet to report their daily COVID-19 figures.Meanwhile New South Wales, Victoria and Q

In [9]:
def map_to_standardised_format_newscatcher(article):
    """Convert one NewsCatcher article into the standardised article schema.

    Args:
        article: Mapping for a single article. The keys ``title``, ``media``,
            ``summary``, ``link`` and ``published_date`` are read; any other
            keys are ignored.

    Returns:
        dict: A new dict with the keys ``title``, ``image``, ``description``,
        ``url`` and ``publishDate`` (in that order). Values are copied
        through unchanged, so ``publishDate`` keeps whatever format the
        source supplied.

    Raises:
        KeyError: If ``article`` lacks one of the keys that are read.
    """
    # (standardised key, NewsCatcher key) pairs, listed in output order.
    field_pairs = (
        ('title', 'title'),
        ('image', 'media'),
        ('description', 'summary'),
        ('url', 'link'),
        ('publishDate', 'published_date'),
    )
    return {target: article[source] for target, source in field_pairs}

# Preview the NewsCatcher headlines in the standardised schema.
[map_to_standardised_format_newscatcher(article) for article in all_articles]

[{'title': 'Cruise ships return to Australia after two-year ban as COVID-19 cases remain stubbornly high',
  'image': 'https://images.sbs.com.au/dims4/default/ee19452/2147483647/strip/true/crop/7360x4140+0+386/resize/1280x720!/quality/90/?url=http%3A%2F%2Fsbs-au-brightspot.s3.amazonaws.com%2F44%2Faa%2F9ba0450040578083388f08811f36%2Fship.jpg',
  'description': "Australian authorities are lifting a ban on cruise liners that's been in place since March 2020 as Australia's COVID-19 cases remain stubbornly high.The country recorded a total of eight COVID-19-related deaths on Saturday, with New South Wales reporting six fatalities, Victoria reporting two and the Northern Territory reporting none.Some states and territories are yet to report their daily COVID-19 figures.Meanwhile New South Wales, Victoria and Queensland have outlined testing and vaccination requirements for passengers and crew in preparation for the ships to return.",
  'url': 'https://www.sbs.com.au/news/article/cruise-ships

## Aggregator Queries

- Get all articles by country (these will show up in the Explore or Recent tabs)
- Get all articles by query (these will show up when we have interests)

In [19]:
def get_articles_by_country(country_code):
    """Collect standardised headline articles for a country from both APIs.

    Uses the module-level ``newsapi`` and ``newscatcherapi`` clients.

    Args:
        country_code: Country code string. It is lower-cased for the News API
            call and passed unchanged to NewsCatcher.

    Returns:
        list: Standardised article dicts, News API results first, followed by
        the NewsCatcher results. No de-duplication is performed here.
    """
    # Both requests are issued before any mapping happens.
    newsapi_articles = newsapi.get_top_headlines(
        country=country_code.lower(),
        page_size=100,
    )['articles']
    newscatcher_articles = newscatcherapi.get_latest_headlines(
        countries=country_code,
        lang='en',
        page_size=100,
    )['articles']

    from_newsapi = [map_to_standardised_format_news_api(item) for item in newsapi_articles]
    from_newscatcher = [map_to_standardised_format_newscatcher(item) for item in newscatcher_articles]
    return from_newsapi + from_newscatcher

# Aggregated, standardised headlines for the configured country.
articles = get_articles_by_country(country_code)

In [45]:
# Tabulate the aggregated country headlines.
df = pd.DataFrame(articles)
# Rows sharing a title collapse to the first occurrence; News API rows come
# first in `articles`, so they win over NewsCatcher rows with the same title.
dedup = df.drop_duplicates(subset='title', keep='first')
dedup.to_dict(orient='records')

[{'title': 'Election 2022 LIVE updates: Anne Ruston named Scott Morrison’s pick for health minister as Coalition, Labor campaigns continue across the country - Sydney Morning Herald',
  'image': 'https://static.ffx.io/images/$zoom_0.5519%2C$multiply_0.7554%2C$ratio_1.777778%2C$width_1059%2C$x_0%2C$y_0/t_crop_custom/q_86%2Cf_auto/t_smh_live_no_age_social_wm/85a44809304d797899b45c94db58b03a9a83ac5f',
  'description': 'Scott Morrison has confirmed he wants Social Services Minister Anne Ruston to replace Greg Hunt as health minister should the Coalition win another term of government, Labor leader Anthony Albanese is in Queensland today and the PM is in Sydney.',
  'url': 'https://www.smh.com.au/national/election-2022-live-updates-anne-ruston-named-scott-morrison-s-pick-for-health-minister-as-coalition-labor-campaigns-continue-across-the-country-20220417-p5adyy.html',
  'publishDate': '2022-04-17T02:17:45Z'},
 {'title': "India's New Covid Vax Candidate Can Withstand 100 Degrees C | Nation 

In [11]:
def get_articles_by_query(query, country_code):
    """Collect standardised articles matching a search query from both APIs.

    Uses the module-level ``newsapi`` and ``newscatcherapi`` clients.

    Args:
        query: Search expression passed as ``q`` to both APIs.
        country_code: Country code string. Only the NewsCatcher search is
            restricted by it; the News API call does not receive it.

    Returns:
        list: Standardised article dicts, News API results first, followed by
        the NewsCatcher results. No de-duplication is performed here.
    """
    # Both requests are issued before any mapping happens.
    newsapi_articles = newsapi.get_everything(language='en', q=query)['articles']
    newscatcher_articles = newscatcherapi.get_search(
        q=query,
        countries=country_code,
        lang='en',
        page_size=100,
    )['articles']

    from_newsapi = [map_to_standardised_format_news_api(item) for item in newsapi_articles]
    from_newscatcher = [map_to_standardised_format_newscatcher(item) for item in newscatcher_articles]
    return from_newsapi + from_newscatcher

# Aggregated, standardised search results. Relies on `query` defined in the
# earlier News API search cell and rebinds `all_articles` once more.
all_articles = get_articles_by_query(query, country_code)


In [18]:
# Number of standardised articles returned for the query across both sources.
len(all_articles)

120

In [17]:
# Tabulate the aggregated query results.
df = pd.DataFrame(all_articles)
grouped_df = df.groupby(by='title')
# GroupBy.head() returns the first 5 rows of *each* title group with the
# original index and order, so titles occurring up to 5 times are kept in full.
# NOTE(review): if de-duplication by title was intended here, confirm and use
# drop_duplicates(subset=['title']) as in the country cell.
grouped_df.head(n=5)

Unnamed: 0,title,image,description,url,publishDate
0,Arden brings BBQ indoors thanks to 'smoke elim...,https://s.yimg.com/os/creatr-uploaded-images/2...,"As the weather warms up, it's time for aspirin...",https://www.engadget.com/firstbuild-arden-indo...,2022-03-29T13:00:51Z
1,SpaceX poised to send first private crew to th...,https://cdn.vox-cdn.com/thumbor/hmEkXxhGHt6XKf...,"On April 8th, SpaceX is set to launch a privat...",https://www.theverge.com/2022/4/7/23013067/spa...,2022-04-07T20:00:00Z
2,No Man’s Sky's Outlaws update lets you play as...,https://s.yimg.com/os/creatr-uploaded-images/2...,No Man’s Sky\r\n is already an enormous game a...,https://www.engadget.com/no-mans-sky-outlaws-u...,2022-04-13T14:11:27Z
3,"Many Americans distrust emerging technology, n...",https://s.yimg.com/os/creatr-uploaded-images/2...,"For more than a century, popular science ficti...",https://www.engadget.com/many-americans-distru...,2022-03-31T17:00:38Z
4,"Creating Space Command, Australia Strengthens ...",https://static01.nyt.com/images/2022/03/21/wor...,The expanded commitment to space defense refle...,https://www.nytimes.com/2022/03/21/world/austr...,2022-03-21T14:54:48Z
...,...,...,...,...,...
115,"When the Weather Itself Goes Into Battle. Yes,...",https://mustorage.blob.core.windows.net/images...,The year 2025 is not at all far away. And ther...,https://mysteriousuniverse.org/2022/04/When-th...,2022-04-11 17:00:00
116,Australian leaders travel to Europe for the ne...,https://www.industryupdate.com.au/sites/defaul...,Australian hydrogen companies and project owne...,https://www.industryupdate.com.au/article/aust...,2022-04-12 01:03:44
117,Marvel at mechanisms at Discovery,https://www.bendigoadvertiser.com.au/static/16...,Marvel at all there is to do these school holi...,https://www.bendigoadvertiser.com.au/story/769...,2022-04-11 07:13:00
118,"KSA Business: Saudi Military Industries, Boein...",https://www.gdnonline.com/gdnimages/20220410\s...,"Sami group, the Saudi national defense localiz...",https://www.gdnonline.com/Details/1049133,2022-04-10 15:38:14
