In [1]:
# Import libraries
import warnings
from datetime import date
from pathlib import Path

import nltk  # NLP library
import pandas as pd
# GoogleNews Documentation : https://pypi.org/project/gnews/
from gnews import GNews

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides any pandas warnings raised by later cells.
warnings.filterwarnings('ignore')

In [2]:
# Download the NLTK 'punkt' tokenizer data.
# This tokenizer divides a text into a list of sentences by using an
# unsupervised algorithm to build a model for abbreviation words,
# collocations, and words that start sentences.
# NOTE(review): no cell visible in this notebook calls nltk directly —
# confirm the download is still required.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/alfonso/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Initializing
# Create a GNews client using the library's default settings.
# NOTE(review): the Settings cell that follows rebinds `googlenews` to a
# date-limited client before any search runs, so this instance looks unused.
googlenews = GNews()

In [4]:
# Settings
# Search window: from 1 January 2023 up to the day the notebook is run.
today = date.today()
search_start = (2023, 1, 1)
search_end = (today.year, today.month, today.day)
# Rebind the client so every later search is limited to that window.
googlenews = GNews(start_date=search_start, end_date=search_end)

In [5]:
# Search
# `topic` is reused by the save cell to name the output file.
topic = "Gold"
searchednews = googlenews.get_news(topic)
# Interpolate the count into the f-string (the original f-string had no
# placeholder); the printed text is unchanged.
print(f"Articles found: {len(searchednews)}")

Articles found: 97


In [6]:
# Results
# Print the raw record at index 1 (the second article returned by the search)
# to inspect which fields each result carries.
print(searchednews[1])

{'title': 'Gold slips from all-time peak on profit taking, firmer dollar - Reuters', 'description': 'Gold slips from all-time peak on profit taking, firmer dollar  Reuters', 'published date': 'Fri, 19 Jul 2024 07:00:00 GMT', 'url': 'https://news.google.com/rss/articles/CBMisAFBVV95cUxPUWpEd3FLRFJEdnpyMVZ5UC1TdFNwQ3c0R1N2ODNEWEVXV29BOXBXNXhsdE4tVTQ0YkZCQ0F4T0tOSUdhNEtmX25MWDdIamNhNFJGTkgxeXc2elFuTEptU2RMdDRTVlZDYnZrZDRaY2FXbmlmclc0dXFuR2ljWUIyMkFNaGRVUVhPbTZ0c3NUMWVOMkNfdDItdWpqUUIxdG4wcDdfS3JxVkdmWXhoUnRtNw?oc=5&hl=en-US&gl=US&ceid=US:en', 'publisher': {'href': 'https://www.reuters.com', 'title': 'Reuters'}}


In [7]:
# Convert to DataFrame
# One row per article record, one column per record key.
df = pd.DataFrame(data=searchednews)
# Preview the last 20 rows.
df.tail(n=20)

Unnamed: 0,title,description,published date,url,publisher
77,MIT physicists turn pencil lead into “gold” - ...,MIT physicists turn pencil lead into “gold” M...,"Tue, 14 Nov 2023 08:00:00 GMT",https://news.google.com/rss/articles/CBMifEFVX...,"{'href': 'https://news.mit.edu', 'title': 'MIT..."
78,Gold drops more than 2% to one-week low as Mid...,Gold drops more than 2% to one-week low as Mid...,"Mon, 22 Apr 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMimgFBV...,"{'href': 'https://www.reuters.com', 'title': '..."
79,Gold rebounds from lows after weak US jobs ope...,Gold rebounds from lows after weak US jobs ope...,"Wed, 04 Sep 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMiogFBV...,"{'href': 'https://www.reuters.com', 'title': '..."
80,Gold prices dip after record highs on profit t...,Gold prices dip after record highs on profit t...,"Wed, 22 May 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMiowFBV...,"{'href': 'https://www.reuters.com', 'title': '..."
81,"Alphabet Soup: NASA’s GOLD Finds Surprising C,...","Alphabet Soup: NASA’s GOLD Finds Surprising C,...","Thu, 27 Jun 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMivwFBV...,"{'href': 'https://science.nasa.gov', 'title': ..."
82,Gold rises on softer dollar as focus shifts to...,Gold rises on softer dollar as focus shifts to...,"Tue, 28 May 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMingFBV...,"{'href': 'https://www.reuters.com', 'title': '..."
83,"Gold gains on dollar retreat, focus now on US ...","Gold gains on dollar retreat, focus now on US ...","Mon, 24 Jun 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMisAFBV...,"{'href': 'https://www.reuters.com', 'title': '..."
84,Gold shines amid geopolitical uncertainties - ...,Gold shines amid geopolitical uncertainties W...,"Wed, 15 May 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMiiwFBV...,"{'href': 'https://blogs.worldbank.org', 'title..."
85,China Is Buying Gold Like There’s No Tomorrow ...,China Is Buying Gold Like There’s No Tomorrow ...,"Sun, 05 May 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMic0FVX...,"{'href': 'https://www.nytimes.com', 'title': '..."
86,Gold drops from near-record level as US jobs d...,Gold drops from near-record level as US jobs d...,"Fri, 06 Sep 2024 18:14:00 GMT",https://news.google.com/rss/articles/CBMivgFBV...,"{'href': 'https://www.reuters.com', 'title': '..."


In [8]:
# Breaking publisher column
# Each 'publisher' entry is a dict; expand its keys into their own columns.
publisher_fields = df['publisher'].apply(pd.Series)
# Drop the nested column and keep a copy of the headline under 'Title'.
article_fields = df.drop(columns=['publisher']).assign(Title=df['title'])
# NOTE: the expanded publisher 'title' shares its name with the headline
# column, so the combined frame has two columns called 'title'.
df = pd.concat([article_fields, publisher_fields], axis=1)
df.head(n=5)

Unnamed: 0,title,description,published date,url,Title,href,title.1
0,Dr. Henry J. Gold Obituary 2024 - Goldsteins F...,Dr. Henry J. Gold Obituary 2024 Goldsteins Fu...,"Sun, 14 Apr 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMiXkFVX...,Dr. Henry J. Gold Obituary 2024 - Goldsteins F...,https://obits.goldsteinsfuneral.com,Goldsteins Funeral
1,Gold slips from all-time peak on profit taking...,Gold slips from all-time peak on profit taking...,"Fri, 19 Jul 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMisAFBV...,Gold slips from all-time peak on profit taking...,https://www.reuters.com,Reuters
2,Gold prices forecast to climb to record high -...,Gold prices forecast to climb to record high ...,"Thu, 12 Sep 2024 17:42:53 GMT",https://news.google.com/rss/articles/CBMilgFBV...,Gold prices forecast to climb to record high -...,https://www.goldmansachs.com,goldmansachs.com
3,Why gold prices are at record highs - CNN,Why gold prices are at record highs CNN,"Tue, 09 Apr 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMif0FVX...,Why gold prices are at record highs - CNN,https://www.cnn.com,CNN
4,Velma Gold Obituary 2024 - Chauvin Funeral Home,Velma Gold Obituary 2024 Chauvin Funeral Home,"Sun, 18 Feb 2024 08:00:00 GMT",https://news.google.com/rss/articles/CBMiZ0FVX...,Velma Gold Obituary 2024 - Chauvin Funeral Home,https://www.chauvinfuneralhome.com,Chauvin Funeral Home


In [9]:
# Cleaning dataframe
# After the publisher split, `df` has two columns named 'title' (the headline
# first, the publisher name last), so df['title'] is a two-column frame and
# its last column is the media outlet.
df['Media'] = df['title'].iloc[:, -1]
# Build the export table as an independent copy rather than a selection of
# `df`, so later cells can add columns to it without chained-assignment issues.
news_df = df[['published date', 'Media', 'Title']].copy()
# Wrap each link in a spreadsheet HYPERLINK formula. df['url'] itself is left
# untouched so re-running this cell never wraps a link twice.
news_df['url'] = '=HYPERLINK("' + df['url'] + '")'
news_df.head(20)

Unnamed: 0,published date,Media,Title,url
0,"Sun, 14 Apr 2024 07:00:00 GMT",Goldsteins Funeral,Dr. Henry J. Gold Obituary 2024 - Goldsteins F...,"=HYPERLINK(""https://news.google.com/rss/articl..."
1,"Fri, 19 Jul 2024 07:00:00 GMT",Reuters,Gold slips from all-time peak on profit taking...,"=HYPERLINK(""https://news.google.com/rss/articl..."
2,"Thu, 12 Sep 2024 17:42:53 GMT",goldmansachs.com,Gold prices forecast to climb to record high -...,"=HYPERLINK(""https://news.google.com/rss/articl..."
3,"Tue, 09 Apr 2024 07:00:00 GMT",CNN,Why gold prices are at record highs - CNN,"=HYPERLINK(""https://news.google.com/rss/articl..."
4,"Sun, 18 Feb 2024 08:00:00 GMT",Chauvin Funeral Home,Velma Gold Obituary 2024 - Chauvin Funeral Home,"=HYPERLINK(""https://news.google.com/rss/articl..."
5,"Thu, 29 Feb 2024 13:28:47 GMT",philadelphiafed.org,Price-Level Determination Under the Gold Stand...,"=HYPERLINK(""https://news.google.com/rss/articl..."
6,"Thu, 02 Feb 2023 08:00:00 GMT",Earthworks,Surviving Next to One of the World’s Largest G...,"=HYPERLINK(""https://news.google.com/rss/articl..."
7,"Fri, 17 May 2024 07:00:00 GMT",PR Newswire,NEW GOLD COMPLETES US$173 MILLION BOUGHT DEAL ...,"=HYPERLINK(""https://news.google.com/rss/articl..."
8,"Thu, 11 Jul 2024 07:00:00 GMT",Georgian Court University,Blue & Gold Days for Accepted Students - Georg...,"=HYPERLINK(""https://news.google.com/rss/articl..."
9,"Tue, 16 Jul 2024 07:00:00 GMT",MarketWatch,Why gold prices look likely to smash more reco...,"=HYPERLINK(""https://news.google.com/rss/articl..."


In [10]:
# Parse the publication timestamps, order the articles chronologically and
# expose the date as a dd/mm/YYYY string.
# Every step returns a new frame instead of assigning into `news_df` in place,
# which can raise pandas' SettingWithCopyWarning when `news_df` is a selection
# of another frame.
news_df = (
    news_df
    # Convert the column to datetime
    .assign(Date=pd.to_datetime(news_df['published date']))
    # Sort on the parsed datetimes, then renumber the rows
    .sort_values(by='Date')
    .reset_index(drop=True)
    # Change column date format (done after sorting, so order is by real date)
    .assign(Date=lambda frame: frame['Date'].dt.strftime('%d/%m/%Y'))
    .loc[:, ['Date', 'Media', 'Title', 'url']]
)
news_df

Unnamed: 0,Date,Media,Title,url
0,02/02/2023,Earthworks,Surviving Next to One of the World’s Largest G...,"=HYPERLINK(""https://news.google.com/rss/articl..."
1,07/04/2023,Roger Ebert,Country Gold movie review & film summary (2023...,"=HYPERLINK(""https://news.google.com/rss/articl..."
2,22/04/2023,The New York Times,"Eureka! After California’s Heavy Rains, Gold S...","=HYPERLINK(""https://news.google.com/rss/articl..."
3,31/10/2023,World Gold Council,Gold Demand Trends Q3 2023 - World Gold Council,"=HYPERLINK(""https://news.google.com/rss/articl..."
4,14/11/2023,MIT News,MIT physicists turn pencil lead into “gold” - ...,"=HYPERLINK(""https://news.google.com/rss/articl..."
...,...,...,...,...
92,10/09/2024,Reuters,"Gold holds firm above $2,500 level as US infla...","=HYPERLINK(""https://news.google.com/rss/articl..."
93,12/09/2024,goldmansachs.com,Gold prices forecast to climb to record high -...,"=HYPERLINK(""https://news.google.com/rss/articl..."
94,12/09/2024,Reuters,Gold hits all-time high as Fed rate-cut hopes ...,"=HYPERLINK(""https://news.google.com/rss/articl..."
95,13/09/2024,Reuters,Gold bulls set sights on previously dismissed ...,"=HYPERLINK(""https://news.google.com/rss/articl..."


In [11]:
# Save in a document
# Spaces in the topic are replaced so it can be used as a file name.
topic = topic.replace(" ", "_")
# to_csv does not create missing folders, so make sure the target exists.
output_dir = Path("./output")
output_dir.mkdir(parents=True, exist_ok=True)
news_df.to_csv(output_dir / f"{topic}.csv")