In [2]:
# Import libraries
# GoogleNews Documentation : https://pypi.org/project/gnews/
import warnings
from datetime import date
from pathlib import Path

import nltk  # NLP library
import pandas as pd
from gnews import GNews

warnings.filterwarnings('ignore')

In [3]:
# Fetch NLTK's 'punkt' sentence-tokenizer data.
# Punkt divides a text into a list of sentences by using an unsupervised
# algorithm to build a model for abbreviation words, collocations, and words
# that start sentences.
# NOTE(review): no cell visible here calls nltk directly after this download —
# presumably an indirect dependency; confirm before removing.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/alfonso/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
# Initializing
# Create a GNews client with no arguments (library defaults).
# NOTE(review): `googlenews` is rebound with explicit start/end dates in the
# Settings cell that follows, so this instance looks unused — confirm.
googlenews = GNews()

In [5]:
# Settings
# Search window, passed as (year, month, day) tuples: it opens on
# 1 January 2023 and closes on the day the notebook is executed.
today = date.today()
window_start = (2023, 1, 1)
window_end = (today.year, today.month, today.day)
googlenews = GNews(start_date=window_start, end_date=window_end)

In [6]:
# Search
# Query the configured client for the topic and report how many records came back.
topic = "Alcon"
searchednews = googlenews.get_news(topic)
article_count = len(searchednews)
print("Articles found:", article_count)

Articles found: 98


In [7]:
# Results
# Print one raw record (index 1, i.e. the second item of the result list) to
# inspect the fields that come back for each article.
print(searchednews[1])

{'title': 'NECO Students and Alcon Experience Academy - New England College of Optometry', 'description': 'NECO Students and Alcon Experience Academy  New England College of Optometry', 'published date': 'Fri, 19 Jul 2024 07:00:00 GMT', 'url': 'https://news.google.com/rss/articles/CBMieEFVX3lxTE1ZZFJGazZFbXBvYmladWxaTzMzM19sOWYwOFpyX2EzU3R0S0hHMUJfMllRQlBvWDFUWEFTSnB2V3U5WDRBVWMxWVBYT2E5VEZ0U1h6NmlGUlFFSENFZFJucm0wVlJsVm5XUV94a24yM1hZS3FMN1ItNw?oc=5&hl=en-US&gl=US&ceid=US:en', 'publisher': {'href': 'https://www.neco.edu', 'title': 'New England College of Optometry'}}


In [8]:
# Convert to DataFrame
# One row per article record; preview the last 20 rows.
df = pd.DataFrame(data=searchednews)
df.tail(n=20)

Unnamed: 0,title,description,published date,url,publisher
78,"Sue Alcon O’Connor drew on real family, fictio...","Sue Alcon O’Connor drew on real family, fictio...","Sun, 17 Nov 2024 08:00:00 GMT",https://news.google.com/rss/articles/CBMikgFBV...,"{'href': 'https://coloradosun.com', 'title': '..."
79,Alcon Appoints Duke University Professor as Ch...,Alcon Appoints Duke University Professor as Ch...,"Tue, 02 Apr 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMilgFBV...,"{'href': 'https://fortworthinc.com', 'title': ..."
80,Alcon Laboratories issues recall for Systane e...,Alcon Laboratories issues recall for Systane e...,"Tue, 31 Dec 2024 08:00:00 GMT",https://news.google.com/rss/articles/CBMi7gFBV...,"{'href': 'https://topclassactions.com', 'title..."
81,Elon Musk sued for using AI-generated Blade Ru...,Elon Musk sued for using AI-generated Blade Ru...,"Mon, 21 Oct 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMiwwFBV...,"{'href': 'https://www.theverge.com', 'title': ..."
82,Alcon Laboratories Recalls Systane Eye Drops N...,Alcon Laboratories Recalls Systane Eye Drops N...,"Thu, 26 Dec 2024 08:00:00 GMT",https://news.google.com/rss/articles/CBMikgFBV...,"{'href': 'https://www.blackenterprise.com', 't..."
83,M&A: Alcon Acquires BELKIN Vision for $81M - H...,M&A: Alcon Acquires BELKIN Vision for $81M HI...,"Mon, 01 Jul 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMigwFBV...,"{'href': 'https://hitconsultant.net', 'title':..."
84,'He was a fighter': Former NM state Rep. Elise...,'He was a fighter': Former NM state Rep. Elise...,"Tue, 14 Jan 2025 06:04:32 GMT",https://news.google.com/rss/articles/CBMiiAFBV...,"{'href': 'https://www.abqjournal.com', 'title'..."
85,Alcon breaks ground on $70 million expansion -...,Alcon breaks ground on $70 million expansion ...,"Thu, 26 Oct 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiygFBV...,"{'href': 'https://www.herald-dispatch.com', 't..."
86,SMARTCataract cloud-based app from Alcon - Oph...,SMARTCataract cloud-based app from Alcon Opht...,"Sat, 16 Dec 2023 08:00:00 GMT",https://news.google.com/rss/articles/CBMihwFBV...,"{'href': 'https://www.ophthalmologytimes.com',..."
87,Do ‘Blade Runner 2049’ Producers Have a Case A...,Do ‘Blade Runner 2049’ Producers Have a Case A...,"Thu, 24 Oct 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMiqgFBV...,"{'href': 'https://variety.com', 'title': 'Vari..."


In [9]:
# Breaking publisher column
# Keep the article headline under a distinct label ('Title') first: expanding
# the publisher dicts below adds a second column that is also called 'title'.
df['Title'] = df['title']

# Expand each publisher dict into its own columns ('href', 'title').
publisher_parts = df['publisher'].apply(pd.Series)

# Swap the nested column for the expanded ones, side by side.
df = pd.concat([df.drop(columns=['publisher']), publisher_parts], axis=1)
df.head(5)

Unnamed: 0,title,description,published date,url,Title,href,title.1
0,Alcon Expedites Insights with an Agile Enterpr...,Alcon Expedites Insights with an Agile Enterpr...,"Wed, 04 Oct 2023 21:40:10 GMT",https://news.google.com/rss/articles/CBMicEFVX...,Alcon Expedites Insights with an Agile Enterpr...,https://aws.amazon.com,AWS Blog
1,NECO Students and Alcon Experience Academy - N...,NECO Students and Alcon Experience Academy Ne...,"Fri, 19 Jul 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMieEFVX...,NECO Students and Alcon Experience Academy - N...,https://www.neco.edu,New England College of Optometry
2,Alcon Innovations on Display at AAO 2024 Demon...,Alcon Innovations on Display at AAO 2024 Demon...,"Tue, 15 Oct 2024 07:00:00 GMT",https://news.google.com/rss/articles/CBMi0gFBV...,Alcon Innovations on Display at AAO 2024 Demon...,https://www.businesswire.com,Business Wire
3,Alcon to pay Johnson & Johnson $199 mln to set...,Alcon to pay Johnson & Johnson $199 mln to set...,"Mon, 13 Feb 2023 08:00:00 GMT",https://news.google.com/rss/articles/CBMinwFBV...,Alcon to pay Johnson & Johnson $199 mln to set...,https://www.reuters.com,Reuters
4,Alcon Laboratories is considering a major $100...,Alcon Laboratories is considering a major $100...,"Sat, 21 Oct 2023 07:00:00 GMT",https://news.google.com/rss/articles/CBMiggFBV...,Alcon Laboratories is considering a major $100...,https://www.star-telegram.com,Fort Worth Star-Telegram


In [10]:
# Cleaning dataframe
# After the publisher split, `df` holds two columns labelled 'title' (article
# headline and publisher name). Selecting 'title' therefore yields a frame, and
# its last column is the publisher name.
df['Media'] = df['title'].iloc[:, -1]

# Wrap each link in a spreadsheet HYPERLINK formula. Rows that are already
# wrapped are left untouched, so re-running this cell does not nest the formula
# inside itself.
already_wrapped = df['url'].str.startswith('=HYPERLINK("', na=False)
df['url'] = df['url'].where(already_wrapped, '=HYPERLINK("' + df['url'] + '")')

# Take an independent copy: later column assignments on `news_df` would
# otherwise target a slice of `df` (SettingWithCopyWarning, which the global
# warnings filter hides).
news_df = df[['published date', 'Media', 'Title', 'url']].copy()
news_df.head(20)

Unnamed: 0,published date,Media,Title,url
0,"Wed, 04 Oct 2023 21:40:10 GMT",AWS Blog,Alcon Expedites Insights with an Agile Enterpr...,"=HYPERLINK(""https://news.google.com/rss/articl..."
1,"Fri, 19 Jul 2024 07:00:00 GMT",New England College of Optometry,NECO Students and Alcon Experience Academy - N...,"=HYPERLINK(""https://news.google.com/rss/articl..."
2,"Tue, 15 Oct 2024 07:00:00 GMT",Business Wire,Alcon Innovations on Display at AAO 2024 Demon...,"=HYPERLINK(""https://news.google.com/rss/articl..."
3,"Mon, 13 Feb 2023 08:00:00 GMT",Reuters,Alcon to pay Johnson & Johnson $199 mln to set...,"=HYPERLINK(""https://news.google.com/rss/articl..."
4,"Sat, 21 Oct 2023 07:00:00 GMT",Fort Worth Star-Telegram,Alcon Laboratories is considering a major $100...,"=HYPERLINK(""https://news.google.com/rss/articl..."
5,"Sun, 02 Feb 2025 08:00:00 GMT",Cumby Family Funeral Service,Obituary information for Florence Diane Philli...,"=HYPERLINK(""https://news.google.com/rss/articl..."
6,"Mon, 25 Nov 2024 08:00:00 GMT",Source New Mexico,New Mexico representative Eliseo Alcon resigns...,"=HYPERLINK(""https://news.google.com/rss/articl..."
7,"Mon, 13 Feb 2023 08:00:00 GMT",Fierce Biotech,Alcon reaches $199M settlement with J&J Vision...,"=HYPERLINK(""https://news.google.com/rss/articl..."
8,"Thu, 16 Nov 2023 08:00:00 GMT",Optometry Times,AAOpt 2023: Pipeline update from Alcon - Optom...,"=HYPERLINK(""https://news.google.com/rss/articl..."
9,"Wed, 10 Jan 2024 08:00:00 GMT",BioSpace,Alcon’s Investigational Drops Show Promising P...,"=HYPERLINK(""https://news.google.com/rss/articl..."


In [11]:
# Parse the publication timestamps, order the articles chronologically, then
# render the date as dd/mm/YYYY text.
# This is written as one non-mutating chain: the incoming `news_df` may be a
# slice of `df`, and assigning a column onto it in place is a chained
# assignment.
news_df = (
    news_df
    # Convert the column to datetime
    .assign(Date=pd.to_datetime(news_df['published date'], format="mixed"))
    # Sort values and reset index (sorting must happen while 'Date' is still a
    # datetime, before it is turned into text)
    .sort_values(by='Date')
    .reset_index(drop=True)
    # Change column date format
    .assign(Date=lambda frame: frame['Date'].dt.strftime('%d/%m/%Y'))
    # Keep only the columns that go into the report, in display order
    .loc[:, ['Date', 'Media', 'Title', 'url']]
)
news_df

Unnamed: 0,Date,Media,Title,url
0,10/01/2023,The Dallas Morning News,FWISD and Alcon open on-campus vision center -...,"=HYPERLINK(""https://news.google.com/rss/articl..."
1,02/02/2023,Business Wire,Alcon Canada launches new TOTAL toric lenses f...,"=HYPERLINK(""https://news.google.com/rss/articl..."
2,13/02/2023,Reuters,Alcon to pay Johnson & Johnson $199 mln to set...,"=HYPERLINK(""https://news.google.com/rss/articl..."
3,13/02/2023,Fierce Biotech,Alcon reaches $199M settlement with J&J Vision...,"=HYPERLINK(""https://news.google.com/rss/articl..."
4,14/02/2023,Optics.org,Alcon pays $199M to settle femtosecond laser c...,"=HYPERLINK(""https://news.google.com/rss/articl..."
...,...,...,...,...
93,28/01/2025,Bloomberg Law,"Alcon Loses Bid to Veto Aurion IPO, Can Still ...","=HYPERLINK(""https://news.google.com/rss/articl..."
94,28/01/2025,Fierce Biotech,"Judge rules in favor of Aurion in IPO suit, a ...","=HYPERLINK(""https://news.google.com/rss/articl..."
95,02/02/2025,Cumby Family Funeral Service,Obituary information for Florence Diane Philli...,"=HYPERLINK(""https://news.google.com/rss/articl..."
96,12/02/2025,Bloomberg Law,Alcon Loses Patent Ruling on Padagis Copy of S...,"=HYPERLINK(""https://news.google.com/rss/articl..."


In [12]:
# Save in a document
# The file is named after the search topic, with spaces replaced by underscores.
topic = topic.replace(" ", "_")

# Create the target folder first: to_csv does not create missing directories
# and raises OSError when ./output is absent (e.g. on a fresh checkout).
output_dir = Path("./output")
output_dir.mkdir(parents=True, exist_ok=True)

news_df.to_csv(output_dir / f"{topic}.csv")