In [3]:
# Import libraries
# GoogleNews Documentation : https://pypi.org/project/GoogleNews/
from GoogleNews import GoogleNews
from newspaper import Article
from newspaper import Config
from tqdm import tqdm
import pandas as pd
import nltk  # NLP library

In [4]:
# This tokenizer divides a text into a list of sentences by using an unsupervised algorithm to build a model for abbreviation words, collocations, and words that start sentences.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/alfonso/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [5]:
# Config to avoid 403 forbidden error
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
config = Config()
config.browser_user_agent = user_agent
config.request_timeout = 20

In [6]:
# Initializing
googlenews = GoogleNews()

In [7]:
# Clear previous search
googlenews.clear()

In [8]:
# Settings
googlenews = GoogleNews(lang='en',start='01/06/2023',end='01/06/2023')

In [9]:
# Search 
googlenews.search('Ukraine')
print(f"Articles found:", googlenews.total_count())

Articles found: 39500


In [10]:
# Results
result = googlenews.result()

In [11]:
# Convert to DataFrame
df = pd.DataFrame(result)
df.head(20)

Unnamed: 0,title,media,date,datetime,desc,link,img
0,,,,NaT,,https://support.google.com/websearch/answer/10...,
1,More Than $3.75 Billion in U.S. Military Assis...,State Department,1 week ago,2023-01-08 19:26:37.913543,"In the longer term, this Foreign Military Fina...",https://www.state.gov/more-than-3-75-billion-i...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
2,Ukraine war: Western allies to send fighting v...,BBC,1 week ago,2023-01-08 19:26:37.933854,Germany and the US have agreed to join France ...,https://www.bbc.com/news/world-europe-64184430,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
3,Germany to Send Ukraine 40 Armored Vehicles by...,VOA,1 week ago,2023-01-08 19:26:37.949057,Germany will supply Ukraine with around 40 Mar...,https://www.voanews.com/a/germany-to-send-ukra...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
4,Ukraine war: Russians accused of opening fire ...,BBC,1 week ago,2023-01-08 19:26:37.962944,It said its forces had only returned fired dur...,https://www.bbc.com/news/world-europe-64187704,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
5,"Russia's war on Ukraine (UN, Ukraine governmen...",ReliefWeb,1 week ago,2023-01-08 19:26:37.978359,Ukraine rejected the Russian-proposed ceasefir...,https://reliefweb.int/report/ukraine/russias-w...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
6,Kyiv rejects Putin’s ceasefire: ‘We will bite ...,EL PAÍS in English,1 week ago,2023-01-08 19:26:37.994242,The Ukrainian government has reacted with susp...,https://english.elpais.com/international/2023-...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
7,"Thanks To TikTok, Ukraine Is Raining Missile O...",EurAsian Times,1 week ago,2023-01-08 19:26:38.007771,Ukrainian forces have been precisely targeting...,https://eurasiantimes.com/thanks-to-tiktok-ukr...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
8,Ukraine under largest economic slump post-inde...,Helsinki Times,1 week ago,2023-01-08 19:26:38.023401,In the ongoing conflict between Ukraine and Ru...,https://www.helsinkitimes.fi/world-int/22772-u...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
9,Pope prays for Ukraine as Eastern Churches cel...,Vatican News,1 week ago,2023-01-08 19:26:38.039678,Pope Francis renews his prayers for peace in w...,https://www.vaticannews.va/en/pope/news/2023-0...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."


In [12]:
# Checking one article
df.iloc[8]['desc']

'In the ongoing conflict between Ukraine and Russia which has resulted in widespread devastation of life and property, Ukraine has suffered its sharpest...'

In [16]:
# Unfortunately there is a search limit on the number of news in this api. Maximum number is 10. Now, we will try to fetch more than that.
for i in tqdm(range(2,10), colour="red", desc="Getting news links from Google"):
  googlenews.getpage(i)
  result = googlenews.result()
  df = pd.DataFrame(result)

Getting news links from Google: 100%|[31m██████████[0m| 8/8 [00:14<00:00,  1.85s/it]


In [14]:
df.head(30)

Unnamed: 0,title,media,date,datetime,desc,link,img
0,,,,NaT,,https://support.google.com/websearch/answer/10...,
1,More Than $3.75 Billion in U.S. Military Assis...,State Department,1 week ago,2023-01-08 19:26:37.913543,"In the longer term, this Foreign Military Fina...",https://www.state.gov/more-than-3-75-billion-i...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
2,Ukraine war: Western allies to send fighting v...,BBC,1 week ago,2023-01-08 19:26:37.933854,Germany and the US have agreed to join France ...,https://www.bbc.com/news/world-europe-64184430,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
3,Germany to Send Ukraine 40 Armored Vehicles by...,VOA,1 week ago,2023-01-08 19:26:37.949057,Germany will supply Ukraine with around 40 Mar...,https://www.voanews.com/a/germany-to-send-ukra...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
4,Ukraine war: Russians accused of opening fire ...,BBC,1 week ago,2023-01-08 19:26:37.962944,It said its forces had only returned fired dur...,https://www.bbc.com/news/world-europe-64187704,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
5,"Russia's war on Ukraine (UN, Ukraine governmen...",ReliefWeb,1 week ago,2023-01-08 19:26:37.978359,Ukraine rejected the Russian-proposed ceasefir...,https://reliefweb.int/report/ukraine/russias-w...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
6,Kyiv rejects Putin’s ceasefire: ‘We will bite ...,EL PAÍS in English,1 week ago,2023-01-08 19:26:37.994242,The Ukrainian government has reacted with susp...,https://english.elpais.com/international/2023-...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
7,"Thanks To TikTok, Ukraine Is Raining Missile O...",EurAsian Times,1 week ago,2023-01-08 19:26:38.007771,Ukrainian forces have been precisely targeting...,https://eurasiantimes.com/thanks-to-tiktok-ukr...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
8,Ukraine under largest economic slump post-inde...,Helsinki Times,1 week ago,2023-01-08 19:26:38.023401,In the ongoing conflict between Ukraine and Ru...,https://www.helsinkitimes.fi/world-int/22772-u...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."
9,Pope prays for Ukraine as Eastern Churches cel...,Vatican News,1 week ago,2023-01-08 19:26:38.039678,Pope Francis renews his prayers for peace in w...,https://www.vaticannews.va/en/pope/news/2023-0...,"data:image/gif;base64,R0lGODlhAQABAIAAAP//////..."


In [22]:
# Get the full article extracting and parsing the article and drop the links that give problems

list = []

for ind in tqdm(df.index, colour="green", desc="Extracting articles from the internet"):
  dict = {}
  
  try:
    article = Article(df['link'][ind],config = config)
    article.download()
    article.parse()
    article.nlp()
    dict['Date'] = df['date'][ind]
    dict['Media'] = df['media'][ind]
    dict['Title'] = article.title
    dict['Article'] = article.text
    dict['Summary'] = article.summary
    list.append(dict)
  except Exception:
    df.drop([ind],axis=0,inplace=True)



Extracting articles from the internet: 100%|[32m██████████[0m| 271/271 [06:12<00:00,  1.38s/it]


In [24]:
# Create the news dataframe with full articles
news_df = pd.DataFrame(list)
news_df.head(20)

Unnamed: 0,Date,Media,Title,Article,Summary
0,,,Manage Google autocomplete predictions,"With autocomplete, you can enter a Google sear...",You can turn off or remove certain autocomplet...
1,1 week ago,State Department,More Than $3.75 Billion in U.S. Military Assis...,"In this first week of 2023, the United States ...",This assistance includes a $2.85 billion drawd...
2,1 week ago,BBC,Ukraine war: Western allies to send fighting v...,There was near-unanimous support for the Germa...,There was near-unanimous support for the Germa...
3,1 week ago,VOA,Germany to Send Ukraine 40 Armored Vehicles by...,Germany will supply Ukraine with around 40 Mar...,Germany will supply Ukraine with around 40 Mar...
4,1 week ago,BBC,Ukraine war: Russians accused of opening fire ...,"""We are two and a half hours into this proclai...","""We are two and a half hours into this proclai..."
5,1 week ago,ReliefWeb,"Russia’s war on Ukraine (UN, Ukraine governmen...",What are you looking for?\n\nSearch|t,What are you looking for?
6,1 week ago,EL PAÍS in English,Kyiv rejects Putin’s ceasefire: ‘We will bite ...,The Ukrainian government has reacted with susp...,The Ukrainian government has reacted with susp...
7,1 week ago,EurAsian Times,"Thanks To TikTok, Ukraine Is Raining Missile O...",Ukrainian forces have been precisely targeting...,Ukrainian forces have been precisely targeting...
8,1 week ago,Vatican News,Pope prays for Ukraine as Eastern Churches cel...,Pope Francis renews his prayers for peace in w...,Pope Francis renews his prayers for peace in w...
9,1 week ago,World Politics Review,Why the War in Ukraine Hasn’t Polarized Wester...,"Or, now to get full access.\n\nWhat you’ll get...","Or, now to get full access.\nWhat you’ll get w..."


In [25]:
# Checking one article
news_df.iloc[8]['Article']

"Pope Francis renews his prayers for peace in war-torn Ukraine, and sends his well-wishes to Eastern-rite Catholics and Orthodox Christians as they prepare to celebrate Christmas on Saturday.\n\nBy Devin Watkins\n\nAs Christian Churches which follow the Julian calendar prepare to celebrate Christmas on 7 January, Pope Francis expressed his well-wishes to Eastern-rite Catholics and Orthodox Christians.\n\n“In a special way,” he said at the Angelus prayer on Friday, “I would like to send greetings to our brothers and sisters of martyred Ukrainian people.”\n\nOver 65 percent of Ukrainians profess Orthodoxy and another 9 percent belong to the Ukrainian Greek Catholic Church, according to a 2018 survey by the Razumkov Centre.\n\nThe Pope added his hopes that the celebration of the birth of the Lord might encourage steps toward peace.\n\n“May the birth of the Saviour bring comfort and hope, and inspire concrete steps that may finally lead to an end to the fighting and to peace. May we pray o