In [9]:
import requests
import json
import pandas as pd
from bs4 import BeautifulSoup

Base_URL = "https://news.abs-cbn.com/list/tag/tfc-news?page="
PageNumber = 1
NumPages = 10

DateMonth = "Jun" # Must be 3 letters (Jan, Feb, Mar, Apr)
DateFrom = 17
DateTo = 18
SearchedAuthor = None # Add string of author name if also searching by author

In [10]:
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64'}
page = requests.get("".join((Base_URL, str(PageNumber))), headers = headers)
soup = BeautifulSoup(page.content, 'html.parser')
news_content = soup.find("section", class_ = "section-more-stories")
news_list = news_content.find_all("article")

In [11]:
# Scrape and format articles

all_articles = []

for PageNumber in range(NumPages):
    page = requests.get("".join((Base_URL, str(PageNumber))), headers = headers)
    soup = BeautifulSoup(page.content, 'html.parser')
    news_content = soup.find("section", class_ = "section-more-stories")
    news_list = news_content.find_all("article")

    for ctr in range(len(news_list)):
        article = {}

        date = news_list[ctr].find("span", class_="datetime").text
        title = news_list[ctr].find("p", class_="title").text
        full_article = "".join(("https://news.abs-cbn.com", news_list[ctr].find("a").get("href")))
        author = news_list[ctr].find("span", class_="author").text.split(" |")[0]
        
        if SearchedAuthor is None:
            if date[0:3] == DateMonth and int(date[4:6]) <= DateTo and int(date[4:6]) >= DateFrom:
                article = {
                    "Date and Time": date,
                    "Title": title,
                    "Full Article": full_article,
                    "Author": author
                }

                all_articles.append(article)
        else:
            if SearchedAuthor == author and date[0:3] == DateMonth and int(date[4:6]) <= DateTo and int(date[4:6]) >= DateFrom:
                article = {
                    "Date and Time": date,
                    "Title": title,
                    "Full Article": full_article,
                    "Author": author
                }

                all_articles.append(article)
    
all_articles

[{'Date and Time': 'Jun 18 05:19 PM',
  'Title': 'Community rallies for Filipino family attacked in California',
  'Full Article': 'https://news.abs-cbn.com/news/06/18/22/community-rallies-for-filipino-family-attacked-in-california',
  'Author': 'Steve Angeles'},
 {'Date and Time': 'Jun 18 10:46 AM',
  'Title': 'Historical highway marker honors Fil-Am US Navy sailors',
  'Full Article': 'https://news.abs-cbn.com/news/06/18/22/historical-highway-marker-honors-fil-am-us-navy-sailors',
  'Author': 'Monica Galozo'},
 {'Date and Time': 'Jun 17 11:43 PM',
  'Title': 'BALIK-TANAW: Bela Padilla, nagtanghal sa PH Independence Day celebrations sa Norway ',
  'Full Article': 'https://news.abs-cbn.com/news/06/17/22/balik-tanaw-bela-padilla-nagtanghal-sa-ph-independence-day-celebrations-sa-norway',
  'Author': 'Marco Camas'},
 {'Date and Time': 'Jun 17 11:10 PM',
  'Title': "'Father of the Bride' explores modern take on traditional roles",
  'Full Article': 'https://news.abs-cbn.com/entertainment/0

In [12]:
# Dump articles to JSON file

with open('esquivel_chua_api.json', 'w') as fp:
    json.dump(all_articles, fp, indent = 4)

In [13]:
# Read JSON file

jsonFile = pd.read_json("esquivel_chua_api.json", orient = "records")
print(jsonFile)

     Date and Time                                              Title  \
0  Jun 18 05:19 PM  Community rallies for Filipino family attacked...   
1  Jun 18 10:46 AM  Historical highway marker honors Fil-Am US Nav...   
2  Jun 17 11:43 PM  BALIK-TANAW: Bela Padilla, nagtanghal sa PH In...   
3  Jun 17 11:10 PM  'Father of the Bride' explores modern take on ...   
4  Jun 18 05:19 PM  Community rallies for Filipino family attacked...   
5  Jun 18 10:46 AM  Historical highway marker honors Fil-Am US Nav...   
6  Jun 17 11:43 PM  BALIK-TANAW: Bela Padilla, nagtanghal sa PH In...   
7  Jun 17 11:10 PM  'Father of the Bride' explores modern take on ...   

                                        Full Article         Author  
0  https://news.abs-cbn.com/news/06/18/22/communi...  Steve Angeles  
1  https://news.abs-cbn.com/news/06/18/22/histori...  Monica Galozo  
2  https://news.abs-cbn.com/news/06/17/22/balik-t...    Marco Camas  
3  https://news.abs-cbn.com/entertainment/06/17/2...    Yong C