In [1]:
import json
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# HTTP headers attached to every request this notebook makes. They mirror what
# a desktop Chrome browser sends (presumably so the site serves its normal
# HTML -- confirm if the scraper starts receiving different markup).
HEADERS = {
    "Accept-Encoding": "gzip, deflate, sdch",
    "Accept-Language": "en-US,en;q=0.8",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/56.0.2924.87 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,*/*;q=0.8"
    ),
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
}

# Index page that lists the press releases to scrape.
jubilee_house = "https://presidency.gov.gh/press-releases"

# File the scraped articles are saved to.
OUTPUT_FILE = "data/press_releases_main.jsonl"


In [3]:
# Download the press-release index page. A timeout keeps a stalled connection
# from hanging the kernel, and raise_for_status() fails loudly on an HTTP error
# instead of silently parsing an error page into an empty URL list.
response = requests.get(jubilee_house, headers=HEADERS, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Collect the target of every "button-custom" anchor inside an
# "article-i-button" div; anchors without an href are skipped rather than
# raising KeyError.
urls = [
    a["href"]
    for a in soup.select("div.article-i-button a.button-custom")
    if a.has_attr("href")
]
urls


['https://presidency.gov.gh/ghana-and-colombia-strengthen-ties-as-vice-presidents-meet-in-accra/',
 'https://presidency.gov.gh/president-mahama-arrives-in-kenya-for-jamhuri-day/',
 'https://presidency.gov.gh/president-mahama-hints-at-involving-the-red-cross-in-free-primary-healthcare-implementation/',
 'https://presidency.gov.gh/covid-19-health-recovery-levy-abolished/',
 'https://presidency.gov.gh/mahama-attends-doha-forum-2025-at-invitation-of-qatari-emir/',
 'https://presidency.gov.gh/president-mahama-hands-over-40-armoured-vehicles-to-the-police/',
 'https://presidency.gov.gh/1723-2/',
 'https://presidency.gov.gh/presidency-communications-newsletter-dec-02-2025/',
 'https://presidency.gov.gh/accra-reset-speech-by-h-e-president-olusegun-obasanjo-at-the-g20-leaders-summit-johannesburg-south-africa/',
 'https://presidency.gov.gh/president-mahama-marks-67th-birthday-visiting-childrens-wards/',
 'https://presidency.gov.gh/first-lady-interacts-with-st-marys-school-girls-donates-learning-

In [4]:
def process_article(urls: list, output_file=None, timeout: float = 30):
    """Scrape Jubilee House press-release pages and save them as JSON Lines.

    Every URL is fetched with the shared ``HEADERS``. The title (``h1.h2``),
    body (``div.content``) and date (``div.article-date``) are extracted from
    the page. Pages that cannot be downloaded, or that lack a title or body,
    are reported and skipped so one bad page does not abort the whole run.

    Args:
        urls: Article URLs to fetch.
        output_file: Path to write the results to. Defaults to the
            module-level ``OUTPUT_FILE`` when omitted.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``title``, ``content``, ``link`` and
        ``published_date`` (``None`` when the page shows no date element).
        The same records are written to the output file, one JSON object per
        line. A failure to write the file is printed, not raised.
    """

    def _tag_text(tag):
        # find() returns None when the element is absent; propagate that
        # instead of raising AttributeError on .get_text.
        return tag.get_text(strip=True) if tag is not None else None

    articles = []

    for url in urls:
        print(f"Fetching: {url}")
        try:
            r = requests.get(url, headers=HEADERS, timeout=timeout)
        except requests.RequestException as e:
            # Timeouts / connection errors: report and move on to the next URL.
            print(f"❌ Failed: {url} ({e})")
            continue

        if r.status_code != 200:
            print(f"❌ Failed: {url}")
            continue

        soup_url = BeautifulSoup(r.text, "html.parser")

        # NOTE(review): get_text(strip=True) joins adjacent paragraphs with no
        # separator; pass a separator here if paragraph boundaries matter.
        title = _tag_text(soup_url.find("h1", class_="h2"))
        content = _tag_text(soup_url.find("div", class_="content"))
        published_date = _tag_text(soup_url.find("div", class_="article-date"))

        if title is None or content is None:
            print(f"❌ Failed: {url} (title or content element not found)")
            continue

        articles.append(
            {
                "title": title,
                "content": content,
                "link": url,
                "published_date": published_date,
            }
        )

    # Save results as JSON Lines: one article object per line.
    destination = OUTPUT_FILE if output_file is None else output_file
    try:
        parent_dir = os.path.dirname(destination)
        if parent_dir:
            # Create the target directory on first run instead of failing.
            os.makedirs(parent_dir, exist_ok=True)
        with open(destination, "w", encoding="utf-8") as f:
            for article in articles:
                f.write(json.dumps(article, ensure_ascii=False) + "\n")
        print(f"\n✅ Saved to {destination}")
    except Exception as e:
        # Best effort: the scraped data is still returned to the caller.
        print(f"Failed to save data! Error: {e}")

    return articles


In [5]:
articles = process_article(urls)
articles[:2]   # Preview first two articles


Fetching: https://presidency.gov.gh/ghana-and-colombia-strengthen-ties-as-vice-presidents-meet-in-accra/
Fetching: https://presidency.gov.gh/president-mahama-arrives-in-kenya-for-jamhuri-day/
Fetching: https://presidency.gov.gh/president-mahama-hints-at-involving-the-red-cross-in-free-primary-healthcare-implementation/
Fetching: https://presidency.gov.gh/covid-19-health-recovery-levy-abolished/
Fetching: https://presidency.gov.gh/mahama-attends-doha-forum-2025-at-invitation-of-qatari-emir/
Fetching: https://presidency.gov.gh/president-mahama-hands-over-40-armoured-vehicles-to-the-police/
Fetching: https://presidency.gov.gh/1723-2/
Fetching: https://presidency.gov.gh/presidency-communications-newsletter-dec-02-2025/
Fetching: https://presidency.gov.gh/accra-reset-speech-by-h-e-president-olusegun-obasanjo-at-the-g20-leaders-summit-johannesburg-south-africa/
Fetching: https://presidency.gov.gh/president-mahama-marks-67th-birthday-visiting-childrens-wards/
Fetching: https://presidency.gov.

[{'title': 'Ghana and Colombia strengthen ties as Vice Presidents meet in Accra.',
  'content': 'Vice President Jane Naana Opoku-Agyemang on Thursday welcomed her Colombian counterpart, Francia Elena Márquez Mina, to Accra, marking the second visit by Colombia’s vice president since she took office and signalling a deepening partnership between the two nations.Receiving Her Excellency Márquez Mina, Professor Opoku-Agyemang described the visit as a reaffirmation of “deep, longstanding, and mutually respectful relations” built on shared values and common aspirations.The vice president praised Márquez Mina’s record as a champion of racial equity, human dignity, and reparative justice, which are causes that resonate strongly across Latin America and beyond.She also noted a convergence between Colombia’s efforts to secure reparations for communities affected by slavery and systemic marginalisation and Ghana’s prominent role in global discussions on restorative justice.“Colombia’s commitment

In [6]:
# Tabulate the scraped articles: one row per article dict, one column per key.
# pandas (pd) comes from the notebook's import cell at the top.
df = pd.DataFrame(articles)

df


Unnamed: 0,title,content,link,published_date
0,Ghana and Colombia strengthen ties as Vice Pre...,Vice President Jane Naana Opoku-Agyemang on Th...,https://presidency.gov.gh/ghana-and-colombia-s...,12 December 2025
1,President Mahama arrives in Kenya for Jamhuri ...,President John Dramani Mahama has arrived in N...,https://presidency.gov.gh/president-mahama-arr...,12 December 2025
2,President Mahama hints at involving the Red Cr...,President John Dramani Mahama has indicated th...,https://presidency.gov.gh/president-mahama-hin...,11 December 2025
3,COVID‑19 Health Recovery Levy abolished.,President John Dramani Mahama has assented to ...,https://presidency.gov.gh/covid-19-health-reco...,10 December 2025
4,Mahama Attends Doha Forum 2025 at Invitation o...,"President John Dramani Mahama is in Doha, Qata...",https://presidency.gov.gh/mahama-attends-doha-...,06 December 2025
5,President Mahama hands over 40 armoured vehicl...,"President John Dramani Mahama, on Thursday, at...",https://presidency.gov.gh/president-mahama-han...,05 December 2025
6,Lordina Mahama Hosts African First Ladies for ...,"Her Excellency Dr Fatima Maada Bio, First Lady...",https://presidency.gov.gh/1723-2/,03 December 2025
7,"Presidency Communications newsletter, Dec. 02,...",Click below to read the Presidency communicati...,https://presidency.gov.gh/presidency-communica...,03 December 2025
8,Accra Reset: Speech by H.E. President Olusegun...,"Your Excellencies,Distinguished Heads of State...",https://presidency.gov.gh/accra-reset-speech-b...,03 December 2025
9,President Mahama marks 67th birthday visiting ...,President John Dramani Mahama celebrated his 6...,https://presidency.gov.gh/president-mahama-mar...,29 November 2025


In [7]:
# Export the article table as newline-delimited JSON, one record per line.
export_path = "data/press_releases_main.json"
df.to_json(export_path, lines=True, orient="records")