# Daily Wikipedia News Headline Emailer
## *Daniel Brinkley*

Uses the [Wikimedia API](https://api.wikimedia.org/wiki/Getting_featured_content_from_Wikipedia_with_Python) to retrieve the 'In the news' section of English Wikipedia's Main Page, then formats the headlines as an HTML page and emails it using SendGrid's mail client.

In [1]:
# load libraries and environment (api keys)
import datetime
from dotenv import load_dotenv
import json
import requests
import os
from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail
import re

load_dotenv()

True

In [None]:
# Request setup adapted from Wikimedia's featured-content tutorial.
# NOTE(review): datetime.now() yields the machine's local date -- confirm that
# is the date the feed should be queried for.
today = datetime.datetime.now()
date = today.strftime('%Y/%m/%d')

# Choose your language, and get today's featured content.
language_code = 'en' # English
headers = {
  'Authorization': f'Bearer {os.environ["WIKIMEDIA_API_KEY"]}',
  'User-Agent': 'email-day (brushnit.online@gmail.com)'
}

base_url = 'https://api.wikimedia.org/feed/v1/wikipedia/'
url = base_url + language_code + '/featured/' + date
# requests has no default timeout; without one a stalled connection would
# block this cell forever.
response = requests.get(url, headers=headers, timeout=30)
# Stop here on an HTTP error instead of trying to parse an error payload below.
response.raise_for_status()

# From here on `response` is the decoded JSON document, not the HTTP response.
response = json.loads(response.text)

# Story HTML links to articles with relative "./Title" hrefs; point them at
# the language's wiki so they still resolve inside an email.
wiki_prefix = '"https://' + language_code + '.wikipedia.org/wiki/'
# A missing 'news' key is treated as "no headlines" rather than a KeyError.
headlines = [
  story['story'].replace('"./', wiki_prefix)
  for story in response.get('news', [])
]

{'tfa': {'type': 'standard', 'title': 'Casey_Stengel', 'displaytitle': '<span class="mw-page-title-main">Casey Stengel</span>', 'namespace': {'id': 0, 'text': ''}, 'wikibase_item': 'Q1047261', 'titles': {'canonical': 'Casey_Stengel', 'normalized': 'Casey Stengel', 'display': '<span class="mw-page-title-main">Casey Stengel</span>'}, 'pageid': 75124, 'thumbnail': {'source': 'https://upload.wikimedia.org/wikipedia/commons/0/09/Casey_Stengel_-_New_York_Yankees_-_1957.jpg', 'width': 253, 'height': 316}, 'originalimage': {'source': 'https://upload.wikimedia.org/wikipedia/commons/0/09/Casey_Stengel_-_New_York_Yankees_-_1957.jpg', 'width': 253, 'height': 316}, 'lang': 'en', 'dir': 'ltr', 'revision': '1314096187', 'tid': '1c429846-9d61-11f0-abed-03d0b278b55c', 'timestamp': '2025-09-29T18:21:26Z', 'description': 'American baseball player and manager (1890–1975)', 'description_source': 'local', 'content_urls': {'desktop': {'page': 'https://en.wikipedia.org/wiki/Casey_Stengel', 'revisions': 'https

In [4]:
# Patterns used by create_headlines_html, compiled once instead of per headline.
# DOTALL lets a comment or a link's text span several lines.
_COMMENT_RE = re.compile(r'<!--.*?-->', re.DOTALL)
# "<a" must be followed by whitespace or ">" so that other tags starting with
# the letter a (e.g. <abbr>) are not mistaken for links.
_ANCHOR_RE = re.compile(r'<a(\s[^>]*)?>(.*?)</a>', re.DOTALL)
_HREF_RE = re.compile(r'\shref="([^"]+)"')
_TAG_RE = re.compile(r'<[^>]*>')


def _restyle_anchor(match):
    """Rebuild one matched <a> element with only its href and an inline style."""
    attributes = match.group(1) or ''
    href_match = _HREF_RE.search(attributes)
    if href_match is None:
        # No double-quoted href to keep: leave the element exactly as found.
        return match.group(0)
    # Drop any markup nested inside the link so only its text remains.
    link_text = _TAG_RE.sub('', match.group(2))
    return (
        f'<a href="{href_match.group(1)}" '
        f'style="color: #0066cc; text-decoration: none;">{link_text}</a>'
    )


def create_headlines_html(headlines_list, date):
    """Render headline HTML fragments as a complete, inline-styled HTML page.

    Each headline has its HTML comments removed and every ``<a>`` element that
    carries a double-quoted ``href`` rewritten to keep only that ``href`` plus
    an inline style; other markup in the headline is left as is.

    Args:
        headlines_list: Sequence of HTML fragments, one per headline.
        date: Text interpolated as-is into the page title and heading.

    Returns:
        The full HTML document as a single string.
    """
    page_top = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Wikipedia Headlines {date}</title>
</head>
<body style="margin: 0; padding: 0; font-family: Arial, sans-serif; background-color: #f9f9f9;">
    <table width="100%" border="0" cellspacing="0" cellpadding="0" style="background-color: #f9f9f9;">
        <tr>
            <td align="center">
                <table width="600" border="0" cellspacing="0" cellpadding="20" style="max-width: 600px; width: 100%; background-color: #ffffff; border-radius: 8px; margin-top: 20px; margin-bottom: 20px;">
                    <tr>
                        <td>
                            <h1 style="font-size: 24px; color: #333333; margin-top: 0;">Wikinews for {date}</h1>
                            <ul style="list-style-type: none; padding: 0; margin: 0;">
    """

    # Collect the pieces and join once at the end.
    parts = [page_top]
    last_index = len(headlines_list) - 1

    for index, headline in enumerate(headlines_list):
        without_comments = _COMMENT_RE.sub('', headline)
        # A single substitution pass rewrites each link in place, so repeated
        # or multi-line anchors cannot be mis-replaced.
        item_html = _ANCHOR_RE.sub(_restyle_anchor, without_comments).strip()

        # Every item except the last gets a divider line beneath it.
        divider = "border-bottom: 1px solid #eeeeee;" if index < last_index else ""
        parts.append(f"""
                                <li style="padding: 15px 0; font-size: 16px; color: #555555; {divider}">
                                    {item_html}
                                </li>
        """)

    parts.append("""
                            </ul>

                            <!-- More Current Events Link -->
                            <p style="text-align: center; margin-top: 20px;">
                                <a href="https://en.wikipedia.org/wiki/Portal:Current_events" style="color: #0066cc; text-decoration: none; font-size: 16px;">More Current Events</a>
                            </p>

                        </td>
                    </tr>
                </table>
            </td>
        </tr>
    </table>

</body>
</html>
    """)
    return ''.join(parts)

In [5]:
# Render the digest first so the Mail constructor call below stays compact.
html_body = create_headlines_html(headlines, date)
message = Mail(
    subject=f'Wiki News for {date}',
    html_content=html_body,
    from_email="dabcrc@umsystem.edu",
    to_emails="dabcrc@umsystem.edu",
)

# Best-effort delivery: any failure is printed rather than raised.
try:
    sg = SendGridAPIClient(os.environ["SENDGRID_API_KEY"])
    response = sg.send(message)
    # Echo the status code, body and headers of SendGrid's reply, in that order.
    for field in ("status_code", "body", "headers"):
        print(getattr(response, field))
except Exception as e:
    print(e)

202
b''
Server: nginx
Date: Mon, 29 Sep 2025 19:00:43 GMT
Content-Length: 0
Connection: close
X-Message-Id: CEzde2keTg65lCA9Zai7HQ
Access-Control-Allow-Origin: https://sendgrid.api-docs.io
Access-Control-Allow-Methods: POST
Access-Control-Allow-Headers: Authorization, Content-Type, On-behalf-of, x-sg-elas-acl
Access-Control-Max-Age: 600
X-No-CORS-Reason: https://sendgrid.com/docs/Classroom/Basics/API/cors.html
Strict-Transport-Security: max-age=31536000; includeSubDomains
Content-Security-Policy: frame-ancestors 'none'
Cache-Control: no-cache
X-Content-Type-Options: no-sniff
Referrer-Policy: strict-origin-when-cross-origin


