In [1]:
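# Tutorials this notebook is based on:
# - Creating a webpage monitor using Python and running it on a Raspberry Pi:
#   https://medium.com/swlh/tutorial-creating-a-webpage-monitor-using-python-and-running-it-on-a-raspberry-pi-df763c142dac 
#
# - How to send Slack messages with Python:
#   https://www.datacamp.com/tutorial/how-to-send-slack-messages-with-python 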


In [4]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re

In [5]:
# Download the RSS feed. The context manager closes the HTTP response even if
# reading fails, and the timeout keeps a stalled server from hanging the run.
with urlopen("https://www.aisixiang.com/rss?type=1", timeout=30) as page:
    xml = page.read().decode("utf-8")

# Parse as XML (not HTML) so mixed-case RSS tags such as <pubDate> are preserved.
rss_soup = BeautifulSoup(xml, features='xml')

In [6]:
# Collect every <item> element of the feed; each one describes a single article.
articles = rss_soup.find_all(name='item')

In [7]:
# This notebook assumes the feed lists items newest-first, so the first <item>
# is taken to carry the latest publication time.
if not articles:
    # Fail with a clear message instead of an opaque IndexError on an empty feed.
    raise RuntimeError("RSS feed contains no <item> entries; nothing to monitor")

most_recent_time_string = articles[0].pubDate.text
most_recent_time_string

'Sunday, 03 March 2024 11:35:34 +0800'

In [8]:
import logging
import os
# Import WebClient from Python SDK (github.com/slackapi/python-slack-sdk)
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

# WebClient instantiates a client that can call API methods
# When using Bolt, you can use either `app.client` or the `client` passed to listeners.

# Load the bot token from a local file so it is never hardcoded in the notebook.
# `with` guarantees the file is closed even if reading raises.
with open("slackbot_token.txt", 'r') as token_file:
    # Drop surrounding whitespace (e.g. the trailing newline most editors add)
    # so the token value is clean wherever it is used.
    SLACK_BOT_TOKEN = token_file.read().strip()

client = WebClient(token=SLACK_BOT_TOKEN)
logger = logging.getLogger(__name__)

In [13]:
from datetime import datetime

def notify(article, channel="think-tank-monitoring"):
    """Announce one RSS article on stdout and post it to Slack.

    Args:
        article: A parsed RSS ``<item>``. Its ``title``, ``description`` and
            ``link`` children are read via attribute access; a child that is
            absent is expected to be ``None`` (BeautifulSoup's behaviour for
            missing tags).
        channel: Slack channel that receives the message. Defaults to the
            channel this notebook has always posted to.

    Raises:
        Any exception raised by ``client.chat_postMessage`` propagates to the
        caller, so a failed delivery is never silently treated as sent.
    """
    title_text = article.title.text if article.title is not None else "(untitled)"
    print(f"\nNew article: {title_text}")

    # RSS 2.0 makes every child of <item> optional, so join only the parts that
    # are present instead of crashing on a missing <description> or <link>.
    parts = (article.title, article.description, article.link)
    message = "\n".join(part.text for part in parts if part is not None)

    client.chat_postMessage(channel=channel, text=message)

def identify_new_articles(previous_most_recent_timestamp, articles):
    """Send a notification for each article published after a reference time.

    Args:
        previous_most_recent_timestamp: Publication-time string in the feed's
            ``'%A, %d %B %Y %H:%M:%S %z'`` layout; articles must be strictly
            newer than this to be reported.
        articles: Iterable of parsed RSS items exposing ``pubDate.text``.

    Each qualifying article is handed to ``notify`` in the order it appears
    in ``articles``. Nothing is returned.
    """
    stamp_layout = '%A, %d %B %Y %H:%M:%S %z'

    def parse_stamp(raw):
        # Turn a feed timestamp string into a timezone-aware datetime.
        return datetime.strptime(raw, stamp_layout)

    cutoff = parse_stamp(previous_most_recent_timestamp)
    print(f"\nReference time: {cutoff}")

    for entry in articles:
        published = parse_stamp(entry.pubDate.text)
        if published <= cutoff:
            # Not newer than the reference time: already seen, skip it.
            continue
        notify(entry)



In [14]:
import os

TIMESTAMP_FILE = "previous_most_recent_timestamp.txt"

# Make sure the state file exists; append mode creates it when missing and
# leaves existing content untouched.
open(TIMESTAMP_FILE, 'a').close()

with open(TIMESTAMP_FILE, 'r') as timestamp_file:
    # strip() tolerates a trailing newline from manual edits, which would
    # otherwise break both the equality check and strptime parsing below.
    previous_most_recent_timestamp = timestamp_file.read().strip()

# in case this is the first run, set the timestamp to be the oldest in the rss file
if previous_most_recent_timestamp == "":
    previous_most_recent_timestamp = articles[-1].pubDate.text

print(f"Previous most recent timestamp: {previous_most_recent_timestamp}")

if most_recent_time_string == previous_most_recent_timestamp:
    # matches, no new info
    print(False)
else:
    # does not match, send notifications for new ones
    identify_new_articles(previous_most_recent_timestamp, articles)

    # Persist the new timestamp only after all notifications were sent. Opening
    # the file for writing earlier would truncate it, so a failed notification
    # would wipe the saved state and the next run would re-send old articles.
    with open(TIMESTAMP_FILE, 'w') as timestamp_file:
        timestamp_file.write(most_recent_time_string)
    print(True)

Previous most recent timestamp: Saturday, 02 March 2024 09:54:05 +0800

Reference time: 2024-03-02 09:54:05+08:00

New article: 卢锋：强供给与弱需求——中国经济如何再平衡？

New article: 巨力：经济形势应该怎么看
True


In [None]:
# Reference - sending Slack messages with a bot: https://api.slack.com/messaging/sending 

# cron schedule (minute 0 of every 6th hour): 0 */6 * * *

In [3]:
from datetime import datetime

# Print the "no new articles" status line together with the current local time.
now = datetime.now()
print(f"No new articles. Current time: {now}")

No new articles. Current time: 2024-03-02 10:25:07.627113
