<a href="https://colab.research.google.com/github/jbloewencolon/AI-News-Themes-of-the-Week/blob/main/AI_News_draft_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import nltk
# Fetch the NLTK data packages the functions below rely on. These calls run
# every time the cell/module is executed.
nltk.download('punkt')  # Tokenizer data; nltk.word_tokenize() is used in analyze_headlines
nltk.download('stopwords')  # Stop-word lists read through nltk.corpus.stopwords
# NOTE(review): newer NLTK releases appear to load tokenizer data from
# 'punkt_tab' rather than 'punkt' -- confirm against the installed version.

def get_ai_news_headlines(timeout=10):
    """Fetch the headlines shown on the Artificial Intelligence News front page.

    Args:
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without one a stalled
            connection would block this call indefinitely.

    Returns:
        A list of headline strings (surrounding whitespace removed) in page
        order. Headline elements without a link, or whose link has no text,
        are skipped. The list is empty when nothing on the page matches the
        headline selector.

    Raises:
        requests.HTTPError: If the site answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    base_url = "https://www.artificialintelligence-news.com/"
    response = requests.get(base_url, timeout=timeout)
    response.raise_for_status()  # Fail loudly on 4xx/5xx instead of parsing an error page

    soup = BeautifulSoup(response.content, 'html.parser')

    # A CSS selector matches <h3> tags carrying BOTH classes regardless of
    # their order or of any extra classes. Passing the two class names as one
    # string to find_all(class_=...) only matches that exact attribute value.
    # NOTE(review): the class names are tied to the site's current theme --
    # re-inspect the page HTML if this starts returning nothing.
    headline_elements = soup.select('h3.entry-title.td-module-title')

    headlines = []
    for element in headline_elements:
        headline_link = element.find('a')
        # Only headlines that actually link to an article are kept.
        if headline_link is None:
            continue
        text = headline_link.text.strip()
        if text:
            headlines.append(text)

    return headlines

def analyze_headlines(headlines, num_keywords=10):
    """Return the most frequent meaningful words across the given headlines.

    Each headline is tokenized with NLTK; tokens are kept only if they are
    purely alphabetic, then lowercased, and English stop words are dropped.

    Args:
        headlines: Iterable of headline strings.
        num_keywords: Maximum number of keywords to return (default 10).

    Returns:
        A list of up to ``num_keywords`` lowercase words, most frequent first.
        An empty list is returned when there are no headlines.
    """
    # Nothing to analyse: return early so the NLTK corpora are not loaded.
    if not headlines:
        return []

    # The stop-word set does not change between headlines, so build it once.
    stop_words = set(nltk.corpus.stopwords.words('english'))

    all_words = []
    for headline in headlines:
        tokens = nltk.word_tokenize(headline)
        # Keep alphabetic tokens only (drops punctuation and numbers) and
        # lowercase them so "AI" and "ai" are counted together.
        words = [token.lower() for token in tokens if token.isalpha()]
        all_words.extend(word for word in words if word not in stop_words)

    # Rank the remaining words by how often they occur.
    freq_dist = nltk.FreqDist(all_words)
    return [word for word, _count in freq_dist.most_common(num_keywords)]

if __name__ == "__main__":
    # Scrape the front page, list the headlines, then summarise them as keywords.
    latest_headlines = get_ai_news_headlines()
    print("This Week's Top AI News Headlines:")
    if not latest_headlines:
        # Make an empty scrape visible instead of printing bare section titles.
        print("(no headlines found - the headline selector may no longer match the site)")
    for headline in latest_headlines:
        print(headline)
    keywords = analyze_headlines(latest_headlines)
    print("This Week's Top AI Keywords:")
    print(keywords)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


This Week's Top AI News Headlines:
This Week's Top AI Keywords:
[]
