In [39]:
import os

import requests
from bs4 import BeautifulSoup
from openai import OpenAI

In [40]:
# Search parameters: both values are interpolated into the news search query
# and into the LLM prompt built later in the notebook.
location: str = "san francisco bay area"  # geographic area the headlines must concern
topic: str = "sports"  # subject the headlines must concern

In [41]:
def scrape_article(url, timeout=10):
    """Download a web page and return the text of its paragraphs.

    The page is fetched with ``requests`` and parsed with BeautifulSoup. When
    the document contains an ``<article>`` element, only the ``<p>`` tags
    inside the first one are used; otherwise every ``<p>`` tag on the page is
    used.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the server, passed to ``requests.get``.
            Without a timeout a stalled server would block the notebook
            indefinitely.

    Returns:
        The paragraph texts joined with newlines, with leading and trailing
        whitespace stripped. An empty string when no paragraphs are found.

    Raises:
        RuntimeError: If the response status code is anything other than 200.
        requests.RequestException: Connection errors and timeouts propagate
            unchanged from ``requests.get``.
    """
    response = requests.get(url, timeout=timeout)

    # Only a plain 200 counts as success; any other status aborts the scrape.
    if response.status_code != 200:
        raise RuntimeError(f"Failed to load page, status code: {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')

    # Many news sites wrap the story in <article>; prefer its paragraphs and
    # fall back to every <p> on the page when there is no such element.
    article = soup.find('article')
    paragraphs = article.find_all('p') if article else soup.find_all('p')

    return '\n'.join(p.get_text() for p in paragraphs).strip()

In [42]:
def get_news_search_results(query, num_results, timeout=10):
    """Fetch headline texts from a Google News search results page.

    Args:
        query: Free-text search query.
        num_results: Value sent as the ``num`` query parameter.
            NOTE(review): the service may return fewer items than requested;
            confirm the effective cap.
        timeout: Seconds to wait on the server, passed to ``requests.get`` so
            a stalled connection cannot hang the notebook.

    Returns:
        A list with the text of every ``<div>`` carrying the headline CSS
        classes, in document order. Empty when no such element is present.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status`` on a 4xx/5xx reply.
        requests.RequestException: Connection errors and timeouts propagate
            unchanged from ``requests.get``.
    """
    # Hand the query to requests via ``params`` so it is URL-encoded; pasting
    # it into the URL lets '&', '#' or '+' in the query change the request.
    response = requests.get(
        "https://www.google.com/search",
        params={"q": query, "tbm": "nws", "num": num_results},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=timeout,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): these class names look auto-generated, so this selector
    # presumably breaks whenever the results markup changes; an empty list
    # here most likely means the selector is stale.
    return [
        item.get_text()
        for item in soup.find_all('div', class_='BNeawe vvjwJb AP7Wnd')
    ]

In [43]:
def prompt_openai(prompt, model="gpt-4o"):
    """Send one user message to the OpenAI chat API and return the reply text.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.
    Credentials must never be written into the notebook: anything committed
    or shared with the file is exposed to every reader.

    Args:
        prompt: Text sent as the single ``user`` message.
        model: Chat model name to request.

    Returns:
        The ``content`` of the first choice in the completion.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it before running this notebook."
        )

    client = OpenAI(api_key=api_key)
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

In [47]:
# Build the search query from the notebook's location/topic settings.
query = f"{location} local news in the past month about {topic}"
# NOTE(review): 250 is the number of results requested, not a guarantee of
# how many headlines come back.
num_results = 250

# Fetch the headlines; keep an independent copy under the name the prompt uses.
news_search_results = get_news_search_results(query, num_results)
titles = list(news_search_results)

prompt = f"""For the following list, give me a list of important distinct events that are referenced by several articles (i.e. a short blurb). 
{titles}
It should be formatted as a numbered list. The event should be something distinct and not a general topic — i.e. the Golden Gate Bridge has shut down.
If it's not related to {topic} in {location}, don't account for it. If it's an opinion article or a guide, don't account for it.
"""

if titles:
    response_text = prompt_openai(prompt)
    print(response_text)
else:
    # With no headlines the model would have nothing to summarise and could
    # only invent events, so skip the (billable) API call entirely.
    response_text = None
    print("No headlines were returned for the query; skipping the OpenAI request.")

1. **Giants Snap Losing Streak**: The San Francisco Giants recently ended a losing streak, which has been a point of discussion in various sports recaps.

2. **Rangers Rally Past the A's**: The Texas Rangers defeated the Oakland Athletics with a score of 4-2, highlighting a noteworthy game in local sports.

3. **49ers Headed to Super Bowl**: The San Francisco 49ers are advancing to their eighth Super Bowl, generating excitement among fans.

4. **A's Plan Falls Apart & New Oakland Waterfront Twist**: After a failed plan, a significant new development has emerged regarding the future of Oakland's waterfront, impacting the city's sports landscape.

5. **Bay Area College Basketball Teams Preparing for March Madness**: Local college basketball teams in the Bay Area are gearing up and have work to do in anticipation of the 2024 March Madness tournament.

6. **Golden State Valkyries WNBA Expansion**: The newly announced Bay Area expansion franchise for the WNBA has unveiled its name and merch