In [61]:
import os

import requests
from bs4 import BeautifulSoup
from openai import OpenAI

In [62]:
# Notebook-level settings; both values are interpolated into the search
# query and the LLM prompt further down.
topic = "sports"  # subject the headlines should be about
location = "san francisco bay area"  # region the news search is scoped to

In [63]:
def scrape_article(url, timeout=10):
    """Download a web page and return the text of its paragraphs.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        The text of each ``<p>`` element, one per line, with leading and
        trailing whitespace removed. Paragraphs are taken from the page's
        ``<article>`` element when there is one, otherwise from the whole
        document.

    Raises:
        Exception: If the response status code is anything other than 200.
    """
    # Without a timeout a stalled server would block the notebook forever.
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to load page, status code: {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')

    # Many news sites wrap the story in <article>; when that tag is missing,
    # fall back to every <p> in the document (which may include page chrome).
    article = soup.find('article')
    container = article if article is not None else soup
    paragraphs = container.find_all('p')

    return '\n'.join(p.get_text() for p in paragraphs).strip()

In [64]:
def get_news_search_results(query, num_results, timeout=10):
    """Run a Google News search and return the headline texts found.

    Args:
        query: Free-text search string.
        num_results: Value sent as the ``num`` query parameter.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        A list with the text of every ``<div>`` in the response whose class
        attribute is exactly ``BNeawe vvjwJb AP7Wnd``. The list is empty if
        no such element is present.

    Raises:
        requests.HTTPError: Via ``raise_for_status`` on a 4xx/5xx response.
    """
    response = requests.get(
        "https://www.google.com/search",
        # Passing the values through `params` lets requests percent-encode
        # them, so characters such as '&', '#' or '+' in the query cannot
        # corrupt the query string.
        params={"q": query, "tbm": "nws", "num": num_results},
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=timeout,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): this class string presumably mirrors Google's result
    # markup at the time of writing; if the markup changes the list will be
    # silently empty — confirm before relying on it.
    return [
        item.get_text()
        for item in soup.find_all('div', class_='BNeawe vvjwJb AP7Wnd')
    ]

In [65]:
def prompt_openai(prompt, model="gpt-4o"):
    """Send a single user message to the OpenAI chat API and return the reply.

    Args:
        prompt: Text sent as the content of the one user message.
        model: Name of the chat model to query.

    Returns:
        The message content of the first choice in the completion.

    Raises:
        RuntimeError: If the ``OPENAI_API_KEY`` environment variable is
            unset or empty.
    """
    # Read the key from the environment so that no credential is ever
    # stored in (or committed with) the notebook.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it before calling prompt_openai()."
        )

    client = OpenAI(api_key=api_key)
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content

In [66]:
# Build the search query from the notebook-level `location` and `topic`.
query = f"{location} local news in the past month about {topic}"
# NOTE(review): the search endpoint may return fewer results than requested
# here — confirm against the length of `titles`.
num_results = 250

# Fetch the headlines; `titles` is an independent list copy used in the prompt.
news_search_results = get_news_search_results(query, num_results)
titles = list(news_search_results)

# The list is interpolated as its Python repr. The prompt wording is kept
# exactly as written, including the unmatched '"' after "has shut down.",
# so the text sent to the model is unchanged.
prompt = f"""For the following list, give me a list of important distinct events that are referenced by several articles (i.e. a short blurb). 
{titles}
It should be formatted as a python array. The event should be something distinct and not a general topic — i.e. the Golden Gate Bridge has shut down."
If it's not related to {topic} in {location}, don't account for it. If it's an opinion article or a guide, don't account for it.
"""
response_text = prompt_openai(prompt)
print(response_text)

```python
[
    "Giants snap losing streak",
    "Rangers rally past the A's 4-2",
    "Major events coming to San Francisco",
    "San Francisco 49ers headed to their eighth Super Bowl and Taylor Swift's appearance",
    "Enjoy a thrilling Super Bowl Sunday in the Bay with local watch parties",
    "Bay Area waterway plagued by pirates and vigilantism",
    "Golden State Valkyries: WNBA's new Bay Area expansion franchise",
    "National Women's Soccer League announces new San Francisco Bay Area expansion team with former players in ownership group",
    "Legendary San Francisco Sports Bar heavily damaged in fire",
    "Oracle Park Watch Party to honor Willie Mays' legacy",
    "Massive $6.5 billion Santa Clara development approved near new 49ers stadium",
    "Bay Area sportscaster Gary Radnich announces his retirement from KNBR"
]
```
