In [1]:
import requests
from bs4 import BeautifulSoup

def scrape_event_links():
    event_links = []
    base_url = "https://visitseattle.org/events/"
    page = 1
    max_pages = 41

    while page <= max_pages:
        response = requests.get(f"{base_url}page/{page}/")
        if response.status_code != 200:
            break

        soup = BeautifulSoup(response.content, 'html.parser')
        for a_tag in soup.find_all('a', href=True):
            if '/events/' in a_tag['href'] and a_tag['href'] not in event_links:
                event_links.append(a_tag['href'])

        page += 1

    return event_links

event_links = scrape_event_links()

print(event_links)


['https://visitseattle.org/things-to-do/events/', 'https://visitseattle.org/things-to-do/events/festivals/', 'https://visitseattle.org/things-to-do/events/submit-your-event/', 'https://visitseattle.org/events/community-is-a-radical-act-of-love/', 'https://visitseattle.org/events/seattle-kraken-vs-detroit-red-wings/', 'https://visitseattle.org/events/the-thrifted/', 'https://visitseattle.org/events/black-and-boujee/', 'https://visitseattle.org/events/from-mozart-to-matthews/', 'https://visitseattle.org/events/lukas-nelson/', 'https://www.showboxpresents.com/events/detail/517112', 'https://visitseattle.org/events/mahogany-smartfilms-black-luxury/', 'https://visitseattle.org/events/robots-who-paint/', 'https://visitseattle.org/events/trask/', 'https://visitseattle.org/events/page/2/?frm=events&s', 'https://visitseattle.org/events/page/3/?frm=events&s', 'https://visitseattle.org/events/page/4/?frm=events&s', 'https://visitseattle.org/events/page/5/?frm=events&s', 'https://visitseattle.org/

In [2]:
import csv

def scrape_event_details(url):
    response = requests.get(url)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Using CSS selectors with select_one
    name_element = soup.select_one('#body > div.global-wrapper > div.container-event-detail.padding-top-bottom > div:nth-child(1) > div.medium-6.columns.event-top > h1')
    date_element = soup.select_one('#body > div.global-wrapper > div.container-event-detail.padding-top-bottom > div:nth-child(1) > div.medium-6.columns.event-top > h4')
    location_element = soup.select_one('#body > div.global-wrapper > div.container-event-detail.padding-top-bottom > div:nth-child(1) > div.medium-6.columns.event-top > h4 > span:nth-child(2)')
    type_element = soup.select_one('#body > div.global-wrapper > div.container-event-detail.padding-top-bottom > div:nth-child(1) > div.medium-6.columns.event-top > h4 > span:nth-child(2)')
    region_element = soup.select_one('#body > div.global-wrapper > div.container-event-detail.padding-top-bottom > div:nth-child(1) > div.medium-6.columns.event-top > h4 > span:nth-child(2)')

    name = name_element.get_text().strip() if name_element else 'Not Available'
    date = date_element.get_text().strip() if date_element else 'Not Available'
    location = location_element.get_text().strip() if location_element else 'Not Available'
    event_type = type_element.get_text().strip() if type_element else 'Not Available'
    region = region_element.get_text().strip() if region_element else 'Not Available'

    if 'Not Available' in [name, date, location, event_type, region]:
        return None

    return {
        'Name': name,
        'Date': date,
        'Location': location,
        'Type': event_type,
        'Region': region
    }

events_data = []
for url in event_links:
    event_detail = scrape_event_details(url)
    if event_detail:
        events_data.append(event_detail)

# Write to CSV
with open('events.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['Name', 'Date', 'Location', 'Type', 'Region'])
    writer.writeheader()
    for event in events_data:
        writer.writerow(event)


NameError: name 'csv' is not defined

In [22]:
def get_lat_lon(location_name):
    base_url = "https://nominatim.openstreetmap.org/search.php"
    query_params = {
        "q": location_name,
        "format": "jsonv2"
    }
    res = requests.get(base_url, params=query_params)
    location_data = res.json()
    
    if location_data:
        latitude = location_data[0]['lat']
        longitude = location_data[0]['lon']
        return latitude, longitude
    else:
        return None, None


In [23]:
def get_weather_info(lat, lon):
    weather_url = f"https://api.weather.gov/points/{lat},{lon}"
    res = requests.get(weather_url)
    if res.status_code != 200:
        return "Weather data not available"

    point_dict = res.json()
    forecast_url = point_dict['properties']['forecast']
    forecast_res = requests.get(forecast_url)
    forecast_data = forecast_res.json()

    weather_info = forecast_data['properties']['periods'][0]['detailedForecast']
    return weather_info


In [27]:
events_data = []
for url in event_links:
    event_detail = scrape_event_details(url)
    if event_detail is not None:
        events_data.append(event_detail)

print(events_data[:5])

# Write to CSV
with open('enhanced_events.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['Name', 'Date', 'Location', 'Type', 'Region', 'Latitude', 'Longitude', 'Weather'])
    writer.writeheader()

    for event in events_data:
        lat, lon = get_lat_lon(event['Location'])
        weather_info = get_weather_info(lat, lon) if lat and lon else "Weather data not available"

        event['Latitude'] = lat if lat else "Not Available"
        event['Longitude'] = lon if lon else "Not Available"
        event['Weather'] = weather_info

        writer.writerow(event)

[{'Name': 'Steinunn Porarinsdottir: Wayfinders', 'Date': 'Now through 1/28/2024 |  National Nordic Museum', 'Location': 'National Nordic Museum', 'Type': 'National Nordic Museum', 'Region': 'National Nordic Museum'}, {'Name': 'Hanako O’Leary: Izanami', 'Date': 'Now through 1/28/2024 |  Frye Art Museum', 'Location': 'Frye Art Museum', 'Type': 'Frye Art Museum', 'Region': 'Frye Art Museum'}, {'Name': 'Gage Alumni Show', 'Date': 'Now through 1/28/2024 |  Fountainhead Gallery', 'Location': 'Fountainhead Gallery', 'Type': 'Fountainhead Gallery', 'Region': 'Fountainhead Gallery'}, {'Name': 'Genre Box', 'Date': 'Now through 1/28/2024 |  Market Theatre', 'Location': 'Market Theatre', 'Type': 'Market Theatre', 'Region': 'Market Theatre'}, {'Name': 'Bohemia', 'Date': 'Now through 1/28/2024 |  The Triple Door', 'Location': 'The Triple Door', 'Type': 'The Triple Door', 'Region': 'The Triple Door'}]
