In [1]:
from flask import Flask, render_template
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from datetime import datetime
import requests
import googlemaps
import re
import json
import os
import time

app = Flask(__name__)

# Today's date in the YYYY-MM-DD form that the calendar's day URL expects.
present_date = datetime.today().strftime("%Y-%m-%d")

url = f"https://today.wisc.edu/events/day/{present_date}"

# The timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as if it
# were the event listing (which would silently yield zero events).
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Every <li class="event-row"> element is treated as one event by the code
# that consumes `events`.
events = soup.select("li.event-row")

def _format_event_times(time_values, am_pm_matches):
    """Build the ``(start_time, end_time)`` pair for a single event.

    Args:
        time_values: Clock strings taken from the event's ``span.time-hm``
            elements, in document order.
        am_pm_matches: The "a.m." / "p.m." markers found in the event's time
            paragraph, in document order.

    Returns:
        A ``(start_time, end_time)`` tuple of display strings. Without any
        clock value the event is reported as ``("All Day!", None)``; with a
        single clock value ``end_time`` is ``None``.
    """
    if not time_values:
        return "All Day!", None

    if len(time_values) == 1:
        suffix = am_pm_matches[0] if am_pm_matches else ""
        return f"{time_values[0]}{suffix}", None

    if len(am_pm_matches) == 1:
        # A lone marker is shared by both ends of the range. The previous code
        # stored it in a variable that was never read, which raised NameError
        # (or reused the suffixes of an earlier event).
        start_am_pm = end_am_pm = am_pm_matches[0]
    elif len(am_pm_matches) == 2:
        start_am_pm, end_am_pm = am_pm_matches
    else:
        start_am_pm = end_am_pm = ""

    # Best effort for more than two clock values: the first and last bound the
    # range. Previously this case left start/end unassigned for the event.
    return f"{time_values[0]}{start_am_pm}", f"{time_values[-1]}{end_am_pm}"


events_list = []

for event in events:
    title_element = event.select_one("h3.event-title a")
    # A row without a title link yields None instead of raising AttributeError.
    title = title_element.text.strip() if title_element else None

    location_element = event.select_one("p.event-location")
    location = location_element.get_text(strip=True) if location_element else None

    time_elements = event.select("p.event-time span.time-hm")
    time_values = [t.get_text(strip=True) for t in time_elements]

    # Reset per event so markers from a previous row can never leak into this one.
    am_pm_matches = []
    time_paragraph = event.select_one("p.event-time")
    if time_paragraph:
        full_time_text = time_paragraph.get_text(separator=" ", strip=True)
        am_pm_matches = re.findall(r"(a\.m\.|p\.m\.)", full_time_text, re.IGNORECASE)

    start_time, end_time = _format_event_times(time_values, am_pm_matches)

    event_data = {
        "title" : title,
        "location" : location,
        "start_time" : start_time,
        "end_time" : end_time,
    }

    events_list.append(event_data)



# Save the scraped events as pretty-printed JSON, but only when the working
# directory has no entry named events.json yet; an existing file is left as is.
if not any(entry == "events.json" for entry in os.listdir("./")):
    with open("events.json", "w") as json_file:
        json_file.write(json.dumps(events_list, indent=4))


In [2]:
# Debug aid: show the first 20,000 characters of the pretty-printed day page
# so its markup can be inspected when choosing selectors.
print(soup.prettify()[:20_000])

<!DOCTYPE html>
<html class="no-js" data-action-name="day" data-controller-name="events" lang="en-US">
 <head>
  <title>
   Today, February 23, 2025 | UW–Madison Events Calendar
  </title>
  <meta charset="utf-8"/>
  <link as="font" crossorigin="" href="https://cdn.wisc.cloud/fonts/uw-rh/0.0.1/redhat-display-latin.v14.woff2" rel="preload" type="font/woff2"/>
  <link as="font" crossorigin="" href="https://cdn.wisc.cloud/fonts/uw-rh/0.0.1/redhat-text-latin.v13.woff2" rel="preload" type="font/woff2"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <link href="/assets/uw_style/favicon-d80c8d0f3a3281d962db2ac2bac9d4641a8a8fa2c4a7f19ff3ea0381054813b1.ico" rel="icon" type="image/x-icon">
   <script>
    (function(H){H.className=H.className.replace(/\bno-js\b/,'')})(document.documentElement)
   </script>
   <link crossorigin="" data-turbo-track="reload" href="https://cdn.wisc.cloud/fonts/uw-rh/0.0.1/fonts.css" rel="stylesheet" type="text/css">
    <link href="/assets/application-5e

In [3]:
from datetime import datetime

# Print the current local date and time, down to the second.
print(
    datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
)


2025-02-23 15:44:19


In [4]:
# Chrome flags for running without a visible window in a restricted environment.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
service = Service(ChromeDriverManager().install())
# The options must be handed to the driver explicitly. Previously they were
# built but never passed, so none of the flags above took effect.
# NOTE(review): the driver is left open on purpose because the tag-scraping
# cell reuses it; call driver.quit() once all scraping is finished.
driver = webdriver.Chrome(service=service, options=chrome_options)

events = soup.select("li.event-row")
event_links = soup.select("h3.event-title a")

for link in event_links:
    href_attr = link["href"]
    # Site-relative detail links need the host prepended; any other href is
    # used exactly as it appears in the listing.
    if href_attr.startswith("/events/view/"):
        event_url = "https://today.wisc.edu" + href_attr
    else:
        event_url = href_attr

    driver.get(event_url)
    # Short pause after navigation before reading the page source.
    time.sleep(1)

    event_stuff = BeautifulSoup(driver.page_source, "html.parser")
    description_element = event_stuff.select_one("div.event-description")
    if description_element:
        event_description = description_element.get_text(strip=True)
    else:
        # No description block on the page: fall back to the event's URL.
        event_description = event_url

    print(event_description)

Exhibition dates: February 1 — 28, Reception: Saturday, February 1, 6-9pm. Come check out the paintings of undergraduate senior Rachael Hunter, grab a drink, and get creative! Beer and food are available for purchase, and you can also shop stickers and prints featuring Hunter's work. If you can’t make it to the opening night, don’t worry—the paintings will be up for the whole month.
It’s All the Rage: Activism, Aging, and the Raging Grannies of Madison grows from a two-year archival and oral history project. This exhibit focuses on several decades of local Wisconsin grassroots activism enacted by the Madison and Dane County Raging Grannies. Featuring an array of granny hats, aprons, songbooks, political buttons, and instruments, as well as video and audio clips of their original songs, this exhibit explores how senior women have engaged in a wide variety of activities.
Exhibition dates: January 31 – March 1, Reception: Friday, January 31, 5:30-7:30pm. Curated by alumna Annmarie Suglio,

In [5]:
# Re-select the listing rows and title links from the already-parsed day page.
events = soup.select("li.event-row")
event_links = soup.select("h3.event-title a")

for link in event_links:
    href_attr = link["href"]
    # Site-relative detail links get the host prepended; other hrefs are used verbatim.
    event_url = (
        "https://today.wisc.edu" + href_attr
        if href_attr.startswith("/events/view/")
        else href_attr
    )

    driver.get(event_url)

    event_stuff = BeautifulSoup(driver.page_source, "html.parser")
    tag_elements = event_stuff.select("ul.event-tags li a")
    # An empty selection simply produces an empty list of tag names.
    event_tags = [tag.text.strip() for tag in tag_elements]
    print(event_tags)

['Arts', 'Exhibitions', 'Art', 'Exhibition', 'Art department', 'School of Education', 'Arts at uw']
['Arts', 'Exhibitions', 'Humanities', 'Aging', 'Activism', 'Gender', 'Material culture', 'School of human ecology', 'SoHE', 'Gender and Women Studies', 'Center for Design and Material Culture', 'CDMC', 'Center for Research on Gender and Women', 'Raging grannies']
['Arts', 'Exhibitions', 'Art', 'Exhibition', 'Art department', 'School of Education', 'Arts at uw', 'Textile art', 'Fiber art']
['Arts', 'Exhibitions', 'Art', 'Exhibition', 'Art department', 'School of Education', 'Arts at uw']
['Arts', 'Exhibitions', 'International', 'History', 'Humanities', 'Design', 'Textiles', 'Material culture', 'SoHE', 'Archive', 'Lynn Mecklenburg Textile Gallery', 'Helen Louise Allen', 'textiles, art, SoHE', 'Center for Design and Material Culture', 'CDMC', 'Campus collections']
['Arts', 'Humanities', 'Chazen Museum of Art']
['Arts', 'Exhibitions', 'Science', 'Humanities', 'Design', 'Textiles', 'Material 