In [1]:
# config, used for both scraping methods
# NOTE: the names here avoid `all` and `id` on purpose -- assigning to those
# would shadow the Python built-ins for every later cell run in this kernel.
base = "https://uiuc.libcal.com/"
all_spaces = "allspaces"  # page path, used as base + all_spaces
library_ids = {  # base + "spaces?lid=" + id of desired library
    "chemistry": 5903,
    "grainger": 3606,
    "international": 5766,
    "main": 3608,
    "music": 3153,
    "studio": 16231,
}
args = "&gid=0&c=0"  # additional arguments

# url to scrape
url = f"{base}spaces?lid={library_ids['grainger']}{args}"
print(url)

https://uiuc.libcal.com/spaces?lid=3606&gid=0&c=0


### Using selenium
Recommended approach: it drives a real (headless) Chrome browser.

In [2]:
# imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

In [3]:
# open selenium instance, using headless to allow running on a server
options = webdriver.ChromeOptions()
options.add_argument("--headless=new")
# Selenium 4 expects the chromedriver path wrapped in a Service object;
# passing the path positionally is deprecated and rejected by newer releases.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)

# for debugging with a visible browser window, skip the "--headless=new"
# argument above before creating the driver

[WDM] - Downloading: 100%|██████████| 6.79M/6.79M [00:00<00:00, 18.7MB/s]
  driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)


In [4]:
# navigate to url
# The slot elements seem to be added by the page's JavaScript (the
# requests-html method has to render JS before it can find them), so let
# element lookups poll for up to 10 seconds rather than return immediately.
driver.implicitly_wait(10)
driver.get(url)

In [5]:
# collect the title attribute of every element tagged as one of today's slots
today_slots = driver.find_elements(By.CLASS_NAME, "fc-event-today")
events = [slot.get_attribute("title") for slot in today_slots]
len(events)

169

In [6]:
# show every scraped slot title, one per line
for slot_title in events:
    print(slot_title)

5:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
6:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
6:30pm Sunday, April 2, 2023 - 040A - Available
7:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
7:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
8:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
8:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
9:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
9:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
10:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
10:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
11:00pm Sunday, April 2, 2023 - 040A - Available
11:30pm Sunday, April 2, 2023 - 040A - Available
5:30pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
6:00pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
6:30pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
7:00pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
7:30pm Sunday, April 2, 2023 - 040B - Un

In [7]:
# quit driver, if running
# NOTE(review): there is no guard here -- this raises NameError if the cell
# that creates `driver` was never run in the current kernel.
driver.quit()

### Using requests-html
This approach is less consistent than Selenium, so it is kept only as a secondary option.

In [8]:
# import
from requests_html import AsyncHTMLSession

In [27]:
# open session, navigate to page, and render JS
session = AsyncHTMLSession()
r = await session.get(url)
await r.html.arender()

In [28]:
# pull the title attribute off each element carrying the "today" slot class
today_nodes = r.html.find(".fc-event-today")
events = [node.attrs["title"] for node in today_nodes]
len(events)

169

In [14]:
# show every scraped slot title, one per line
for slot_title in events:
    print(slot_title)

5:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
6:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
6:30pm Sunday, April 2, 2023 - 040A - Available
7:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
7:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
8:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
8:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
9:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
9:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
10:00pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
10:30pm Sunday, April 2, 2023 - 040A - Unavailable/Padding
11:00pm Sunday, April 2, 2023 - 040A - Available
11:30pm Sunday, April 2, 2023 - 040A - Available
5:30pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
6:00pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
6:30pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
7:00pm Sunday, April 2, 2023 - 040B - Unavailable/Padding
7:30pm Sunday, April 2, 2023 - 040B - Un