# Goal: Produce a cleaned dataset of universities and corresponding EPP complaint filing dates.

## Let's rely on Playwright to handle the scraping of the case list page.

In [1]:
from playwright.async_api import async_playwright  

In [2]:
# Start the Playwright driver; the returned handle is what the next cell uses to launch a browser.
# Top-level `await` is used directly in the cell (no asyncio.run wrapper).
playwright = await async_playwright().start()


In [3]:
# Launch Chromium with a visible window (headless=False).
# NOTE(review): a headed browser needs a display to run; confirm whether
# headless=True would work for this site before running this unattended.
browser = await playwright.chromium.launch(headless=False)

In [4]:
# Open a fresh page in the launched browser; the navigation and HTML capture below go through it.
page = await browser.new_page()

In [5]:
# Navigate to the page that lists the cases. Because this is the cell's last
# expression, the Response returned by goto() is shown as the cell output.
await page.goto("https://equalprotect.org/case/")

<Response url='https://equalprotect.org/case/' request=<Request url='https://equalprotect.org/case/' method='GET'>>

In [6]:
full_html = await page.content()

full_html

'<!DOCTYPE html><html lang="en-US" class=" no-touchevents js_active  vc_desktop  vc_transform  vc_transform " style="height: auto; overflow: auto;"><head><meta http-equiv="origin-trial" content="A7vZI3v+Gz7JfuRolKNM4Aff6zaGuT7X0mf3wtoZTnKv6497cVMnhy03KDqX7kBz/q/iidW7srW31oQbBt4VhgoAAACUeyJvcmlnaW4iOiJodHRwczovL3d3dy5nb29nbGUuY29tOjQ0MyIsImZlYXR1cmUiOiJEaXNhYmxlVGhpcmRQYXJ0eVN0b3JhZ2VQYXJ0aXRpb25pbmczIiwiZXhwaXJ5IjoxNzU3OTgwODAwLCJpc1N1YmRvbWFpbiI6dHJ1ZSwiaXNUaGlyZFBhcnR5Ijp0cnVlfQ=="><script type="text/javascript" async="" charset="utf-8" src="https://www.gstatic.com/recaptcha/releases/N67nZn4AqZkNcbeMu4prBgzg/recaptcha__en.js" crossorigin="anonymous" integrity="sha384-8HATTPUsU/SjrV9KR44/tgBG8Czia5FPTCnvn3iAGBsMsUqgy9L0h45QhvFE9aql"></script><script src="https://cdn.jsdelivr.net/npm/react-dom@16/umd/react-dom.production.min.js" type="text/javascript"></script><script src="https://cdn.jsdelivr.net/npm/react@16/umd/react.production.min.js" type="text/javascript"></script><script src="ht

In [7]:
from bs4 import BeautifulSoup

# Parse the browser-captured HTML using the "html.parser" backend.
soup = BeautifulSoup(full_html, "html.parser")
# NOTE(review): displaying `soup` prints the entire document; a short preview
# would keep the notebook output readable.
soup

<!DOCTYPE html>
<html class="no-touchevents js_active vc_desktop vc_transform vc_transform" lang="en-US" style="height: auto; overflow: auto;"><head><meta content="A7vZI3v+Gz7JfuRolKNM4Aff6zaGuT7X0mf3wtoZTnKv6497cVMnhy03KDqX7kBz/q/iidW7srW31oQbBt4VhgoAAACUeyJvcmlnaW4iOiJodHRwczovL3d3dy5nb29nbGUuY29tOjQ0MyIsImZlYXR1cmUiOiJEaXNhYmxlVGhpcmRQYXJ0eVN0b3JhZ2VQYXJ0aXRpb25pbmczIiwiZXhwaXJ5IjoxNzU3OTgwODAwLCJpc1N1YmRvbWFpbiI6dHJ1ZSwiaXNUaGlyZFBhcnR5Ijp0cnVlfQ==" http-equiv="origin-trial"/><script async="" charset="utf-8" crossorigin="anonymous" integrity="sha384-8HATTPUsU/SjrV9KR44/tgBG8Czia5FPTCnvn3iAGBsMsUqgy9L0h45QhvFE9aql" src="https://www.gstatic.com/recaptcha/releases/N67nZn4AqZkNcbeMu4prBgzg/recaptcha__en.js" type="text/javascript"></script><script src="https://cdn.jsdelivr.net/npm/react-dom@16/umd/react-dom.production.min.js" type="text/javascript"></script><script src="https://cdn.jsdelivr.net/npm/react@16/umd/react.production.min.js" type="text/javascript"></script><script src="https:

In [8]:
import re

# Collect the case-title anchors from `soup`, the parsed listing page built in
# the earlier cell.
links = soup.find_all("a", class_="text-xl font-semibold black-st hover:blue-st")

for link in links:
    href = link.get("href")
    full_text = link.get_text(strip=True)
    # Keep everything after the FIRST standalone "v." (plaintiff v. defendant).
    # The word boundary stops abbreviations such as "Univ." from being treated
    # as the separator, which a plain split("v.")[-1] would do. Titles without
    # a "v." come back unchanged.
    university_name = re.split(r"\bv\.\s*", full_text, maxsplit=1)[-1].strip()

    print(university_name, href)

Metropolitan State University of Denver https://equalprotect.org/case/equal-protection-project-v-metropolitan-state-university-of-denver/
UC Office of the President, et al. – Puente Project https://equalprotect.org/case/equal-protection-project-v-univ-of-california-office-of-the-president-et-al/
University of Tennessee, Knoxville https://equalprotect.org/case/equal-protection-project-v-university-of-tennessee-knoxville/
Massachusetts Institute of Technology – Womxn Programs https://equalprotect.org/case/equal-protection-project-v-massachusetts-institute-of-technology-womxn-programs/
Seven Illinois Public Universities https://equalprotect.org/case/equal-protection-project-v-seven-illinois-public-universities/
Harvard University – Union Scholars Program https://equalprotect.org/case/equal-protection-project-v-harvard-university-union-scholars-program/
138 California Colleges and Universities https://equalprotect.org/case/equal-protection-project-v-138-california-colleges/
Lehigh Universi

## We will now do a multi-page scrape to get the date on which the EPP filed its complaint against each university.

In [10]:
import requests

# Try the approach on a single case page before looping over all of them.
my_url = 'https://equalprotect.org/case/equal-protection-project-v-seven-illinois-public-universities/'

# Without a timeout, requests can wait forever on an unresponsive server.
# NOTE: despite its name, `raw_html` is the Response object, not the HTML text.
raw_html = requests.get(my_url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
raw_html.raise_for_status()

soup_doc = BeautifulSoup(raw_html.content, 'html.parser')

soup_doc

<!DOCTYPE html>

<html lang="en-US">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width,initial-scale=1,user-scalable=no" name="viewport"/>
<link href="https://gmpg.org/xfn/11" rel="profile"/>
<link href="https://equalprotect.org/xmlrpc.php" rel="pingback"/>
<title>Equal Protect | Equal Protection Project v. Seven Illinois Public Universities</title>
<meta content="max-image-preview:large" name="robots">
<script id="cookieyes" src="https://cdn-cookieyes.com/client_data/ce3893bf3a5e602faed40d77/script.js" type="text/javascript"></script><link href="//fonts.googleapis.com" rel="dns-prefetch"/>
<link href="https://equalprotect.org/feed/" rel="alternate" title="Equal Protect » Feed" type="application/rss+xml"/>
<link href="https://equalprotect.org/comments/feed/" rel="alternate" title="Equal Protect » Comments Feed" type="application/rss+xml"/>
<link href="https://equalprotect.org/wp-json/oembed/1.0/embed?url=https%3A%2F%2Fequalprotect.org%2Fcase%2Fequal-protection-project-v-

In [11]:
# Take the second <p> returned for class_="pb-[25px] text-black-st" and read
# its whitespace-stripped text.
# NOTE(review): presumably that paragraph holds the filing date on every case
# page (it does for the page fetched above, per the output) — confirm. The
# positional [1] raises IndexError when fewer than two paragraphs match.
date=soup_doc.find_all("p", class_="pb-[25px] text-black-st")[1].get_text(strip=True)
date

'12/22/2025'

In [12]:
import re

import requests
from bs4 import BeautifulSoup

# Seconds to wait on each case page before giving up on it.
REQUEST_TIMEOUT = 30

# Case-title anchors from `soup`, the parsed listing page built in an earlier cell.
links = soup.find_all("a", class_="text-xl font-semibold black-st hover:blue-st")

# One record per case: {"university", "href", "date_filed"}.
case_data = []

# A single Session reuses the connection across all case pages.
with requests.Session() as session:
    for link in links:
        href = link.get("href")
        full_text = link.get_text(strip=True)

        # Keep everything after the FIRST standalone "v."; the word boundary
        # stops abbreviations such as "Univ." from acting as the separator.
        # Titles without a "v." come back unchanged.
        university_name = re.split(r"\bv\.\s*", full_text, maxsplit=1)[-1].strip()

        # "N/A" marks cases whose page could not be fetched or has no date.
        date = "N/A"
        try:
            response = session.get(href, timeout=REQUEST_TIMEOUT)
            # Do not parse an HTTP error page as if it were a case page.
            response.raise_for_status()
        except requests.RequestException as exc:
            # One bad link or a network hiccup should not abort the whole
            # scrape; report it and keep the "N/A" placeholder.
            print(f"Could not fetch {href}: {exc}")
        else:
            soup_doc = BeautifulSoup(response.text, "html.parser")
            date_cells = soup_doc.find_all("p", class_="pb-[25px] text-black-st")
            # The date is read from the second matching paragraph; pages with
            # fewer matches keep the "N/A" placeholder.
            if len(date_cells) > 1:
                date = date_cells[1].get_text(strip=True)

        case_data.append({
            "university": university_name,
            "href": href,
            "date_filed": date
        })


for case in case_data:
    print(case)

{'university': 'Metropolitan State University of Denver', 'href': 'https://equalprotect.org/case/equal-protection-project-v-metropolitan-state-university-of-denver/', 'date_filed': 'January 30, 2026'}
{'university': 'UC Office of the President, et al. – Puente Project', 'href': 'https://equalprotect.org/case/equal-protection-project-v-univ-of-california-office-of-the-president-et-al/', 'date_filed': 'January 18, 2026'}
{'university': 'University of Tennessee, Knoxville', 'href': 'https://equalprotect.org/case/equal-protection-project-v-university-of-tennessee-knoxville/', 'date_filed': 'January 6, 2026'}
{'university': 'Massachusetts Institute of Technology – Womxn Programs', 'href': 'https://equalprotect.org/case/equal-protection-project-v-massachusetts-institute-of-technology-womxn-programs/', 'date_filed': 'December 29, 2025'}
{'university': 'Seven Illinois Public Universities', 'href': 'https://equalprotect.org/case/equal-protection-project-v-seven-illinois-public-universities/', '

## END. 