In [40]:
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time

In [292]:
# Step 1: Set up Selenium
options = Options()
options.add_argument("--headless")  # Run in background, no browser window
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

In [294]:
# Step 2: Load page
url = "https://adelaideuni.edu.au/study/study-areas/accounting-commerce-economics/"
driver.get(url)

In [208]:
html = driver.page_source
soup = BS(html, "html.parser")

In [48]:
soup

<html lang="en"><head>
<meta charset="utf-8"/>
<title>Accounting, Commerce &amp; Economics – Information for Australian Students</title>
<meta content="Accounting, Commerce &amp; Economics" name="keywords"/>
<meta content="Adelaide University’s accounting, commerce, and economics degrees make a real-world impact. Industry-informed, you'll graduate with a broad skillset so you’re job-ready." name="description"/>
<meta content="study-area-template" name="template"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<!-- Adding Meta tags for Funnelback-->
<meta content="Study Area" property="og:type"/>
<meta content="Study Area" property="type"/>
<meta property="og:title"/>
<meta content="Adelaide University’s accounting, commerce, and economics degrees make a real-world impact. Industry-informed, you'll graduate with a broad skillset so you’re job-ready." property="og:description"/>
<meta content="https://adelaideuni.edu.au/study/study-areas/accounting-commerce-econom

In [50]:
degree_cards = soup.find_all("div", class_="degree-card-title-container-row-title")

In [52]:
degree_cards

[<div class="degree-card-title-container-row-title">
                     Bachelor of Accounting
                 </div>,
 <div class="degree-card-title-container-row-title">
                     Bachelor of Business
                 </div>,
 <div class="degree-card-title-container-row-title">
                     Bachelor of Business (Economics, Finance and Trade)
                 </div>,
 <div class="degree-card-title-container-row-title">
                     Bachelor of Business (Financial Planning)
                 </div>,
 <div class="degree-card-title-container-row-title">
                     Bachelor of Business (Management)
                 </div>,
 <div class="degree-card-title-container-row-title">
                     Bachelor of Business majoring in Digital Business
                 </div>,
 <div class="degree-card-title-container-row-title">
                     Bachelor of Business majoring in Human Resource Management
                 </div>,
 <div class="degree-card-t

In [82]:
bachelor_degrees = [name for name in degree_names if name.startswith("Bachelor")]

In [84]:
bachelor_degrees

['Bachelor of Accounting',
 'Bachelor of Business',
 'Bachelor of Business (Economics, Finance and Trade)',
 'Bachelor of Business (Financial Planning)',
 'Bachelor of Business (Management)',
 'Bachelor of Business majoring in Digital Business',
 'Bachelor of Business majoring in Human Resource Management',
 'Bachelor of Business majoring in Innovation, Entrepreneurship and Strategy',
 'Bachelor of Business majoring in International Business',
 'Bachelor of Business majoring in Management',
 'Bachelor of Business majoring in Procurement and Supply Chain Management',
 'Bachelor of Business majoring in Project Management',
 'Bachelor of Commerce majoring in Accounting',
 'Bachelor of Commerce majoring in Banking and Finance',
 'Bachelor of Commerce majoring in Business Analytics',
 'Bachelor of Commerce majoring in Financial Planning',
 'Bachelor of Commerce majoring in Property',
 'Bachelor of Digital Business',
 'Bachelor of Economics',
 'Bachelor of Economics (Honours)',
 'Bachelor of

In [194]:
ba_degree_links = []
degree_anchors = soup.find_all("a", class_="degree-card-title-container-row")

for anchor in degree_anchors:
    title_div = anchor.find("div", class_="degree-card-title-container-row-title")
    title = title_div.get_text(strip=True) if title_div else ""

    # Get link
    href = anchor.get("href")

    # Filter only Bachelor degrees
    if href and title.startswith("Bachelor"):
        full_url = href
        ba_degree_links.append(full_url)

In [284]:
ba_degree_links

['https://adelaideuni.edu.au/study/degrees/online/bachelor-of-accounting/dom/',
 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business/dom/',
 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-business-economics-finance-and-trade/dom/',
 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-business-financial-planning/dom/',
 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-business-management/dom/',
 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business-digital-business/dom/',
 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business-human-resource-management/dom/',
 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business-innovation-entrepreneurship-and-strategy/dom/',
 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business-international-business/dom/',
 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business-management/dom/',
 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business-procurement-and-supp

In [272]:
ba_atar_cutoff = []

for url in ba_degree_links:
    driver.get(url)
    #time.sleep(3)  # Or better: use WebDriverWait (I can show you)

    # Get updated page HTML
    html = driver.page_source
    page_soup = BS(html, "html.parser")

    # Find the ATAR score
    subtitle_div = page_soup.find("div", class_="degree-details-content-section-subtitle flex-col")

    atar_score = "N/A"  # Default in case it's missing

    if subtitle_div:
        spans = subtitle_div.find_all("span")
        for span in spans:
            text = span.get_text(strip=True)
            if text.startswith("Guaranteed entry score (ATAR-based):"):
                atar_score = text.split(":")[1].strip()

    ba_atar_cutoff.append(atar_score)

In [274]:
ba_atar_cutoff

['N/A',
 '70',
 'N/A',
 'N/A',
 'N/A',
 '70',
 'N/A',
 '70',
 '70',
 '70',
 '70',
 '70',
 '80',
 '80',
 '80',
 '80',
 '80',
 'N/A',
 '70',
 '80',
 '70',
 'N/A',
 '70']

In [296]:
ba_prereq = []

for url in ba_degree_links:
    driver.get(url)

    # Get updated page HTML
    html = driver.page_source
    page_soup = BS(html, "html.parser")

    prereq = "N/A"  # Default in case it's missing

    # Loop through all icon-list-top blocks
    icon_blocks = page_soup.find_all("div", class_="degree-details-content-section-icon-list-top")

    for block in icon_blocks:
        # Look for heading span
        heading_span = block.find("div", class_="degree-details-content-section-icon-list-top-heading-content d-flex").find("span")
        heading_text = heading_span.get_text(strip=True) if heading_span else ""

        if heading_text == "Prerequisite":
            # Now extract subtitle span
            subtitle_div = block.find("div", class_="degree-details-content-section-subtitle")
            subtitle_span = subtitle_div.find("span") if subtitle_div else None
            prereq = subtitle_span.get_text(strip=True) if subtitle_span else "N/A"
            break  # Stop after finding first match

    ba_prereq.append(prereq)

In [298]:
ba_prereq

['None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None',
 'None']

In [368]:
ba_mode = []

for url in ba_degree_links:
    driver.get(url)
    #time.sleep(3)  # Or better: use WebDriverWait (I can show you)

    # Get updated page HTML
    html = driver.page_source
    page_soup = BS(html, "html.parser")

    block1 = page_soup.find("div", class_="degree-details-content-section-icon-list-top degree-mode")
    block2 = block1.find_all('span')

    mode = []
    
    for i in block2:
        mode = i.get_text(strip=True)

    ba_mode.append(mode)

In [369]:
ba_mode

['100% Online',
 'On Campus',
 '100% Online',
 '100% Online',
 '100% Online',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 'On Campus',
 '100% Online',
 'On Campus',
 'On Campus',
 'On Campus',
 '100% Online',
 'On Campus']

In [None]:
degree-details-content-section-icon-list-top

In [None]:
ba_start_date = []

for url in ba_degree_links:
    driver.get(url)

    # Get updated page HTML
    html = driver.page_source
    page_soup = BS(html, "html.parser")

    prereq = "N/A"  # Default in case it's missing

    # Loop through all icon-list-top blocks
    icon_blocks = page_soup.find_all("div", class_="degree-details-content-section-icon-list-top")

    for block in icon_blocks:
        # Look for heading span
        heading_span = block.find("div", class_="degree-details-content-section-icon-list-top-heading-content d-flex").find("span")
        heading_text = heading_span.get_text(strip=True) if heading_span else ""

        if heading_text == "Prerequisite":
            # Now extract subtitle span
            subtitle_div = block.find("div", class_="degree-details-content-section-subtitle")
            subtitle_span = subtitle_div.find("span") if subtitle_div else None
            prereq = subtitle_span.get_text(strip=True) if subtitle_span else "N/A"
            break  # Stop after finding first match

    ba_start_date.append(prereq)

In [580]:
driver.get('https://adelaideuni.edu.au/study/degrees/online/bachelor-of-business-management/dom/')
html = driver.page_source
page_soup = BS(html, "html.parser")
block3 = page_soup.find_all("div", class_="degree-details-content-section-icon-list-top")

In [582]:
block3

[<div class="degree-details-content-section-icon-list-top degree-mode">
 <div class="degree-details-content-section-icon-list-top-heading-content d-flex">
 <div class="degree-details-content-section-icon-list-top-heading-content d-flex">
 <span>Mode</span>
 </div>
 <!-- Add tooltip container -->
 <div class="tooltip-container">
 <div class="tooltip-content">
 <div class="tooltip-header">
 <span class="tooltip-heading">Mode</span>
 <div class="tooltip-close"><span class="tooltip-close-icon"></span></div>
 </div>
 <div class="tooltip-body">
 <span class="tooltip-subtitle">Your study is delivered 100% online</span>
 </div>
 </div>
 </div>
 <!-- End of tooltip container -->
 <div class="degree-details-content-section-icon-list-top-heading-content-circle-info-icon"></div>
 </div>
 <span>100% Online</span>
 </div>,
 <div class="degree-details-content-section-icon-list-top">
 <div class="degree-details-content-section-icon-list-top-heading-content d-flex">
 <div class="degree-details-content-

In [630]:
for i in block3:
    spans = i.find_all('span')
    #print(spans)
    
    if spans: # shorthad for if len(spans) > 0:
        name = spans[0].get_text(strip=True)
        value = spans[-1].get_text(strip=True)
        print(name, value)

Mode 100% Online
Start date January, April, June, September
Duration 3 year(s) full-time
Program code XBBMG
Study as Full time or part time
Indicative annual fees Non-tuition fees
Prerequisite None
Assumed knowledge None
Time commitment 10-15 hours per week per course
Check your eligibility opens in a new tab


In [624]:
### EXPLANATION of why we did extracted the first span and last span. Unhash the code above to see why

#[<span>Mode</span>, 
 #<span class="tooltip-heading">Mode</span>, 
 #<span class="tooltip-close-icon"></span>, 
 #<span class="tooltip-subtitle">Your study is delivered 100% online</span>, 
 #<span>100% Online</span>]

# The first span is always the label, and the last span is always the actual value the users sees

# Future direction, I can just extract everything now.

In [638]:
# Create an empty list to store dictionaries
degree_data = []

# Loop through each block and extract name/value
for i in block3:
    spans = i.find_all('span')
    
    if spans:
        name = spans[0].get_text(strip=True)
        value = spans[-1].get_text(strip=True)
        
        # Add as a dictionary
        degree_data.append({
            "Field": name,
            "Value": value
        })

df = pd.DataFrame(degree_data)

In [640]:
df

Unnamed: 0,Field,Value
0,Mode,100% Online
1,Start date,"January, April, June, September"
2,Duration,3 year(s) full-time
3,Program code,XBBMG
4,Study as,Full time or part time
5,Indicative annual fees,Non-tuition fees
6,Prerequisite,
7,Assumed knowledge,
8,Time commitment,10-15 hours per week per course
9,Check your eligibility,opens in a new tab


In [588]:
ba_start_dates = []

start_date = "N/A"

icon_blocks = page_soup.find_all("div", class_="degree-details-content-section-icon-list-top")

for block in icon_blocks:
    heading_span = block.find("div", class_="degree-details-content-section-icon-list-top-heading-content d-flex")
    heading_text = heading_span.find("span").get_text(strip=True) if heading_span else ""

    if heading_text == "Start date":
        # The start dates are in a span directly under the block
        date_span = block.find("span")
        start_date = date_span.get_text(strip=True) if date_span else "N/A"
        break  # Found it → stop the loop

ba_start_dates.append(start_date)

In [590]:
ba_start_dates

['Start date']

In [372]:
accounting_list = pd.DataFrame({
    'degree_name': bachelor_degrees,
    '2025_atar_cutoff': ba_atar_cutoff,
    'Prerequisite': ba_prereq,
    'Mode': ba_mode,
    'degree_links': ba_degree_links
})

In [374]:
accounting_list

Unnamed: 0,degree_name,2025_atar_cutoff,Prerequisite,Mode,degree_links
0,Bachelor of Accounting,,,100% Online,https://adelaideuni.edu.au/study/degrees/onlin...
1,Bachelor of Business,70.0,,On Campus,https://adelaideuni.edu.au/study/degrees/bache...
2,"Bachelor of Business (Economics, Finance and T...",,,100% Online,https://adelaideuni.edu.au/study/degrees/onlin...
3,Bachelor of Business (Financial Planning),,,100% Online,https://adelaideuni.edu.au/study/degrees/onlin...
4,Bachelor of Business (Management),,,100% Online,https://adelaideuni.edu.au/study/degrees/onlin...
5,Bachelor of Business majoring in Digital Business,70.0,,On Campus,https://adelaideuni.edu.au/study/degrees/bache...
6,Bachelor of Business majoring in Human Resourc...,,,On Campus,https://adelaideuni.edu.au/study/degrees/bache...
7,"Bachelor of Business majoring in Innovation, E...",70.0,,On Campus,https://adelaideuni.edu.au/study/degrees/bache...
8,Bachelor of Business majoring in International...,70.0,,On Campus,https://adelaideuni.edu.au/study/degrees/bache...
9,Bachelor of Business majoring in Management,70.0,,On Campus,https://adelaideuni.edu.au/study/degrees/bache...
