In [2]:
import requests
from bs4 import BeautifulSoup
import csv

In [4]:
url = "https://adelaideuni.edu.au/study/study-areas/accounting-commerce-economics/"
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")

In [6]:
soup


<!DOCTYPE HTML>

<html lang="en">
<head>
<meta charset="utf-8"/>
<title>Accounting, Commerce &amp; Economics – Information for Australian Students</title>
<meta content="Accounting, Commerce &amp; Economics" name="keywords"/>
<meta content="Adelaide University’s accounting, commerce, and economics degrees make a real-world impact. Industry-informed, you'll graduate with a broad skillset so you’re job-ready." name="description"/>
<meta content="study-area-template" name="template"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="Study Area" property="og:type"/>
<meta content="Study Area" property="type"/>
<meta property="og:title"/>
<meta content="Adelaide University’s accounting, commerce, and economics degrees make a real-world impact. Industry-informed, you'll graduate with a broad skillset so you’re job-ready." property="og:description"/>
<meta content="https://adelaideuni.edu.au/study/study-areas/accounting-commerce-economics/" property="og:ur

In [8]:
degrees = []

for link in soup.find_all("a", href=True):
    href = link['href']
    text = link.get_text(strip=True)
    # Filter for <a> tags that clearly link to degrees (adjust this to fit actual HTML structure)
    if "/degrees/" in href and text:
        # Sometimes the link may be relative, so prepend the domain if missing
        if href.startswith('/'):
            href = f"https://adelaideuni.edu.au{href}"
        degrees.append({"name": text, "url": href})

In [10]:
degrees

[{'name': 'Compare degrees',
  'url': 'https://adelaideuni.edu.au/study/degrees/compare-degrees/'},
 {'name': 'Compare degrees',
  'url': 'https://adelaideuni.edu.au/study/degrees/compare-degrees/'},
 {'name': 'Diploma in Digital Business',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/diploma-in-digital-business/dom/'},
 {'name': 'View more',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/diploma-in-digital-business/dom/'},
 {'name': 'Bachelor of Accounting',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-accounting/dom/'},
 {'name': 'View more',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-accounting/dom/'},
 {'name': 'Bachelor of Business',
  'url': 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business/dom/'},
 {'name': 'View more',
  'url': 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business/dom/'},
 {'name': 'Bachelor of Business (Economics, Finance and Trade)',
  'url': 'https://adelaideuni

In [12]:
ba_degrees = [degree for degree in degrees if degree['name'].startswith('Bachelor')]

In [14]:
ba_degrees

[{'name': 'Bachelor of Accounting',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-accounting/dom/'},
 {'name': 'Bachelor of Business',
  'url': 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business/dom/'},
 {'name': 'Bachelor of Business (Economics, Finance and Trade)',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-business-economics-finance-and-trade/dom/'},
 {'name': 'Bachelor of Business (Financial Planning)',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-business-financial-planning/dom/'},
 {'name': 'Bachelor of Business (Management)',
  'url': 'https://adelaideuni.edu.au/study/degrees/online/bachelor-of-business-management/dom/'},
 {'name': 'Bachelor of Business majoring in Digital Business',
  'url': 'https://adelaideuni.edu.au/study/degrees/bachelor-of-business-digital-business/dom/'},
 {'name': 'Bachelor of Business majoring in Human Resource Management',
  'url': 'https://adelaideuni.edu.au/study/

In [16]:
def extract_all_courses_with_sections(soup, degree_name):
    results = []
    tables = soup.find_all("table", class_="cmp-data-table")
    for table in tables:
        
        # Try to get the nearest section header (walk up DOM)
        section = None
        current = table
        
        # Walk up until we find a header tag or accordion button
        for _ in range(5):  # Limit climbs to avoid infinite loop
            current = current.find_previous(['h2', 'h3', 'button'])
            if not current:
                break
            tag = current.name
            if tag == 'button':
                section = current.get_text(strip=True)
                break
            elif tag in ['h2', 'h3']:
                section = current.get_text(strip=True)
                break
                
        # Now extract all rows
        rows = table.find("tbody").find_all("tr")
        for row in rows:
            tds = row.find_all("td")
            if len(tds) < 3: continue  # Defensive
            name_a = tds[1].find("a", class_="table-content")
            name_div = tds[1].find("div", class_="table-content")
            if name_a:
                course_name = name_a.get_text(strip=True)
            elif name_div:
                course_name = name_div.get_text(strip=True)
            else:
                course_name = ""
            code_tag = tds[2].find("div", class_="table-content")
            course_code = code_tag.get_text(strip=True) if code_tag else ""
            results.append({
                "degree": degree_name,
                "section": section if section else "",
                "course_code": course_code,
                "course_name": course_name
            })
    return results

In [16]:
def extract_all_courses_with_sections(soup, degree_name):
    results = []

    # STEP 1: Build mapping of section div ids -> section names (from tab buttons)
    section_map = {}
    tabs_div = soup.find("div", class_="cmp-course-info-by-year__year-tabs")
    if tabs_div:
        for btn in tabs_div.find_all("button"):
            target = btn.get("data-course-info-target", "")
            if target:
                section_id = target.lstrip("#")
                section_name = btn.get_text(strip=True)
                section_map[section_id] = section_name

    # STEP 2: Loop through tables, climbing up DOM to match section id
    tables = soup.find_all("table", class_="cmp-data-table")
    for table in tables:
        section = None
        
        # Climb up DOM to parent div for section id
        parent = table
        for _ in range(5):  # Limit climbs
            parent = parent.find_parent("div")
            if not parent:
                break
            parent_id = parent.get("id", "")
            if parent_id in section_map:
                section = section_map[parent_id]
                break
        
        # If no section found by id, fallback to previous header/button logic
        if not section:
            current = table
            for _ in range(5):
                current = current.find_previous(['h2', 'h3', 'button'])
                if not current:
                    break
                tag = current.name
                if tag == 'button':
                    section = current.get_text(strip=True)
                    break
                elif tag in ['h2', 'h3']:
                    section = current.get_text(strip=True)
                    break

        # STEP 3: Extract course rows, as before
        tbody = table.find("tbody")
        if not tbody:
            continue
        rows = tbody.find_all("tr")
        for row in rows:
            tds = row.find_all("td")
            if len(tds) < 3:
                continue  # Defensive
            # Find course name
            name_a = tds[1].find("a", class_="table-content")
            name_div = tds[1].find("div", class_="table-content")
            if name_a:
                course_name = name_a.get_text(strip=True)
            elif name_div:
                course_name = name_div.get_text(strip=True)
            else:
                course_name = ""
            # Find course code
            code_tag = tds[2].find("div", class_="table-content")
            course_code = code_tag.get_text(strip=True) if code_tag else ""
            results.append({
                "degree": degree_name,
                "section": section if section else "",
                "course_code": course_code,
                "course_name": course_name
            })
    return results


In [140]:
def extract_all_courses_with_sections(soup, degree_name):
    results = []

    # Section mapping from tab buttons
    section_map = {}
    tabs_div = soup.find("div", class_="cmp-course-info-by-year__year-tabs")
    if tabs_div:
        for btn in tabs_div.find_all("button"):
            target = btn.get("data-course-info-target", "")
            if target:
                section_id = target.lstrip("#")
                section_name = btn.get_text(strip=True)
                section_map[section_id] = section_name

    tables = soup.find_all("table", class_="cmp-data-table")
    for table in tables:
        section = None

        # Climb up to parent div with section id (tab selector)
        parent = table
        for _ in range(5):
            parent = parent.find_parent("div")
            if not parent:
                break
            parent_id = parent.get("id", "")
            if parent_id in section_map:
                section = section_map[parent_id]
                break

        if not section:
            section = ""

        # Row-wise stream detection (your working strategy per course row)
        tbody = table.find("tbody")
        if not tbody:
            continue
        rows = tbody.find_all("tr")
        for row in rows:
            tds = row.find_all("td")
            if len(tds) < 3:
                continue  # Defensive
            
            from bs4.element import Tag

            # Locate the parent container of the table (accordion-body, tab item, etc.)
            parent_div = table.find_parent("div", class_="accordion-item")
            stream = ""
            if parent_div:
                h3_prev = parent_div.find("h3", class_="accordion-header")
                if h3_prev:
                    stream = h3_prev.get_text(strip=True)
            
            # Extract course name
            name_a = tds[1].find("a", class_="table-content")
            name_div = tds[1].find("div", class_="table-content")
            if name_a:
                course_name = name_a.get_text(strip=True)
            elif name_div:
                course_name = name_div.get_text(strip=True)
            else:
                course_name = ""

            # Extract course code
            code_tag = tds[2].find("div", class_="table-content")
            course_code = code_tag.get_text(strip=True) if code_tag else ""

            results.append({
                "degree": degree_name,
                "section": section,
                "stream": stream if stream else "",
                "course_code": course_code,
                "course_name": course_name
            })
    return results


In [142]:
all_courses = []

for degree in ba_degrees:
    response = requests.get(degree['url'])
    soup = BeautifulSoup(response.text, "html.parser")
    courses = extract_all_courses_with_sections(soup, degree['name'])
    all_courses.extend(courses)  # Add all courses to one big list

In [144]:
all_courses

[{'degree': 'Bachelor of Accounting',
  'section': 'Core courses',
  'stream': '',
  'course_code': 'ACCT1005',
  'course_name': 'UO Accounting for Business'},
 {'degree': 'Bachelor of Accounting',
  'section': 'Core courses',
  'stream': '',
  'course_code': 'ACCT1006',
  'course_name': 'UO Financial Accounting 1'},
 {'degree': 'Bachelor of Accounting',
  'section': 'Core courses',
  'stream': '',
  'course_code': 'BUSI1030',
  'course_name': 'UO Career Development in Business'},
 {'degree': 'Bachelor of Accounting',
  'section': 'Core courses',
  'stream': '',
  'course_code': 'ECON1010',
  'course_name': 'UO Principles of Economics'},
 {'degree': 'Bachelor of Accounting',
  'section': 'Core courses',
  'stream': '',
  'course_code': 'LAWS1018',
  'course_name': 'UO Business Law'},
 {'degree': 'Bachelor of Accounting',
  'section': 'Core courses',
  'stream': '',
  'course_code': 'MARK1013',
  'course_name': 'UO Marketing Principles: Trading and Exchange'},
 {'degree': 'Bachelor of A

In [146]:
import csv

fieldnames = ['degree', 'section',  'stream', 'course_code','course_name']

with open('accounting-commerce-economics-2.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_courses)

In [26]:
urls = [
    "https://adelaideuni.edu.au/study/study-areas/accounting-commerce-economics/",
    "https://adelaideuni.edu.au/study/study-areas/agriculture-animal-veterinary-science/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/allied-health/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/architecture-design/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/arts-humanities-social-sciences/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/aviation/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/business-marketing-management/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/computer-science-information-technology/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/creative-media-communication/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/engineering/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/health-biomedical-sciences/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/law-and-justice/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/mathematics-data-science/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/medicine-dentistry-oral-health/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/music/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/nursing-midwifery/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/nutrition-food-science/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/property-construction-real-estate/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/psychology-social-work/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/science-environment-sustainability/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/teaching-education/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/tourism-sport-events/#undergraduate"
]

In [34]:
def scrape_bachelor_courses_for_area(area_url):
    response = requests.get(area_url)
    soup = BeautifulSoup(response.text, "html.parser")
    # STEP 1: Find all degree links
    degrees = []
    for link in soup.find_all("a", href=True):
        href = link['href']
        text = link.get_text(strip=True)
        if "/degrees/" in href and text:
            if href.startswith('/'):
                href = f"https://adelaideuni.edu.au{href}"
            degrees.append({"name": text, "url": href})

    # STEP 2: Filter only bachelor degrees
    bachelor_degrees = [degree for degree in degrees if degree['name'].startswith('Bachelor')]

    # STEP 3: Scrape each degree page for courses
    all_courses = []
    for degree in bachelor_degrees:
        sub_resp = requests.get(degree['url'])
        sub_soup = BeautifulSoup(sub_resp.text, "html.parser")
        courses = extract_all_courses_with_sections(sub_soup, degree['name'])
        all_courses.extend(courses)

    # STEP 4: Export to CSV (name using area_url)
    import re
    area_match = re.search(r'study-areas/([\w\-]+)', area_url)
    area_name = area_match.group(1) if area_match else "output"
    filename = f"{area_name}.csv"

    fieldnames = ['degree', 'section', 'course_code', 'course_name']
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_courses)
    print(f"{filename} saved with {len(all_courses)} courses.")

In [36]:
urls = [
    "https://adelaideuni.edu.au/study/study-areas/accounting-commerce-economics/",
    "https://adelaideuni.edu.au/study/study-areas/agriculture-animal-veterinary-science/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/allied-health/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/architecture-design/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/arts-humanities-social-sciences/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/aviation/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/business-marketing-management/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/computer-science-information-technology/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/creative-media-communication/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/engineering/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/health-biomedical-sciences/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/law-and-justice/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/mathematics-data-science/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/medicine-dentistry-oral-health/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/music/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/nursing-midwifery/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/nutrition-food-science/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/property-construction-real-estate/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/psychology-social-work/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/science-environment-sustainability/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/teaching-education/#undergraduate",
    "https://adelaideuni.edu.au/study/study-areas/tourism-sport-events/#undergraduate"
]

In [38]:
for url in urls:
    scrape_bachelor_courses_for_area(url)

accounting-commerce-economics.csv saved with 1781 courses.
agriculture-animal-veterinary-science.csv saved with 629 courses.
allied-health.csv saved with 1453 courses.
architecture-design.csv saved with 759 courses.
arts-humanities-social-sciences.csv saved with 6277 courses.
aviation.csv saved with 358 courses.
business-marketing-management.csv saved with 3291 courses.
computer-science-information-technology.csv saved with 2333 courses.
creative-media-communication.csv saved with 3235 courses.
engineering.csv saved with 1760 courses.
health-biomedical-sciences.csv saved with 2961 courses.
law-and-justice.csv saved with 3286 courses.
mathematics-data-science.csv saved with 5280 courses.
medicine-dentistry-oral-health.csv saved with 312 courses.
music.csv saved with 1057 courses.
nursing-midwifery.csv saved with 76 courses.
nutrition-food-science.csv saved with 215 courses.
property-construction-real-estate.csv saved with 486 courses.
psychology-social-work.csv saved with 2131 courses.


## Filter

In [42]:
import pandas as pd

In [46]:
accounting = pd.read_csv('accounting-commerce-economics.csv')

In [74]:
accounting.section.value_counts()

section
Standard study plans                                                 574
Program electives - Business                                         231
Rules and notes                                                      163
Program core                                                         154
Program core selectives                                              120
Common core                                                          108
Program electives                                                     86
Electives                                                             61
Core courses                                                          58
Elective - Open                                                       40
Program electives - Procurement and Supply Chain Management major     30
Program electives - year 3                                            20
Program electives - Project Management major                          20
Major courses - selective                  

In [70]:
accounting[accounting['section'] == 'Common core']

Unnamed: 0,degree,section,course_code,course_name
150,Bachelor of Business,Common core,COREX001,An Ethically Rich Life
151,Bachelor of Business,Common core,COREX002,Fact or Fiction: Data for Everyone
152,Bachelor of Business,Common core,COREX003,Igniting Change: Ideas to Action
153,Bachelor of Business,Common core,COREX004,"Proppa Ways, Future Practice"
154,Bachelor of Business,Common core,COREX005,"Responsible AI: Bridging Ethics, Education and..."
...,...,...,...,...
1670,"Bachelor of Philosophy, Politics and Economics",Common core,COREX002,Fact or Fiction: Data for Everyone
1671,"Bachelor of Philosophy, Politics and Economics",Common core,COREX003,Igniting Change: Ideas to Action
1672,"Bachelor of Philosophy, Politics and Economics",Common core,COREX004,"Proppa Ways, Future Practice"
1673,"Bachelor of Philosophy, Politics and Economics",Common core,COREX005,"Responsible AI: Bridging Ethics, Education and..."
