<a href="https://colab.research.google.com/github/Vortable/colab-projects/blob/main/UniversityScrape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install Libraries

In [None]:
!pip install requests beautifulsoup4 pandas




Import Libraries


In [None]:

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time



Web Scraper

In [None]:

# Scrape the Canadian university listing on studyabroadaide.com, follow each
# university's detail page, and write the collected fields to a CSV file.

REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is given
REQUEST_DELAY = 1  # seconds to pause after every detail-page request


def fetch_soup(session, url):
    """Download *url* with *session* and return the parsed HTML document.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            4xx/5xx response, so that error pages are never parsed as data.
    """
    page = session.get(url, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    return BeautifulSoup(page.content, "html.parser")


def get_website(detail_soup):
    """Return the official website link of a detail page, or "" if absent."""
    website_div = detail_soup.find("div", class_="saa-institution-link")
    link = website_div.find("a") if website_div is not None else None
    # Tag.get avoids a KeyError when the anchor carries no href attribute.
    return link.get("href", "") if link is not None else ""


def get_fees(detail_soup, section_class):
    """Return the tuition fees listed under the <h3> with *section_class*.

    The result always has the keys "Domestic" and "International"; a value
    stays "" when the heading, its table, or the matching row is missing.
    """
    fees = {"Domestic": "", "International": ""}
    section = detail_soup.find("h3", class_=section_class)
    fee_table = section.find_next("table") if section is not None else None
    if fee_table is None:
        return fees

    for tr in fee_table.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) != 2:
            # Only (student type, fee) rows carry data; skip everything else.
            continue
        student_type = tds[0].get_text(strip=True)
        fee = tds[1].get_text(strip=True)
        if "Domestic" in student_type:
            fees["Domestic"] = fee
        elif "International" in student_type:
            fees["International"] = fee
    return fees


def get_programs(detail_soup, table_class):
    """Return the first-cell text of every row in the table *table_class*.

    Returns an empty list when the page has no such table.
    """
    program_table = detail_soup.find("table", class_=table_class)
    if program_table is None:
        return []
    first_cells = (tr.find("td") for tr in program_table.find_all("tr"))
    return [td.get_text(strip=True) for td in first_cells if td is not None]


# Step 1: Load the main page
main_url = "https://studyabroadaide.com/all-universities-canada/"
headers = {"User-Agent": "Mozilla/5.0"}

# Step 3: Prepare data container
universities = []

# One session reuses the underlying connection for all requests to the site.
with requests.Session() as session:
    session.headers.update(headers)
    soup = fetch_soup(session, main_url)

    # Step 2: Extract university rows
    table = soup.find("table", class_="saa_table")
    if table is None:
        raise RuntimeError(f"University table not found on {main_url}")
    table_body = table.find("tbody")
    if table_body is None:
        table_body = table
    rows = table_body.find_all("tr")

    # Step 4: Loop through each university
    for row in rows:
        cols = row.find_all("td")
        name_tag = cols[0].find("a") if cols else None
        if len(cols) < 3 or name_tag is None or not name_tag.get("href"):
            # A malformed or header-like row must not abort the whole scrape.
            print(f"Skipped row without university link: {row.get_text(' ', strip=True)}")
            continue

        name = name_tag.get_text(strip=True)
        detail_url = name_tag["href"]
        location = cols[1].get_text(strip=True)
        uni_type = cols[2].get_text(strip=True)

        # Visit the detail page
        try:
            detail_soup = fetch_soup(session, detail_url)

            website = get_website(detail_soup)
            bachelor_fees = get_fees(detail_soup, "saa-institution-bachelors-tuition-fees")
            master_fees = get_fees(detail_soup, "saa-institution-masters-tuition-fees")
            bachelor_programs = get_programs(detail_soup, "saa-bachelors-courses")
            master_programs = get_programs(detail_soup, "saa-masters-courses")

            # Append data
            universities.append({
                "University Name": name,
                "Location": location,
                "Type": uni_type,
                "Detail Page": detail_url,
                "Official Website": website,
                "Bachelor Tuition (Domestic)": bachelor_fees["Domestic"],
                "Bachelor Tuition (International)": bachelor_fees["International"],
                "Master Tuition (Domestic)": master_fees["Domestic"],
                "Master Tuition (International)": master_fees["International"],
                "Bachelor Programs": ", ".join(bachelor_programs),
                "Master Programs": ", ".join(master_programs),
            })

            print(f"Scraped: {name}")
        except Exception as e:
            # Deliberate best-effort boundary: one failing university is
            # reported and skipped so the rows already collected are kept.
            print(f"Failed to scrape {name}: {e}")
        finally:
            # Be polite to the server, also after a failed request.
            time.sleep(REQUEST_DELAY)

# Step 5: Save to CSV
df = pd.DataFrame(universities)
df.to_csv("canadian_universities.csv", index=False)
print("✅ Scraping complete. File saved as 'canadian_universities.csv'")




Scraped: Saint Mary’s University, Halifax
Scraped: Red Deer Polytechnic
Scraped: Confederation College
Scraped: St. Lawrence College
Scraped: Durham College
Scraped: Algonquin College
Scraped: College of the Rockies
Scraped: Conestoga College
Scraped: College of New Caledonia
Scraped: Southern Alberta Institute of Technology
Scraped: Northern Alberta Institute of Technology
Scraped: North Island College
Scraped: New York Institute of Technology Vancouver
Scraped: Yorkville University
Scraped: University of Fredericton
Scraped: Tyndale University
Scraped: St. Stephen’s University
Scraped: Providence University College and Theological Seminary
Scraped: Kingswood University
Scraped: Burman University
Scraped: Yukon University
Scraped: Université de l’Ontario français
Scraped: Université de Hearst
Scraped: Saint Francis Xavier University
Scraped: Nova Scotia College of Art and Design University
Scraped: University College of the North
Scraped: Algoma University
Scraped: Canadore College
Sc

Output

In [None]:

# Hand the CSV written by the scraper cell to google.colab's download helper.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this cell elsewhere.
from google.colab import files
files.download("canadian_universities.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>