# Extract MMU Programmes (By Faculty)

In [1]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

base_url = "https://www.mmu.edu.my/programmes/"
headers = {'User-Agent': 'Mozilla/5.0'}

# Seconds to wait on a request before giving up; without a timeout a stalled
# connection would hang the cell indefinitely.
REQUEST_TIMEOUT = 15
# Pause after each programme-page request so the server is not hammered.
REQUEST_DELAY = 0.5
# Fixed column order so the CSV always has a header row, even if nothing
# could be scraped (an empty, header-less CSV cannot be read back).
OUTPUT_COLUMNS = ["faculty", "programme", "entry_requirement"]

# A single session reuses the connection and sends the same headers each time.
session = requests.Session()
session.headers.update(headers)


def fetch_soup(url):
    """Download ``url`` and return its body parsed with ``html.parser``.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx status (raised by ``raise_for_status``), so error pages
            are never parsed as if they were real content.
    """
    res = session.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return BeautifulSoup(res.content, "html.parser")


# Step 1: Get all faculty links.
# A failure here is fatal on purpose: without the index page there is nothing
# to scrape, and continuing would silently write an empty CSV.
response = session.get(base_url, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

faculty_links = []

# `:first-of-type` matches every div.col-md-4 that is the first <div> among
# its siblings inside a div.row, so this can return several columns; the ones
# without a programme dropdown are skipped in the loop below.
first_col = soup.select("div.row div.col-md-4:first-of-type")

for col in first_col:
    dropdown = col.select_one("ul.programme-dropdown")
    if dropdown is None:
        continue
    for tag in dropdown.select("li a"):
        name = tag.text.strip()
        href = tag.get("href")
        if href:
            # Resolve relative links against the index page; absolute links
            # pass through urljoin unchanged.
            faculty_links.append((name, urljoin(base_url, href)))

# Step 2-4: Loop through each faculty and extract programmes + entry requirements
programmes_data = []

for faculty_name, faculty_url in faculty_links:
    print(f"Processing faculty: {faculty_name}")
    try:
        faculty_soup = fetch_soup(faculty_url)
    except requests.RequestException as e:
        # The faculty page itself is unavailable: skip it, keep the others.
        print(f"Error processing {faculty_name}: {e}")
        continue

    # Find programme links in this faculty page
    programme_tags = faculty_soup.select("div.faculty-programmes ul li a")

    for prog_tag in programme_tags:
        prog_name = prog_tag.text.strip()
        prog_href = prog_tag.get("href")
        if not prog_href:
            # Anchor without a target: nothing to visit.
            continue
        prog_url = urljoin(faculty_url, prog_href)
        print(f"  -> Programme: {prog_name}")

        # Step 4: Visit programme page and get entry requirements.
        # Errors are handled per programme so one bad page does not discard
        # the remaining programmes of the same faculty.
        try:
            prog_soup = fetch_soup(prog_url)
        except requests.RequestException as e:
            print(f"Error processing {faculty_name} / {prog_name}: {e}")
        else:
            # Get the <ul class="course-dropdown"> content
            entry_ul = prog_soup.select_one("ul.course-dropdown")
            entry_req = entry_ul.get_text(separator=" ", strip=True) if entry_ul else "Not Found"

            programmes_data.append({
                "faculty": faculty_name,
                "programme": prog_name,
                "entry_requirement": entry_req
            })
        finally:
            time.sleep(REQUEST_DELAY)  # Be gentle with the server

# Step 5: Save to DataFrame and CSV
df = pd.DataFrame(programmes_data, columns=OUTPUT_COLUMNS)
df.to_csv("mmu_faculty_programmes.csv", index=False)
print("\n✅ Data saved to 'mmu_faculty_programmes.csv'")


Processing faculty: Faculty of Cinematic Arts (FCA)
  -> Programme: Diploma in Cinematography
  -> Programme: Bachelor of Cinematic Arts (Hons.)
Processing faculty: Faculty of Law (FOL)
  -> Programme: Master of Laws (By Research)
  -> Programme: Doctor of Philosophy (Ph.D.) Laws (By Research)
  -> Programme: Foundation in Law
  -> Programme: Bachelor of Law (Hons.)
Processing faculty: Faculty of Engineering & Technology (FET)
  -> Programme: Master of Engineering Science (By Research)
  -> Programme: Doctor of Philosophy (Ph.D.) Engineering (By Research)
  -> Programme: Foundation in Engineering
  -> Programme: Diploma in Electronic Engineering
  -> Programme: Diploma in Mechanical Engineering
  -> Programme: Bachelor of Engineering (Hons.) Electronics majoring in Telecommunications
  -> Programme: Bachelor of Electronics Engineering (Robotics & Automation) with Honours
  -> Programme: Bachelor of Mechanical Engineering with Honours
Processing faculty: Faculty of Artificial Intelligen

# EDA
- **Findings:** the same programme is offered by more than one faculty

In [2]:
import pandas as pd
# Load the programme table written by the extraction step.
df = pd.read_csv("mmu_faculty_programmes.csv")

# Print the number of distinct faculties, then of distinct programme titles.
for column in ('faculty', 'programme'):
    print(df[column].nunique())

# Mark every row whose programme title occurs more than once in the table
# (keep=False flags all occurrences, not just the repeats).
is_repeated = df.duplicated(subset='programme', keep=False)
filtered_df = df.loc[is_repeated]
filtered_df

11
75


Unnamed: 0,faculty,programme,entry_requirement
6,Faculty of Engineering & Technology (FET),Master of Engineering Science (By Research),Fields of Research 2-D/3-D Modelling and Recon...
7,Faculty of Engineering & Technology (FET),Doctor of Philosophy (Ph.D.) Engineering (By R...,Fields of Research 2-D/3-D Modelling and Recon...
8,Faculty of Engineering & Technology (FET),Foundation in Engineering,Entry Requirements Pass SPM/O-Level or its equ...
14,Faculty of Artificial Intelligence & Engineeri...,Master of Engineering Science (By Research),Fields of Research (but not limited to) 5G Mob...
15,Faculty of Artificial Intelligence & Engineeri...,Doctor of Philosophy (Ph.D.) Engineering (By R...,Fields of Research (but not limited to) 5G Mob...
18,Faculty of Artificial Intelligence & Engineeri...,Foundation in Engineering,Entry Requirements Pass SPM/O-Level or its equ...
19,Faculty of Artificial Intelligence & Engineeri...,Bachelor of Science (Hons.) Intelligent Robotics,Entry Requirements Pass Foundation / Matricula...
24,Faculty of Computing & Informatics (FCI),Master of Computing (By Research),Fields of Research Artificial Intelligence Bio...
25,Faculty of Computing & Informatics (FCI),Master of Computer Science via ODL (By Coursew...,Entry Requirements A Bachelor’s degree (Level ...
26,Faculty of Computing & Informatics (FCI),Doctor of Philosophy (Ph.D.) in Computing (By ...,Fields of Research Artificial Intelligence Bio...


# Data Preprocessing
- Remove foundation, master's, and doctoral programmes
- Keep only diploma and bachelor's degree programmes

In [4]:
import pandas as pd

df = pd.read_csv("mmu_faculty_programmes.csv")

# Programme levels to drop; joined into one regex alternation below.
keywords = ['foundation', 'master', 'doctor']
excluded_pattern = '|'.join(keywords)

# Flag rows whose 'programme' title contains any keyword (case-insensitive).
# na=False treats a missing title as "no match", so such rows are kept.
is_excluded = df['programme'].str.contains(excluded_pattern, case=False, na=False)
df_filtered = df[~is_excluded]

display(df_filtered)
df_filtered.to_csv("preprocessed_programme.csv", index=False)

Unnamed: 0,faculty,programme,entry_requirement
0,Faculty of Cinematic Arts (FCA),Diploma in Cinematography,Entry Requirements Pass SPM/O-Level or its equ...
1,Faculty of Cinematic Arts (FCA),Bachelor of Cinematic Arts (Hons.),Entry Requirements Pass Foundation/Matriculati...
5,Faculty of Law (FOL),Bachelor of Law (Hons.),Entry Requirements Pass Foundation/Matriculati...
9,Faculty of Engineering & Technology (FET),Diploma in Electronic Engineering,Entry Requirements Pass SPM/O-Level or its equ...
10,Faculty of Engineering & Technology (FET),Diploma in Mechanical Engineering,Entry Requirements Pass SPM/O-Level or its equ...
11,Faculty of Engineering & Technology (FET),Bachelor of Engineering (Hons.) Electronics ma...,Entry Requirements Pass Foundation/Matriculati...
12,Faculty of Engineering & Technology (FET),Bachelor of Electronics Engineering (Robotics ...,Entry Requirements Pass Foundation/Matriculati...
13,Faculty of Engineering & Technology (FET),Bachelor of Mechanical Engineering with Honours,Entry Requirements Pass Foundation/Matriculati...
19,Faculty of Artificial Intelligence & Engineeri...,Bachelor of Science (Hons.) Intelligent Robotics,Entry Requirements Pass Foundation / Matricula...
20,Faculty of Artificial Intelligence & Engineeri...,Bachelor of Engineering (Hons.) Electrical,Entry Requirements Pass Foundation/Matriculati...
