In [1]:
from bs4 import BeautifulSoup
from pathlib import Path
from tqdm.asyncio import tqdm as tqdm
from zipfile import ZipFile
from dataclasses import dataclass

import pandas as pd
import aiohttp
import socket
import asyncio
import os
import requests
import re
import json

# import logging
# import http.client
# http.client.HTTPConnection.debuglevel = 1

tqdm.pandas()

In [2]:
# Source: https://stackoverflow.com/questions/56950987/download-file-from-url-and-save-it-in-a-folder-python
def download(url: str, filename : str = "", dest_folder: str = ""):
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)  # create folder if it does not exist

    if filename == "":
        filename = url.split('/')[-1].replace(" ", "_")  # be careful with file names

    file_path = os.path.join(dest_folder, filename)

    r = requests.get(url, stream=True)
    if r.ok:
        print("Saving to", os.path.abspath(file_path))
        with open(file_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 8):
                if chunk:
                    f.write(chunk)
                    f.flush()
                    os.fsync(f.fileno())
    else:  # HTTP status code 4XX/5XX
        print("Download failed: status code {}\n{}".format(r.status_code, r.text))

In [3]:
def unzip(path: str):
    file = Path(path)

    with ZipFile(path, "r")  as zip:
        file_path = file.parents[0] / file.stem
        zip.extractall(file_path)

In [4]:
# school_info_columns = [ 
#     "school_name",
#     "url_address",
#     "address",
#     "postal_code",
#     "telephone_no",
#     "telephone_no_2",
#     "fax_no",
#     "fax_no_2",
#     "email_address",
#     "mrt_desc",
#     "bus_desc",
#     "principal_name",
#     "first_vp_name",
#     "second_vp_name",
#     "third_vp_name",
#     "fourth_vp_name",
#     "fifth_vp_name",
#     "sixth_vp_name",
#     "dgp_code",
#     "zone_code",
#     "type_code",
#     "nature_code",
#     "session_code",
#     "mainlevel_code",
#     "sap_ind",
#     "autonomous_ind",
#     "gifted_ind",
#     "ip_ind",
#     "mothertongue1_code",
#     "mothertongue2_code",
#     "mothertongue3_code"
# ]

if not os.path.exists("temp/school_info.zip"):
    download("https://data.gov.sg/dataset/c004b703-5e64-47db-a504-e60e74fd3b32/download", "school_info.zip", "temp")
    unzip("temp/school_info.zip")

In [5]:
def get_name(school_name:str):
    name = school_name.lower()
    return name.title().replace("'S", "'s")

semaphore = asyncio.Semaphore(4)

async def get_school_location(session, query, index):
    async with semaphore:  # next coroutine(s) will stuck here until the previous is done
        # await asyncio.sleep(0.5)

        query = query.replace(", Singapore", "")
        query = query.replace(".", "")

        payload = {
            "searchVal": query,
            "returnGeom": "Y",
            "getAddrDetails": "Y",
            "pageNum": 1
        }

        url = "https://developers.onemap.sg/commonapi/search"
        response = await session.request(method='GET', url=url, params=payload)
        response = await response.json()

        if len(response["results"]) > 0:
            return index, response["results"][0]["LATITUDE"], response["results"][0]["LONGITUDE"]
        else:
            print(f"{query} not found (index: {index}, url:{url})")
            pass

if not os.path.exists("temp/school_list_incomplete.json"):
    school_info = pd.read_csv("temp/school_info/general-information-of-schools.csv")
    to_drop = [ 
        # "school_name",
        # "url_address",
        # "address",
        # "postal_code",
        # "telephone_no",
        "telephone_no_2",
        "fax_no",
        "fax_no_2",
        # "email_address",
        "mrt_desc",
        "bus_desc",
        "principal_name",
        "first_vp_name",
        "second_vp_name",
        "third_vp_name",
        "fourth_vp_name",
        "fifth_vp_name",
        "sixth_vp_name",
        "dgp_code",
        "zone_code",
        "type_code",
        "nature_code",
        "session_code",
        # "mainlevel_code",
        "sap_ind",
        "autonomous_ind",
        "gifted_ind",
        "ip_ind",
        "mothertongue1_code",
        "mothertongue2_code",
        "mothertongue3_code"
    ]
        
    school_info.drop(to_drop, inplace=True, axis=1)
    school_info.rename(columns={
        "school_name": "name", 
        "mainlevel_code": "level",
        "url_address": "website_url"
    }, inplace=True)

    school_info["name"] = [ get_name(name) for name in school_info["name"]]
    school_info["level"] = [ level.title() for level in school_info["level"]]
    school_info["address"] = [ re.sub(" +", " ", address.title()) for address in school_info["address"]]
    school_info["website_url"] = [ website_url.lower() for website_url in school_info["website_url"]]
    school_info["email_address"] = [ email_address.lower() for email_address in school_info["email_address"]]

    # async with aiohttp.ClientSession(timeout=1) as session:
    #     tasks = [get_school_location(session, query, index) for index, query in enumerate(school_info[:]["name"])]
    #     response = [await future for future in tqdm(asyncio.as_completed(tasks), total=len(tasks))]
    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(family=socket.AF_INET)) as session:
        tasks = [get_school_location(session, query, index) for index, query in enumerate(school_info[:]["name"])]
        response = await tqdm.gather(*tasks)
        temp = sorted(response, key=lambda x: x[0])
        results = [(x[1], x[2]) for x in temp]

        school_info[["latitude", "longitude"]] = pd.DataFrame(results, index=school_info.index)

    column_names = [ "name", "level", "address", "postal_code", "latitude", "longitude", "website_url", "email_address", "telephone_no" ]
    school_info = school_info.reindex(columns=column_names)

    if not os.path.exists("out"):
        os.makedirs("out")

    school_info.to_json("temp/school_list_incomplete.json", orient="records")
    school_info.head()

In [6]:
if os.path.exists("temp/school_list_incomplete_2.json"):
    with open("temp/school_list_incomplete_2.json", "r") as f:
        school_list = pd.DataFrame(json.load(f))

In [7]:
# Get all ITE Schools

if not os.path.exists("temp/school_list_incomplete_2.json"):
    ite_schools_dict = {}

    page = requests.get("https://www.ite.edu.sg/who-we-are/get-in-touch")
    soup = BeautifulSoup(page.content)

    name_query = soup.find_all("strong")
    for elem in name_query:
        key = re.sub('[^a-zA-Z0-9 \n\.]', "", elem.text).strip()

        if key not in ite_schools_dict:
            if "ITE College" in key:
                if key.startswith("ITE") == False:
                    continue

                ite = {}
        ite_schools_dict = {}

        page = requests.get("https://www.ite.edu.sg/who-we-are/get-in-touch")
        soup = BeautifulSoup(page.content)

        name_query = soup.find_all("strong")
        for elem in name_query:
            key = re.sub('[^a-zA-Z0-9 \n\.]', "", elem.text).strip()

            if key not in ite_schools_dict:
                if "ITE College" in key:
                    if key.startswith("ITE") == False:
                        continue

                    ite = {}
                    ite["name"] = key.replace("ITE ", "Institute of Technical Education - ")
                    ite["level"] = "ITE"

                    temp = key.replace(" ", "-").lower()
                    ite["website_url"] = f"https://www.ite.edu.sg/colleges/{temp}"

                    temp = elem.next_sibling
                    while temp.text.startswith("Email:") == False:
                        temp = temp.next_sibling

                    temp = temp.next_element

                    if "training@ite.edu.sg" in temp.text:
                        temp = temp.next_sibling
                        temp = temp.next_element
                    
                    ite["email_address"] = temp.text
                    ite_schools_dict[key] = ite
            else:
                ite = ite_schools_dict[key]
                
                temp = elem.findNext("br").next_sibling
                temp_address = temp.text
                temp_address = temp_address.replace("Address: ", "")
                temp_address = temp_address.replace(", Singapore", "")

                if match := re.search("\d{6}", temp_address, re.IGNORECASE):
                    postal = match.group()
                    ite["postal_code"] = postal

                    temp_address = temp_address.replace(postal, "").strip()
                    ite["address"] = temp_address

                temp = temp.findNext("br").next_sibling
                temp_tel = temp.text
                temp_tel = temp_tel.replace("Tel: (+65) ", "")
                temp_tel = temp_tel.replace(" ", "")

                ite["telephone_no"] = temp_tel

        ite_schools = list(ite_schools_dict.values())

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(family=socket.AF_INET)) as session:
        tasks = [get_school_location(session, school["name"].replace(" - ", " "), index) for index, school in enumerate(ite_schools)]
        temp = await tqdm.gather(*tasks)
        results = [(x[1], x[2]) for x in temp]

        for index, latlong in enumerate(results):
            ite_schools[index]["latitude"] = latlong[0]
            ite_schools[index]["longitude"] = latlong[1]

    with open("temp/school_list_incomplete.json", "r") as f:
        school_list = pd.DataFrame(json.load(f))
        updated_school_list = school_list.append(pd.DataFrame(ite_schools))
        sorted_school_list = updated_school_list.sort_values(by=["name"], ascending=True)
        sorted_school_list.to_json("temp/school_list_incomplete_2.json", orient="records")

In [8]:
if os.path.exists("temp/school_list_incomplete_2.json"):
    with open("temp/school_list_incomplete_2.json", "r") as f:
        school_list = pd.DataFrame(json.load(f))

In [9]:
# Get all ITE Schools

if not os.path.exists("temp/school_list_incomplete_3.json"):
    ite_schools_dict = {}

    page = requests.get("https://www.moe.gov.sg/microsites/whats-next/for-gce-o-level-students/where-do-i-want-to-go/i-want-to-further-my-studies/polytechnics/index.html")
    soup = BeautifulSoup(page.content)

    name_query = soup.find_all("a", { "rel":"noopener", "target": "_blank" })

    poly_schools = []
    
    for elem in name_query:
        poly = {}
        poly["name"] = re.sub("[\(\[].*?[\)\]]", "", elem.text).strip()
        poly["level"] = "Polytechnic"
        poly["website_url"] = elem.attrs["href"]

        page_detailed = requests.get("https://www.moe.gov.sg/schoolfinder/schooldetail", params={
            "schoolname": poly["name"]
        })
        soup_detailed = BeautifulSoup(page_detailed.content)
        poly["email_address"] = soup_detailed.select("a[href^=\"mailto:\"]")[0].text

        temp_address = soup_detailed.select("a[href^=\"https://www.google.com/maps/place/\"]")[0].text

        if match := re.search("\d{6}", temp_address, re.IGNORECASE):
            postal = match.group()
            poly["postal_code"] = postal

            temp_address = temp_address.replace(postal, "").strip()
            poly["address"] = temp_address.replace(", S", "")

        poly["telephone_no"] = (soup_detailed.find("span", text="Phone:")) \
                                .next_sibling \
                                .next_sibling \
                                .text \
                                .replace(" ", "")
        
        poly_schools.append(poly)

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(family=socket.AF_INET)) as session:
        tasks = [get_school_location(session, school["name"], index) for index, school in enumerate(poly_schools)]
        temp = await tqdm.gather(*tasks)
        results = [(x[1], x[2]) for x in temp]

        for index, latlong in enumerate(results):
            poly_schools[index]["latitude"] = latlong[0]
            poly_schools[index]["longitude"] = latlong[1]

    with open("temp/school_list_incomplete_2.json", "r") as f:
        school_list = pd.DataFrame(json.load(f))
        updated_school_list = school_list.append(pd.DataFrame(poly_schools))
        sorted_school_list = updated_school_list.sort_values(by=["name"], ascending=True)
        sorted_school_list.to_json("temp/school_list_incomplete_3.json", orient="records")

In [10]:
if os.path.exists("temp/school_list_incomplete_3.json"):
    with open("temp/school_list_incomplete_3.json", "r") as f:
        school_list = json.load(f)
        # school_list = pd.DataFrame(json.load(f))

In [11]:
semaphore = asyncio.Semaphore(5)

def get_name(school):
    name = re.sub(r"[^A-Za-z0-9 ]", "", school["name"]) \
                .lower()

    if school["level"] == "primary":
        if "primary" not in name:
            name += " primary section"
    elif school["level"] == "secondary":
        if "secondary school" not in name:
            name += " secondary school"
    elif school["level"] == "junior college":
        if "junior college" not in name:
            name += " junior college"

    return name

async def fetch(session, school):
    response =  await session.get("https://www.moe.gov.sg/schoolfinder/schooldetail", params={"schoolname": get_name(school)})
    return await response.text()

def get_cca(school, soup):
    ccas = []
    elem = soup.find("span", text=re.compile("CCAs")) \

    if elem is not None:
        elem = elem.find_next("ul")
        for li in elem.find_all("li"):
            ccas.append(li.text)
    else:
        print(f"Warning: No CCAs found in {school['name']}")

    school["ccas"] = ccas

def get_subjects(school, soup):
    subjects = []
    elem = soup.find("span", text="Subjects offered") \

    if elem is not None:
        elem = elem.find_next("ul")

        for li in elem.find_all("li"):
            subjects.append(li.text)
    else:
        print(f"Warning: No subjects found in {school['name']}")

    school["subjects"] = subjects



def get_primary_school_details(school, soup):
    get_subjects(school, soup)
    pass

def get_secondary_school_details(school, soup):
    get_subjects(school, soup)

    #get aggregate
    try:    
        data = []
        table = soup.find("table")
        table_body = table.find('tbody')

        rows = table_body.find_all('tr')
        for row in rows:
            cols = row.find_all('td')
            cols = [elem.text.strip() for elem in cols]
            data.append([elem for elem in cols if elem]) # Get rid of empty values

        cutoff = []
        # cutoff.append({ affiliated["express"]           = data[0][0] })
        # cutoff.append({ affiliated["normal_academic"]   = data[1][0] })
        # cutoff.append({ affiliated["normal_technical"]  = data[2][0] })
        cutoff.append({ "name": "Express (Affiliated)", "range": data[0][0] })
        cutoff.append({ "name": "Normal Academic (Affiliated)", "range": data[1][0] })
        cutoff.append({ "name": "Normal Technical (Affiliated)", "range": data[2][0] })

        cutoff.append({ "name": "Express (Non-Affiliated)", "range": data[0][1] })
        cutoff.append({ "name": "Normal Academic (Non-Affiliated)", "range": data[1][1] })
        cutoff.append({ "name": "Normal Technical (Non-Affiliated)", "range": data[2][1] })

        school["cutoff"] = cutoff
    except BaseException as e:
        print(f"get_secondary_school_details({school['name']}): {e}")

def get_jc_details(school, soup):
    get_subjects(school, soup)

    try:
        elem = soup.find(text=re.compile("aggregate"))

        cutoff = []

        for item in elem.parent.find_all("span"):
            aggregate = {
                "name": "",
                "range": ""
            }

            temp = item.text.split(":")
            aggregate["name"] = temp[0].strip()
            aggregate["range"] = temp[1].strip()
            
            cutoff.append(aggregate)

        school["cutoff"] = cutoff
    except BaseException as e:
        print(f"get_jc_details({school['name']}): {e}")

def get_ite_details(school, soup):
    get_cca(school, soup)

def get_poly_details(school, soup):
    get_cca(school, soup)

async def get_school_details(session, school):
    if school["level"] == "Mixed Levels": # if "Mixed School" then "Prompt user to look at site for details :d"
        print(f"Skipping {school['name']} because it is a 'Mixed Level' school")
        return

    async with semaphore:
        await asyncio.sleep(1)

        html = await fetch(session, school)
        soup = BeautifulSoup(html)

        if(soup.find(text=" You do not have permission to access this page. ")):
            print(f"Error: Encountered error when scrapping '{get_name(school)}'")

        get_cca(school, soup)

        if school["level"] == "Primary": # Can view Subjects, CCAs
            get_primary_school_details(school, soup)

        if school["level"] == "Secondary": # Can view PSLE range (Table: rows(stream) cols(affliation)), Subjects, CCAs
            get_secondary_school_details(school, soup)

        if school["level"] == "Junior College": # Can view JAE L1R5 aggregate, Subjects, CCAs
            get_jc_details(school, soup)

        if school["level"] == "ITE": # Courses, CCA
            get_ite_details(school, soup)

        if school["level"] == "Polytechnic": # Courses, CCA
            get_poly_details(school, soup)
            pass

if not os.path.exists("temp/school_list_incomplete_4.json"):
    with open("temp/school_list_incomplete_3.json", "r") as f:
        school_list = json.load(f)

    async with aiohttp.ClientSession() as session:
        tasks = [ get_school_details(session, school) for school in school_list ]
        await tqdm.gather(*tasks)

        school_list = pd.DataFrame(school_list)
        school_list.to_json("temp/school_list_incomplete_4.json", orient="records")

In [12]:
if not os.path.exists("temp/school_list_incomplete_5.json"):
    np_courses  = "https://data.gov.sg/dataset/bad6a2b7-62e3-4cd3-a7f7-e9e055bd38d3/download"
    tp_courses  = "https://data.gov.sg/dataset/b72f297b-231d-4ba2-9998-86a93a0a903f/download"
    rp_courses  = "https://data.gov.sg/dataset/29e98e15-903a-49ac-b766-142f9257c7be/download"
    sp_courses  = "https://data.gov.sg/dataset/3a36f942-55e4-4943-9479-ddffdc903c37/download"
    nyp_courses = "https://data.gov.sg/dataset/10ff4aaf-f998-4588-88c5-057382770a00/download"
    ite_courses = "https://data.gov.sg/dataset/c858dc4d-012d-487f-99ea-df557b8609b5/download"

    def download_course(url, name):
        if not os.path.exists(f"temp/{name}.zip"):
            download(url, f"{name}.zip", "temp")
            unzip(f"temp/{name}.zip")

    download_course(np_courses, "np_courses")
    download_course(tp_courses, "tp_courses")
    download_course(rp_courses, "rp_courses")
    download_course(sp_courses, "sp_courses")
    download_course(nyp_courses, "nyp_courses")
    download_course(ite_courses, "ite_courses")

    # Ngee Ann Poly
    np_courses = pd.read_csv("temp/np_courses/2021-full-time-diploma-courses.csv")

    to_drop = [ "year","school","course_description" ]
    np_courses.drop(columns=to_drop, inplace=True)
    np_courses.rename(columns={
        "course_code": "code",
        "course_name": "name",
        "reference": "website_url"
    }, inplace=True)
    np_courses = np_courses.reindex(columns=[ "code", "name", "website_url" ])

    # Temasek Poly
    tp_courses = pd.read_csv("temp/tp_courses/academic-year-2020-temasek-polytechnic-full-time-courses.csv")

    to_drop = [ "year","school","course_description", "poly_course_code", "course_description" ]
    tp_courses.drop(columns=to_drop, inplace=True)
    tp_courses.rename(columns={
        "moe_course_code": "code",
        "course_name": "name",
        "reference": "website_url"
    }, inplace=True)
    tp_courses = tp_courses.reindex(columns=[ "code", "name", "website_url" ])

    # Republic Poly
    rp_courses = pd.read_csv("temp/rp_courses/republic-polytechnic-full-time-diploma-courses-2019.csv")

    # year,school,course_name,course_abbreviation,course_code,reference
    to_drop = [ "year","school","course_abbreviation" ]
    rp_courses.drop(columns=to_drop, inplace=True)
    rp_courses.rename(columns={
        "course_code": "code",
        "course_name": "name",
        "reference": "website_url"
    }, inplace=True)
    rp_courses = rp_courses.reindex(columns=[ "code", "name", "website_url" ])

    # Singapore Poly
    sp_courses = pd.read_csv("temp/sp_courses/sp-full-time-diploma-courses.csv")

    # year,school,course_name,course_description,course_code,reference
    to_drop = [ "year","school","course_description" ]
    sp_courses.drop(columns=to_drop, inplace=True)
    sp_courses.rename(columns={
        "course_code": "code",
        "course_name": "name",
        "reference": "website_url"
    }, inplace=True)
    sp_courses = sp_courses.reindex(columns=[ "code", "name", "website_url" ])

    # Nanyang Poly
    nyp_courses = pd.read_csv("temp/nyp_courses/nanyang-polytechnic-full-time-diploma-courses-2016-to-2021.csv")
    nyp_courses = nyp_courses[nyp_courses["academic_year"] == 2021]

    # academic_year,jae_course_code,course_name,url
    to_drop = [ "academic_year" ]
    nyp_courses.drop(columns=to_drop, inplace=True)
    nyp_courses.rename(columns={
        "jae_course_code": "code",
        "course_name": "name",
        "url": "website_url"
    }, inplace=True)
    nyp_courses["name"] = [ course.title().replace("In", "in") for course in nyp_courses["name"]]
    nyp_courses = nyp_courses.reindex(columns=[ "code", "name", "website_url" ])

    # ITE
    ite_courses = pd.read_csv("temp/ite_courses/ite-course-catalog.csv")
    ite_courses = ite_courses[(ite_courses["course_type"] == "Full-Time") & (ite_courses["year"] == 2021)]

    # year,course_type,course_name
    to_drop = [ "year", "course_type" ]
    ite_courses.drop(columns=to_drop, inplace=True)
    ite_courses.rename(columns={ "course_name": "name" }, inplace=True)

    with open("temp/school_list_incomplete_4.json", "r") as f:
        school_list = json.load(f)

        np = next(school for school in school_list if school["name"] == "Ngee Ann Polytechnic")
        np["courses"] = np_courses.to_dict("records")

        tp = next(school for school in school_list if school["name"] == "Temasek Polytechnic")
        tp["courses"] = tp_courses.to_dict("records")

        rp = next(school for school in school_list if school["name"] == "Republic Polytechnic")
        rp["courses"] = rp_courses.to_dict("records")

        sp = next(school for school in school_list if school["name"] == "Singapore Polytechnic")
        sp["courses"] = sp_courses.to_dict("records")

        nyp = next(school for school in school_list if school["name"] == "Nanyang Polytechnic")
        nyp["courses"] = nyp_courses.to_dict("records")

        for ite in school_list:
            if "Institute of Technical Education" in ite["name"]:
                ite["courses"] = ite_courses.to_dict("records")

    school_list = pd.DataFrame(school_list)
    school_list.to_json("temp/school_list_incomplete_5.json", orient="records")


In [13]:
semaphore = asyncio.Semaphore(5)

def get_course_name(course):
    name = course["name"] \
                .replace(",", "") \
                .replace("Diploma in ", "") \
                .replace("&", "and") \
                .replace(" ", "-")

    return f"{course['code']}-{name}".lower()

async def fetch_course(session, course):
    response =  await session.get("https://www.moe.gov.sg/coursefinder/coursedetail", params={"course": get_course_name(course)})
    return await response.text()

async def get_course_details(session, course):

    async with semaphore:
        await asyncio.sleep(1)

        html = await fetch_course(session, course)
        soup = BeautifulSoup(html)

        if(soup.find(text="Page not found")):
            print(f"{course['name']} does not exist anymore..")
            course["remove"] = True
            return

        try:
            elem = soup.find(text=re.compile("aggregate:"))

            aggregate = {
                "name": "",
                "range": "",
            }

            aggregate["name"] = elem.text \
                                    .replace("2021 JAE", "") \
                                    .replace("aggregate", "") \
                                    .strip()

            elem = elem.parent.find("span")
            aggregate["range"] = elem.text \
                                    .strip()
                
            course["cutoff"] = aggregate
        except:
            print(f"Warning: {course} does not have cutoff")

if not os.path.exists("out/school_list.json"):
    with open("temp/school_list_incomplete_5.json", "r") as f:
        school_list = json.load(f)

    for school in school_list:
        if school["level"] == "Polytechnic":
                async with aiohttp.ClientSession() as session:
                    print(f"Getting course details from '{school['name']}'..")
                    tasks = [ get_course_details(session, course) for course in school["courses"] ]
                    await tqdm.gather(*tasks)

                    courses = pd.DataFrame(school["courses"])
                    
                    if "remove" in courses:
                        courses = courses[courses["remove"] != True]
                        school["courses"] = courses.to_dict("record")

    school_list = pd.DataFrame(school_list)
    school_list.to_json("out/school_list.json", orient="records")

In [14]:
with open("out/school_list.json", "r") as f:
    school_list = pd.DataFrame(json.load(f))

school_list.groupby(by=school_list["level"]).first()

Unnamed: 0_level_0,name,address,postal_code,latitude,longitude,website_url,email_address,telephone_no,ccas,subjects,cutoff,courses
level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Centralised Institute,Millennia Institute,60 Bukit Batok West Avenue 8,658965,1.35031930770024,103.741593577687,http://www.millenniainstitute.moe.edu.sg,millennia_inst@moe.edu.sg,63023700,"[Badminton (Girls and Boys), Basketball (Girls...",,,
ITE,Institute of Technical Education - College Cen...,2 Ang Mo Kio Drive,567720,1.37789204869901,103.856411982029,https://www.ite.edu.sg/colleges/ite-college-ce...,college_central@ite.edu.sg,65902211,"[Balloon Sculpting Club, Craft & Unfold, Flori...",,,"[{'name': 'Higher Nitec in Biotechnology'}, {'..."
Junior College,Anderson Serangoon Junior College,4500 Ang Mo Kio Avenue 6,569843,1.37894408046934,103.845760935132,www.asrjc.moe.edu.sg,asrjc@moe.edu.sg,64596822,"[Shooting (Girls and Boys), Badminton (Girls a...","[Chinese Language Syllabus B, Biology, Chemist...","[{'name': 'Arts', 'range': '5 - 11'}, {'name':...",
Mixed Levels,Anglo-Chinese School (Independent),121 Dover Road,139650,1.30370409257783,103.780363417671,http://www.acsindep.moe.edu.sg,acis@moe.edu.sg,67731633,,,,
Polytechnic,Nanyang Polytechnic,180 Ang Mo Kio Avenue 8,569830,1.38111875111583,103.849736066713,https://www.nyp.edu.sg/,askNYP@nyp.edu.sg,64515115,"[SBM Club, SCL Club, SDN Club, SEG Club, SHSS ...",,,"[{'code': 'C32', 'name': 'Diploma in Experient..."
Primary,Admiralty Primary School,11 Woodlands Circle,738907,1.4426347903311,103.800040119743,https://admiraltypri.moe.edu.sg/,admiralty_ps@moe.edu.sg,63620598,"[Football (Girls and Boys), Track and Field (G...","[Art, Chinese Language, English Language, Foun...",,
Secondary,Admiralty Secondary School,31 Woodlands Crescent,737916,1.44589068910993,103.802398196596,http://www.admiraltysec.moe.edu.sg,admiralty_ss@moe.edu.sg,63651733,"[Badminton (Girls and Boys), Netball (Girls), ...","[Basic Chinese Language, Computer Applications...","[{'name': 'Express (Affiliated)', 'range': '-'...",


In [15]:
school_list[school_list["level"] == "Secondary"]

Unnamed: 0,name,level,address,postal_code,latitude,longitude,website_url,email_address,telephone_no,ccas,subjects,cutoff,courses
1,Admiralty Secondary School,Secondary,31 Woodlands Crescent,737916,1.44589068910993,103.802398196596,http://www.admiraltysec.moe.edu.sg,admiralty_ss@moe.edu.sg,63651733,"[Badminton (Girls and Boys), Netball (Girls), ...","[Basic Chinese Language, Computer Applications...","[{'name': 'Express (Affiliated)', 'range': '-'...",
3,Ahmad Ibrahim Secondary School,Secondary,751 Yishun Avenue 7,768928,1.43605975368804,103.829718690077,http://www.ahmadibrahimsec.moe.edu.sg,aiss@moe.edu.sg,67585384,"[Shooting (Girls and Boys), Basketball (Girls ...","[Music Preparatory Course, Computer Applicatio...","[{'name': 'Express (Affiliated)', 'range': '-'...",
8,Anderson Secondary School,Secondary,10 Ang Mo Kio Street 53,569206,1.37434001701177,103.851554107068,http://www.andersonsec.moe.edu.sg,anderson_ss@moe.edu.sg,64598303,"[Basketball (Girls and Boys), Netball (Girls),...","[Basic Chinese Language, Computer Applications...","[{'name': 'Express (Affiliated)', 'range': '-'...",
11,Ang Mo Kio Secondary School,Secondary,6 Ang Mo Kio Street 22,569362,1.36733710171069,103.842154973672,http://www.angmokiosec.moe.edu.sg,amkss@moe.edu.sg,64548605,"[Badminton (Girls and Boys), Basketball (Boys)...","[Art, Design & Technology, Food & Consumer Edu...","[{'name': 'Express (Affiliated)', 'range': '-'...",
12,Anglican High School,Secondary,600 Upper Changi Road,487012,1.33118109596833,103.941853443058,http://www.anglicanhigh.moe.edu.sg,ahs@moe.edu.sg,62414866,"[Badminton (Girls and Boys), Basketball (Girls...","[Iscore 1, Iscore 2, Appreciation of Chinese C...","[{'name': 'Express (Affiliated)', 'range': '-'...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,Yuhua Secondary School,Secondary,35 Jurong West Street 41,649406,1.34718626876968,103.722687235839,http://www.yuhuasec.moe.edu.sg,yuhua_ss@moe.edu.sg,65661985,"[Football (Boys), Wushu (Girls and Boys), Voll...","[Alp, Cce, Art, Basic Chinese Language, Comput...","[{'name': 'Express (Affiliated)', 'range': '-'...",
347,Yusof Ishak Secondary School,Secondary,11 Bukit Batok Street 25,658712,1.34233921440623,103.760032628302,http://www.yusofishaksec.moe.edu.sg,yiss@moe.edu.sg,65009800,"[Badminton (Girls and Boys), Basketball (Boys)...","[Chinese Language Syllabus B, Basic Chinese La...",,
348,Yuying Secondary School,Secondary,47 Hougang Avenue 1,538884,1.35713595302467,103.890194755287,http://www.yuyingsec.moe.edu.sg,yuying_ss@moe.edu.sg,62827968,"[Badminton (Boys), Basketball (Boys), Sepaktak...","[Basic Chinese Language, Computer Applications...","[{'name': 'Express (Affiliated)', 'range': '-'...",
351,Zhenghua Secondary School,Secondary,91 Senja Road,677741,1.38836583415352,103.765510638527,http://www.zhenghuasec.moe.edu.sg,zhenghua_ss@moe.edu.sg,67639455,"[Basketball (Boys), Netball (Girls), Football ...","[Art, Design & Technology, Food & Consumer Edu...","[{'name': 'Express (Affiliated)', 'range': '-'...",


In [18]:
school = school_list[school_list["name"] == "Nanyang Polytechnic"]["courses"]
print(school.count)

<bound method Series.count of 185    [{'code': 'C32', 'name': 'Diploma in Experient...
Name: courses, dtype: object>
