# Scrape by Instructors

This notebook takes in a data frame of instructors' names and filters the class search results by each instructor.

In [None]:
# Load Selenium and automatically install the Chrome driver
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd
import re

from selenium.webdriver.chrome.service import Service 
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from getpass import getpass

from Login import *

In [None]:
# Load the faculty roster; search_multiple_semesters reads its "CMSE Faculty Name" column.
df = pd.read_csv('CMSE_Names.csv')
# get_driver, login_to_SIS and get_semesters_list are not defined in this notebook;
# presumably they come from the star import of Login -- TODO confirm.
driver = get_driver('Chrome')
time.sleep(3)  # Fixed pause before logging in (presumably to let the browser start)
# Shared explicit wait with a 20-second timeout, used by search_by_instructor.
wait = WebDriverWait(driver, 20)
login_to_SIS(driver, authenticate='Phone')
# semesters_list is indexed and sliced by search_multiple_semesters;
# previous_semesters is passed through to switch_to_semester.
semesters_list, previous_semesters = get_semesters_list(driver)

In [None]:
def _search_by_instructor_once(instructor, semester):
    """Run a single attempt of the instructor search and collect class names.

    Uses the notebook-level ``driver``, ``wait`` and ``previous_semesters``.

    Args:
        instructor: Text typed into the instructor last-name filter. It is also
            compared, with spaces removed, against the names shown in the
            result filter list.
        semester: Semester passed to ``switch_to_semester``.

    Returns:
        A set of class-name strings scraped from the result rows, or ``None``
        when no instructor row is found for the typed name or when the search
        brings up the ``#ICOK`` dialog.

    Raises:
        selenium.common.exceptions.WebDriverException: If any other page
            interaction fails or times out; the caller decides whether to retry.
    """
    # Function-scope import so this cell does not depend on editing the import cell.
    from selenium.common.exceptions import NoSuchElementException, WebDriverException

    click_js = "arguments[0].click();"
    name_field_id = 'MSU_CLSRCH_WRK2_LAST_NAME'

    switch_to_semester(driver, semester, previous_semesters)

    try:
        name_box = driver.find_element(By.ID, name_field_id)
    except NoSuchElementException:
        name_box = None

    if name_box is None:
        toggle = driver.find_element(By.ID, 'MSU_CLSRCH_WRK2_GROUP3')  # Click the Instructors filter
        driver.execute_script(click_js, toggle)
        time.sleep(1)
        try:
            load_button = driver.find_element(By.ID, 'MSU_CLSRCH_WRK2_MSU_LOAD_INSTRUCTS')
        except NoSuchElementException:
            load_button = None  # Some older semesters automatically click Load Instructors for us
        if load_button is not None:
            driver.execute_script(click_js, load_button)  # Click Load Instructors
        # Locate the name field in both cases; previously it stayed None when
        # the Load Instructors button was absent and the attempt always failed.
        name_box = wait.until(EC.element_to_be_clickable((By.ID, name_field_id)))
        time.sleep(1)  # More time to adjust

    name_box.clear()
    name_box.send_keys(instructor)
    time.sleep(1)
    try:
        group_box = driver.find_element(By.ID, "win9divMSU_CLSRCH_WRK2_GROUPBOX15")
        first_row = group_box.find_element(By.CSS_SELECTOR, "tr:not([style='display: none;'])")  # First visible row
    except NoSuchElementException:
        return None  # No instructor row matches the typed name

    driver.execute_script(click_js, first_row.find_element(By.CLASS_NAME, "ps-checkbox"))  # Click the first check box
    search_button = driver.find_element(By.ID, 'MSU_CLSRCH_WRK2_SEARCH_BTN')  # Hit Search
    driver.execute_script(click_js, search_button)
    wait.until(EC.any_of(EC.element_to_be_clickable((By.ID, "DESCR100$0_row_0")),
                         EC.element_to_be_clickable((By.ID, "#ICOK"))))

    # For some cases, the search returns no results and will throw up an error window
    try:
        ok_button = driver.find_element(By.ID, '#ICOK')
        ok_button.click()  # Click OK to get out of the error
    except WebDriverException:
        pass  # No dialog (or it could not be clicked): carry on with the results
    else:
        time.sleep(1)
        return None

    # Walk the instructor checkboxes in the result filter and unselect every
    # entry whose name is not the requested instructor.
    wanted_name = instructor.replace(' ', '')
    i = 0
    while True:
        try:  # Find a checkbox row based on index
            row = driver.find_element(By.ID, f"MSU_CLSRCH_I_DV$0_row_{i}")
        except NoSuchElementException:  # If there's no more checkbox, break out of loop
            break
        shown_name = row.find_element(By.ID, f"MSU_CLSRCH_WRK2_INSTR_NAME${i}").text
        if shown_name != wanted_name:
            checkbox = row.find_element(By.ID, f"MSU_CLSRCH_I_DV$selm${i}$$0")
            driver.execute_script(click_js, checkbox)
        i += 1

    if i > 1:
        # Remember the current grid text so we can tell when the filtered grid has loaded.
        grid_before = driver.find_element(By.ID, 'MSU_RSLT_NAV_WK_PTPG_ROWS_GRID').text
        apply_button = driver.find_element(By.ID, 'MSU_CLSRCH_WRK2_APPLY_PB$186$')  # Click Filter Results
        driver.execute_script(click_js, apply_button)
        wait.until(lambda drv: drv.find_element(By.ID, 'MSU_RSLT_NAV_WK_PTPG_ROWS_GRID').text != grid_before)
        wait.until(EC.element_to_be_clickable((By.ID, "DESCR100$0_row_0")))  # More time to adjust

    classes = set()
    for row_index in range(50):  # At most 50 result rows are read
        try:  # Find the class that the instructor taught
            result_row = driver.find_element(By.ID, f"DESCR100$0_row_{row_index}")
        except NoSuchElementException:
            break
        class_name = result_row.find_element(
            By.XPATH, f".//div[@id='win9divMSU_RSLT_NAV_WK_HTMLAREA${row_index}']").text
        # str.replace returns a new string; the result must be assigned back,
        # otherwise the badges stay in the stored name.
        class_name = class_name.replace('Approval Required', '').strip()
        class_name = class_name.replace('Cross-Listed', '').strip()
        classes.add(class_name)  # Add the class name to the set

    time.sleep(1)
    new_search = driver.find_element(By.ID, 'MSU_CLSRCH_WRK2_CLEAR_FLDS_PB')  # Click New Search
    driver.execute_script(click_js, new_search)
    wait.until(EC.visibility_of_element_located((By.ID, "win9divMSU_CLSRCH_WRK2_HTMLAREA3")))
    wait.until(EC.element_to_be_clickable((By.ID, 'MSU_CLSRCH_WRK2_GROUP3')))
    return classes


def search_by_instructor(instructor, semester, max_retries=None):
    """Search the classes taught by one instructor in one semester, retrying on failure.

    Each failed attempt prints a message and the search is run again from the
    start, so a long scrape does not have to be restarted by hand. Retries use
    a loop rather than recursion, so many consecutive failures cannot exhaust
    the call stack, and only ``Exception`` is caught, so Ctrl-C still stops
    the run.

    Args:
        instructor: Instructor name to filter by.
        semester: Semester to search in.
        max_retries: Maximum number of retries after the first attempt.
            ``None`` (the default) retries without limit, as before.

    Returns:
        A set of class-name strings, or ``None`` when the search finds no
        matching instructor or no results.

    Raises:
        Exception: The last failure, re-raised once ``max_retries`` is exceeded.
    """
    failures = 0
    while True:
        try:
            return _search_by_instructor_once(instructor, semester)
        except Exception:  # Fail-safe: retry this search instead of restarting the whole run
            print(f'Code fails here, run again from {instructor} in {semester}')
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise

In [None]:
def search_multiple_semesters(df, start_semester=None, end_semester=None):
    """Add one column per semester holding each faculty member's search result.

    Args:
        df: Data frame with a "CMSE Faculty Name" column; it is modified in
            place and also returned.
        start_semester: First semester of the range; falls back to the first
            entry of the notebook-level ``semesters_list`` when falsy.
        end_semester: Last semester of the range (inclusive); falls back to
            the last entry of ``semesters_list`` when falsy.

    Returns:
        The same ``df`` with a new column named after each semester in range.

    Raises:
        ValueError: If a given semester is not in ``semesters_list``.
    """
    first = start_semester if start_semester else semesters_list[0]
    last = end_semester if end_semester else semesters_list[-1]

    lo = semesters_list.index(first)
    hi = semesters_list.index(last)

    for term in semesters_list[lo:hi + 1]:

        def lookup(faculty_name):
            # Called immediately by apply(), so it always sees the current term.
            return search_by_instructor(faculty_name, term)

        df[f"{term}"] = df["CMSE Faculty Name"].apply(lookup)

    return df

# Scrape every semester in semesters_list from 'Fall Semester 2021' to
# 'Fall Semester 2024' (inclusive); df gains one column per semester in place.
search_multiple_semesters(df, start_semester='Fall Semester 2021', end_semester='Fall Semester 2024')

In [None]:
# Save the results to the same path the roster was read from, overwriting the input file.
df.to_csv('CMSE_Names.csv', index=False)