In [None]:
# Both endpoints live on the same host, so it is spelled out only once.
_FLOORPLANS_HOST = "https://floorplans.mit.edu"

# Page loaded first by login_to_floorplan_page().
SEARCH_URL = _FLOORPLANS_HOST + "/SearchPDF.Asp"
# Per-building PDF listing; download_all_floorplans() appends a building name to it.
LIST_URL = _FLOORPLANS_HOST + "/ListPDF.Asp?Bldg="

In [None]:
import os  
import sys
from selenium import webdriver  
from selenium.webdriver.common.keys import Keys  
from selenium.webdriver.chrome.options import Options 
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [None]:
# Folder that contains the `duo_gen` module (a duo-cli checkout).
# Set the DUO_CLI_PATH environment variable to point at your own checkout; the
# fallback keeps the location this notebook was originally written against.
DUO_PATH = os.environ.get("DUO_CLI_PATH", "/Users/georgiashay/Documents/Software/duo-cli")
# Guarded so that re-running this cell does not keep appending the same entry.
if DUO_PATH not in sys.path:
    sys.path.append(DUO_PATH)
from duo_gen import generate_next_token

In [None]:
# abspath('') resolves to the current working directory at the time this cell runs.
SCRIPT_LOCATION = os.path.abspath('')
# Handed to Chrome as its download directory and later scanned by reorganize_floorplans().
DATA_FOLDER = os.path.join(SCRIPT_LOCATION, 'data3/')

In [None]:
# Rule given to Chrome's `managed_auto_select_certificate_for_urls` preference:
# URL pattern https://idp.mit.edu:446, filtered on issuer OU "Client CA v1".
cert_auto_select_rule = '{"pattern":"https://idp.mit.edu:446","filter":{"ISSUER":{"OU":"Client CA v1"}}}'

# Profile preferences; downloads are directed to DATA_FOLDER.
chrome_prefs = {
    "plugins.always_open_pdf_externally": True,
    "download.default_directory": DATA_FOLDER,
    'profile.managed_auto_select_certificate_for_urls': [cert_auto_select_rule],
}

chrome_options = Options()
for cli_switch in ("--ignore-certificate-errors", "--ignore-urlfetcher-cert-requests"):
    chrome_options.add_argument(cli_switch)
chrome_options.add_experimental_option("prefs", chrome_prefs)
# Headless mode stays off: the original `chrome_options.set_headless()` call was commented out.

In [None]:
def login_to_floorplan_page(driver):
    """Log `driver` in to the floor-plan site using a certificate plus a Duo passcode.

    Loads SEARCH_URL, clicks the "continue" and "login by certificate" buttons,
    enters a passcode from `generate_next_token()` inside the Duo iframe, and
    blocks until the element named "Bldg" is visible.

    Args:
        driver: Selenium WebDriver to drive through the login flow.

    Returns:
        The same `driver` that was passed in.

    Raises:
        selenium.common.exceptions.TimeoutException: if any awaited element
            does not become ready within 10 seconds.
    """
    wait = WebDriverWait(driver, 10)
    driver.get(SEARCH_URL)

    print("Waiting for continue button")
    # Click login continue button when it appears
    wait.until(EC.element_to_be_clickable((By.ID, "Select"))).click()

    print("Wait for certificate button")
    # Click login by certificate button when it appears
    wait.until(EC.element_to_be_clickable((By.NAME, "login_certificate"))).click()

    # Jump into iframe for entering passcode
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, 'duo_iframe')))

    print("Entering passcode")
    # Choose passcode duo authentication method. Waiting (instead of an immediate
    # lookup) covers the case where the frame exists but its content has not
    # rendered yet; it also avoids the find_element_by_* helpers, which were
    # removed in Selenium 4.3.
    passcode_button = wait.until(EC.element_to_be_clickable((By.ID, "passcode")))
    passcode_button.click()

    # Get passcode input once it is actually visible, so send_keys can reach it
    passcode_input = wait.until(EC.visibility_of_element_located((By.NAME, "passcode")))

    # Generate next passcode and input it
    next_password = generate_next_token()
    passcode_input.send_keys(next_password)

    # Submit the passcode to duo (the same button doubles as the submit control)
    passcode_button.click()

    # Leave the Duo iframe: the element awaited below belongs to the top-level
    # page, not to the iframe the driver is still focused on.
    driver.switch_to.default_content()

    print("Waiting for building")
    # Wait until logged in
    wait.until(EC.visibility_of_element_located((By.NAME, "Bldg")))

    return driver

In [None]:
# Launch a Chrome session configured with chrome_options.
driver = webdriver.Chrome(options=chrome_options)  

In [None]:
# Run the login flow; the function returns the driver, which the cell echoes as its output.
login_to_floorplan_page(driver)

In [None]:
def get_building_list(driver):
    """Return the `value` attribute of every <option> inside the element named "Bldg".

    Args:
        driver: Selenium WebDriver whose current page contains the "Bldg" element.

    Returns:
        list: one entry per <option>, in the order Selenium returns them. Values
        are passed through unchanged, so an empty placeholder value (if the page
        has one) is included.
    """
    # find_element(By...) works on Selenium 3 and 4; the find_element_by_*
    # helpers used previously were removed in Selenium 4.3.
    building_select = driver.find_element(By.NAME, "Bldg")
    building_options = building_select.find_elements(By.TAG_NAME, "option")
    return [building_option.get_attribute("value") for building_option in building_options]

In [None]:
# Read the building identifiers from the page the driver is currently on.
building_names = get_building_list(driver)

In [None]:
# Display the list as the cell output for a quick visual check.
building_names

In [None]:
def download_all_floorplans(driver):
    """Visit every building's listing page and click each floor-plan PDF link.

    Building names come from `get_building_list(driver)`, so `driver` must be on
    a page containing the "Bldg" selector when this is called. For each building
    the page LIST_URL + name is loaded and every <a> whose href contains
    "/pdfs/" is clicked.

    Args:
        driver: logged-in Selenium WebDriver.

    Returns:
        None. Buildings whose listing page does not show 'maincontent' within
        10 seconds are reported on stdout and skipped.
    """
    # Local import: the imports visible above this function do not include TimeoutException.
    from selenium.common.exceptions import TimeoutException

    wait = WebDriverWait(driver, 10)
    building_names = get_building_list(driver)

    for building_name in building_names:
        # An empty option value (presumably a placeholder entry) would request
        # the listing page with no building at all.
        if not building_name:
            continue

        driver.get(LIST_URL + building_name)

        try:
            wait.until(EC.visibility_of_element_located((By.ID, 'maincontent')))
        except TimeoutException:
            # One unavailable building should not abort the remaining downloads.
            print("Could not get floorplans for " + building_name)
            continue

        # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, removed in Selenium 4.3.
        floor_links = driver.find_elements(By.XPATH, '//a[contains(@href,"/pdfs/")]')
        # NOTE(review): clicking in a loop assumes each click starts a download
        # without navigating away (see the plugins.always_open_pdf_externally
        # preference); if the page changed, the remaining links would go stale.
        for floor_link in floor_links:
            floor_link.click()
        

In [None]:
def reorganize_floorplans(data_folder=None):
    """Move `<building>_<floor>.pdf` files into one sub-folder per building.

    The building is the part of the file name before the first underscore, so
    `NW12_A_1.pdf` goes to `<data_folder>/NW12/`. PDF files without an
    underscore are reported on stdout and left where they are.

    Safe to run more than once: existing building folders are reused, and files
    that were already moved are no longer in the top level to be picked up.

    Args:
        data_folder: folder to reorganize. Defaults to the module-level
            DATA_FOLDER, which is what the original zero-argument call used.

    Returns:
        None.
    """
    if data_folder is None:
        data_folder = DATA_FOLDER

    # Group the PDF file names by building before touching the file system.
    buildings = {}
    for filename in os.listdir(data_folder):
        if not filename.endswith(".pdf"):
            continue
        if not os.path.isfile(os.path.join(data_folder, filename)):
            continue
        base_filename = os.path.splitext(filename)[0]
        # partition() never raises, unlike unpacking split('_'), which failed on
        # names with zero or more than one underscore.
        building, separator, _floor = base_filename.partition('_')
        if not separator or not building:
            print("Skipping " + filename)
            continue
        buildings.setdefault(building, []).append(filename)

    for building, filenames in buildings.items():
        building_folder = os.path.join(data_folder, building)
        # exist_ok: a previous (possibly partial) run may have created the folder.
        os.makedirs(building_folder, exist_ok=True)
        for filename in filenames:
            current_path = os.path.join(data_folder, filename)
            new_path = os.path.join(building_folder, filename)
            os.rename(current_path, new_path)

In [None]:
# Sort the PDFs found in DATA_FOLDER into per-building folders.
reorganize_floorplans()

In [83]:
import os  
import sys
from selenium import webdriver  
from selenium.webdriver.common.keys import Keys  
from selenium.webdriver.chrome.options import Options 
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from login_with_duo import login_to_page

In [2]:
from login_with_duo import login_to_page

In [3]:
# abspath('') resolves to the current working directory at the time this cell runs.
SCRIPT_LOCATION = os.path.abspath('')
# Output folder: get_room_use_texts() writes one .txt file per building here.
DATA_FOLDER = os.path.join(SCRIPT_LOCATION, 'rooms/')

In [39]:
# Rule given to Chrome's `managed_auto_select_certificate_for_urls` preference:
# URL pattern https://idp.mit.edu:446, filtered on issuer OU "Client CA v1".
cert_auto_select_rule = '{"pattern":"https://idp.mit.edu:446","filter":{"ISSUER":{"OU":"Client CA v1"}}}'

# Profile preferences; downloads are directed to DATA_FOLDER.
chrome_prefs = {
    "plugins.always_open_pdf_externally": True,
    "download.default_directory": DATA_FOLDER,
    'profile.managed_auto_select_certificate_for_urls': [cert_auto_select_rule],
}

chrome_options = Options()
for cli_switch in ("--ignore-certificate-errors", "--ignore-urlfetcher-cert-requests"):
    chrome_options.add_argument(cli_switch)
chrome_options.add_experimental_option("prefs", chrome_prefs)

In [40]:
# All three pages share a host, and the two reports share one script path.
_FLOORPLANS_HOST = "https://floorplans.mit.edu"
_REPORT_SCRIPT = _FLOORPLANS_HOST + "/cgi-bin-db-mit/wdbmitscript.asp"

# Page handed to login_to_page() as the login target.
HOME_URL = _FLOORPLANS_HOST + "/mit-room.html"
# Building index: get_building_list() reads the <menu> links on this page.
LIST_URL = _REPORT_SCRIPT + "?Report=ibrl&Item=MIT"
# Per-building report; get_room_use_texts() appends a building name to it.
ROOM_URL = _REPORT_SCRIPT + "?report=brl&item="

In [41]:
# Ready-check for the building index page: a visible <menu> element.
room_summary_condition = EC.visibility_of_element_located((By.TAG_NAME, "menu"))
# Ready-check passed to login_to_page(): a visible element with class "paddingLogo".
home_page_condition = EC.visibility_of_element_located((By.CLASS_NAME, "paddingLogo"))

In [74]:
def get_building_list(driver):
    """Return the stripped link text of every <a> inside the page's first <menu>.

    Args:
        driver: Selenium WebDriver whose current page contains a <menu> element.

    Returns:
        list of str: one entry per link, in the order Selenium returns them.
    """
    # find_element(By...) works on Selenium 3 and 4; the find_element_by_*
    # helpers used previously were removed in Selenium 4.3.
    menu = driver.find_element(By.TAG_NAME, "menu")
    building_selects = menu.find_elements(By.TAG_NAME, "a")
    return [building_select.get_attribute("text").strip() for building_select in building_selects]

In [75]:
def get_room_use_texts(driver):
    """Save the <pre> text of every building's report to DATA_FOLDER/<building>.txt.

    Building names come from `get_building_list(driver)`, so `driver` must be on
    the building index page when this is called. Each report is loaded from
    ROOM_URL + name. The driver is sent back to the URL it started on when the
    function finishes, including when it finishes with an error.

    Args:
        driver: logged-in Selenium WebDriver.

    Returns:
        None. Buildings whose report shows no <pre> element within 3 seconds
        are reported on stdout and skipped.
    """
    original_url = driver.current_url

    wait = WebDriverWait(driver, 3)
    building_names = get_building_list(driver)

    # The output folder is not created anywhere else in this notebook.
    os.makedirs(DATA_FOLDER, exist_ok=True)

    try:
        for building_name in building_names:
            # A link with no text would request the report with no building
            # and write a file named just ".txt".
            if not building_name:
                continue

            driver.get(ROOM_URL + building_name)

            try:
                # wait.until returns the located element, so no second lookup is needed.
                text_tag = wait.until(EC.visibility_of_element_located((By.TAG_NAME, 'pre')))
            except TimeoutException:
                print("Could not get information for " + building_name)
                continue

            text = text_tag.text

            # Explicit UTF-8 so the result does not depend on the platform's default encoding.
            with open(os.path.join(DATA_FOLDER, building_name + '.txt'), 'w', encoding='utf-8') as f:
                f.write(text)
    finally:
        # Put the browser back where the caller left it, even after a failure.
        driver.get(original_url)
    

In [86]:
# Launch a Chrome session configured with chrome_options.
driver = webdriver.Chrome(options=chrome_options)  

In [87]:
# NOTE(review): login_to_page comes from login_with_duo; presumably it logs in at
# HOME_URL and waits for home_page_condition — confirm against that module.
login_to_page(driver, HOME_URL, home_page_condition)

In [88]:
# Open the building index and wait up to 10 seconds for its <menu> to be visible.
# wait.until returns the located element, which is why the cell echoes a WebElement.
driver.get(LIST_URL)
wait = WebDriverWait(driver, 10)
wait.until(room_summary_condition)

<selenium.webdriver.remote.webelement.WebElement (session="747a7c207308709f1b5b86430ad22528", element="079f31eb-7053-4c86-87f3-ac041754e70c")>

In [89]:
# Scrape every building's report; buildings that time out are listed in the cell output.
get_room_use_texts(driver)

Could not get information for 42C
