In [14]:

import re
import time
from datetime import date, datetime, timedelta

import pandas as pd
import requests
from IPython.display import clear_output
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from config import user,pw,clinics


In [15]:
# Shared accumulators and output naming for the scraping run

# Last-week / this-week results: visit_count() appends one row per call.
main = pd.DataFrame(
    columns=[
        'Clinic',
        'PT',
        'PTO',
        'Week Ending',
        'Initial Examination',
        'Follow-Up',
        'Discharge',
        'Baseline Screen',
        'Check-Up',
    ]
)
# Next-week results: starts empty, next_week_count() appends one row per call.
next_main = pd.DataFrame()
# Scratch list of de-duplicated eval titles; count_evals() clears and refills it.
eval_counts = list()

# The output file name carries the run date as YYYY-MM-DD.
today = date.today()
date_str = today.isoformat()
filename = "last_week_this_week_" + date_str + ".csv"

In [16]:
# Check name similarity
def extract_name(title):
    """Pull a comparable ``(last, first)`` name pair out of an event title.

    A standalone ``IE `` marker is removed first. The title is then checked
    against these layouts in order, and the first one that matches anywhere
    in the title wins:

    1. ``Last, First``
    2. ``First Last``
    3. ``F. Last`` (single initial, optional period)

    Args:
        title: Event title text, e.g. ``"IE - John Smith"``.

    Returns:
        A tuple ``(last, first)`` of lower-cased strings, or ``None`` when
        none of the layouts match.
    """
    # Strip "IE " only where it starts a word. A plain replace('IE ', '')
    # also ate the tail of upper-case names ("MARIE SMITH" -> "MARSMITH"),
    # leaving a title that no layout could parse.
    title = re.sub(r'\bIE ', '', title).lstrip()

    # (pattern, group holding the last name, group holding the first name)
    layouts = (
        (r'([A-Za-z]+),\s+([A-Za-z]+)', 1, 2),    # Last Name, First Name
        (r'([A-Za-z]+)\s+([A-Za-z]+)', 2, 1),     # First Name Last Name
        (r'([A-Za-z])\.?\s+([A-Za-z]+)', 2, 1),   # First Initial. Last Name
    )
    for pattern, last_group, first_group in layouts:
        match = re.search(pattern, title)
        if match:
            return match.group(last_group).lower(), match.group(first_group).lower()
    return None

# Count # of evals, removing duplicate entries
def count_evals():
    """Count the distinct evals shown on the calendar currently displayed.

    Collects every element whose ``class`` attribute starts with ``x-cal-1 ``
    or ``x-cal-6 ``, reads the text of its ``title`` child, and parses a name
    from it with ``extract_name``. Events whose title yields no name are
    ignored; events whose parsed name was already seen are treated as
    duplicates and ignored too.

    Side effects:
        Clears the module-level ``eval_counts`` list and refills it with the
        titles that were kept (first occurrence of each name).

    Returns:
        int: the number of titles kept.
    """
    candidates = (
        driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-1 "]')
        + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-6 "]')
    )
    eval_counts.clear()

    # Track parsed names in a set so each title is parsed once, instead of
    # re-parsing every kept title for every new event.
    seen_names = set()
    for event in candidates:  # not named `eval`, which would shadow the builtin
        title = event.find_element(By.CLASS_NAME, 'title').text
        name = extract_name(title)
        if name is None or name in seen_names:
            continue
        seen_names.add(name)
        eval_counts.append(title)

    return len(eval_counts)

# Count # of html instances corresponded to visit type
def visit_count():
    """Tally each visit type on the displayed calendar week and append a row to ``main``.

    Reads the module-level ``driver`` (Selenium session), ``g`` and ``pt``
    (written to the Clinic and PT columns) and rebinds the module-level
    ``main``. After the new row is appended, every row of ``main`` whose
    Follow-Up count is 0 is dropped (empty schedules).

    Returns:
        pandas.DataFrame: the updated ``main``.
    """
    global main  # rebinding the module-level accumulator

    def _events(class_prefix):
        # Elements whose class attribute starts with the given prefix.
        return driver.find_elements(By.CSS_SELECTOR, f'[class^="{class_prefix}"]')

    # All classes will differ based on clinics/company. Here are ours.
    # The plethora of events counted as Follow Up solves for OMs across the board being able to dictate their preferred colors/specifications.
    # NOTE(review): these prefixes have no trailing space, so e.g. 'x-cal-5' also
    # matches a class starting with 'x-cal-50' - confirm that is intended.
    follow_up_prefixes = (
        'x-cal-17',
        'x-cal-11',
        'x-cal-10',
        'x-cal-32',
        'x-cal-19',
        'x-cal-18',
        'x-cal-5',
        'x-cal-24',
    )
    follow_up = [event for prefix in follow_up_prefixes for event in _events(prefix)]

    grad = _events('x-cal-2 ')

    checkup = _events('x-cal-9 ')

    baseline = _events('x-cal-21 ')

    # Text of the first div containing 'Fri ', minus that label; stored as Week Ending.
    friday = driver.find_element(By.XPATH, "//div[contains(text(), 'Fri ')]").text.replace('Fri ','')

    # While a more advanced approach is likely possible using pixel height of various components,
    # that method requires some more troubleshooting to get accurately.
    # So the workaround for now, is noting if there is any PTO, and doing the total utilization manually after the script finishes.
    utilization = _events('x-cal-29')

    # Query the evals once so the logged value and the stored value always agree
    # (and the browser is not scraped twice for the same number).
    eval_count = count_evals()

    print(f"Initial Evals count: {eval_count}")
    print(f"Follow-up count: {len(follow_up)}")
    print(f"PTO Blocks count: {len(utilization)}")
    print(f"PT: {pt}")
    print(f"Friday: {friday}")

    # build the df and insert it into our ending dataframe
    row = [g, pt, len(utilization), friday, eval_count, len(follow_up), len(grad), len(baseline), len(checkup)]
    df = pd.DataFrame([row], columns=main.columns)
    main = pd.concat([main, df])
    # drop empty schedules.
    main = main[(main['Follow-Up'] != 0)]

    return main

# Next week requires a different structure and breakout
def next_week_count():
    """Tally the displayed calendar week into a single row appended to ``next_main``.

    Unlike ``visit_count``, the ``x-cal-2 ``, ``x-cal-9 `` and ``x-cal-21 ``
    events are folded into the Follow-Up total, and the row only carries the
    first six columns of ``main`` (``main.columns[:-3]``).

    Reads the module-level ``driver``, ``g``, ``pt`` and ``main`` and rebinds
    the module-level ``next_main``. After the new row is appended, every row
    of ``next_main`` whose Follow-Up count is 0 is dropped.

    Returns:
        pandas.DataFrame: the updated ``next_main``.
    """
    global next_main  # rebinding the module-level accumulator

    def _events(class_prefix):
        # Elements whose class attribute starts with the given prefix.
        return driver.find_elements(By.CSS_SELECTOR, f'[class^="{class_prefix}"]')

    # NOTE(review): 'x-cal-24' is counted as follow-up in visit_count() but is
    # not in this list - confirm whether that omission is intentional.
    follow_up_prefixes = (
        'x-cal-17',
        'x-cal-11',
        'x-cal-10',
        'x-cal-32',
        'x-cal-19',
        'x-cal-18',
        'x-cal-5',
        'x-cal-2 ',
        'x-cal-9 ',
        'x-cal-21 ',
    )
    follow_up = [event for prefix in follow_up_prefixes for event in _events(prefix)]

    # this serves as our Week End date, which is how we delineate time periods
    friday = driver.find_element(By.XPATH, "//div[contains(text(), 'Fri ')]").text.replace('Fri ','')
    # if this is not 0, then we need to change the PTs capacity/availability
    utilization = _events('x-cal-29')

    # Query the evals once so the logged value and the stored value always agree.
    eval_count = count_evals()

    print(f"Initial Evals count: {eval_count}")
    print(f"Follow-up count: {len(follow_up)}")
    print(f"PTO blocks count: {len(utilization)}")
    print(f"PT: {pt}")
    print(f"Friday: {friday}")

    row = [g, pt, len(utilization), friday, eval_count, len(follow_up)]
    next_df = pd.DataFrame([row], columns=main.columns[:-3])
    next_main = pd.concat([next_main, next_df])
    next_main = next_main[(next_main['Follow-Up'] != 0)]

    return next_main

# Look up a PT's checkbox id
def get_checkbox_id(pt, checkbox_dict):
    """Return the checkbox id stored for ``pt`` (whitespace-trimmed), or ``None`` if absent."""
    lookup_key = pt.strip()
    checkbox_id = checkbox_dict.get(lookup_key)
    return checkbox_id

def select_checkbox_by_id(checkbox_id, select=True, timeout=10):
    """Put the checkbox with the given element id into the requested state.

    Re-locates the checkbox and clicks it until its selected state matches
    ``select`` or ``timeout`` seconds have elapsed. A stale element reference
    restarts the attempt; a wait timeout prints a message and gives up.

    Args:
        checkbox_id: ``id`` attribute of the checkbox element.
        select: truthy to tick the checkbox, falsy to untick it.
        timeout: seconds allowed overall, also used for each WebDriverWait.

    Returns:
        None.
    """
    want_selected = bool(select)
    deadline = time.time() + timeout

    while time.time() < deadline:
        try:
            checkbox = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.ID, checkbox_id))
            )

            if checkbox.is_selected() == want_selected:
                # Already in the requested state. Without this exit the loop kept
                # polling until the timeout ran out - including after a click that
                # succeeded but left the old element reference stale.
                return

            checkbox.click()
            # Wait for the checkbox to be interactable again before re-reading it.
            WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((By.ID, checkbox_id))
            )
            if checkbox.is_selected() == want_selected:
                return
            # The click did not take effect: loop and try again.
        except StaleElementReferenceException:
            # The element was replaced in the DOM; look it up again.
            continue
        except TimeoutException:
            print(f"Checkbox with ID {checkbox_id} not found within the given timeout.")
            return

In [17]:

# Log in, walk every clinic and every PT on its schedule, record last week,
# this week and next week for each PT, then write both result frames to CSV.
# NOTE: the browser session is left open after the run; call driver.quit()
# when you are finished with it.
url = 'https://app.webpt.com/dashboard.php'
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--start-maximized')
driver = webdriver.Chrome(options=chrome_options)
driver.get(url)

wait = WebDriverWait(driver, 10)
# Navigate the new page layout
username = wait.until(EC.visibility_of_element_located((By.ID, 'username')))
username.send_keys(user)

cont = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "c972fd828")))
cont.click()

password = wait.until(EC.visibility_of_element_located((By.ID, 'password')))
password.send_keys(pw)

cont = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "c972fd828")))
cont.click()

# There is an occasional pop up.
# Best effort: any Selenium failure here just means there was nothing to dismiss.
# (Not a bare except, so interrupting the kernel during the wait still works.)
try:
    button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@class='eviction-option ok'][contains(@onclick, '/eviction/evict')]")))
    button.click()
except WebDriverException:
    pass

# There are 2 clinics that appear second in the list when searched by name.
# These are their positions in `clinics`.
second_result_clinic_indexes = {4, 14}

# PTs that could not be matched to a sidebar checkbox, reported after the run.
skipped_pts = []

for index, g in enumerate(clinics):

    search_input = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'chosen-search-input')))

    if index in second_result_clinic_indexes:
        search_input.send_keys(g, Keys.DOWN, Keys.ENTER)
    else:
        search_input.send_keys(g, Keys.ENTER)

    # During normal navigation, the site always resets back to main clinic page
    # When navigating with selenium, it sometimes stays on the schedule page.
    try:
        view_schedule = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'View Schedule')))
        view_schedule.click()
    except WebDriverException:
        pass

    # Build a list of PTs from the schedule
    scheduler_calendar_list = wait.until(EC.visibility_of_element_located((By.ID, "SchedulerCalendarList")))
    white_text_spans = scheduler_calendar_list.find_elements(By.CSS_SELECTOR, "span.white-text")

    try:
        pt_list = [span.text for span in white_text_spans]
    except WebDriverException:
        # The sidebar changed while it was being read. Re-locate it and read once
        # more rather than carrying on with the previous clinic's pt_list (or none).
        scheduler_calendar_list = wait.until(EC.visibility_of_element_located((By.ID, "SchedulerCalendarList")))
        white_text_spans = scheduler_calendar_list.find_elements(By.CSS_SELECTOR, "span.white-text")
        pt_list = [span.text for span in white_text_spans]

    # Drop the "All Calendars" entry; tolerate it being absent.
    pt_list = [name for name in pt_list if name.strip() != 'All Calendars']

    # Create a dictionary that marries each PT name to its checkbox in the sidebar.
    checkbox_dict = {}
    for name in pt_list:
        while True:
            try:
                checkboxes = driver.find_elements(By.CLASS_NAME, 'dnd')
                for checkbox in checkboxes:
                    if name in checkbox.text:
                        checkbox_id = checkbox.find_element(By.TAG_NAME, 'input').get_attribute('id')
                        checkbox_dict[name.strip()] = checkbox_id
                        break  # move on to the next name
                break  # exit the while loop if all checkboxes have been processed
            except StaleElementReferenceException:
                continue


    for pt in pt_list:
        print(f"{pt}'s loop is starting")

        # A PT whose name matched no sidebar entry has no checkbox to drive;
        # skip it instead of failing the whole run with a KeyError.
        pt_checkbox_id = checkbox_dict.get(pt.strip())
        if pt_checkbox_id is None:
            skipped_pts.append((g, pt))
            continue

        checkboxes = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'scheduleCheckbox')))

        # Untick everything so only the current PT's calendar is shown.
        for checkbox in checkboxes:
            try:
                if checkbox.is_selected():
                    checkbox.click()
            except StaleElementReferenceException:
                continue

        # Select the checkbox for the current PT name
        select_checkbox_by_id(pt_checkbox_id, select=True)

        time.sleep(3)
        last_week = driver.find_element(By.ID, "app-calendar-tb-prev")
        last_week.click()
        time.sleep(3)
        visit_count()

        this_week = driver.find_element(By.ID, "app-calendar-tb-next")
        this_week.click()
        time.sleep(3)
        visit_count()

        next_week = driver.find_element(By.ID, "app-calendar-tb-next")
        next_week.click()
        time.sleep(3)
        next_week_count()

        # Step back so the calendar is on the current week for the next PT.
        time.sleep(3)
        driver.find_element(By.ID, "app-calendar-tb-prev").click()
        time.sleep(3)

        select_checkbox_by_id(pt_checkbox_id, select=False)

        clear_output(wait=True)
        print(main.iloc[:, 1:6].tail(2))
        print(next_main.iloc[:, 1:].tail(1))


main.to_csv(filename, index=False)
next_main.to_csv(f'future_visits_{filename}', index=False)
print(f"File saved as {filename}")

# Reported last so clear_output() in the loop cannot wipe the message.
for skipped_clinic, skipped_pt in skipped_pts:
    print(f"Skipped {skipped_pt!r} at {skipped_clinic!r}: no sidebar checkbox found")

                PT PTO Week Ending Initial Examination Follow-Up
0  Neena Kirkwood    0        3/24                   5        16
0  Neena Kirkwood    0        3/31                   2        26
                PT  PTO Week Ending  Initial Examination  Follow-Up
0  Neena Kirkwood     0         4/7                    1         14
File saved as last_week_this_week_2023-03-27.csv


In [18]:
# Run log and TODO notes, kept as a bare triple-quoted string rather than comments.
# The string is the cell's last expression, so the notebook echoes its repr as the cell output.
'''
Runtime 3/20:
24m 24.6s
* Checkbox was simpler, and more often wrong.
* Checked this and last week

Runtime 3/27:
54m 44.6s

TODO:
Last clinic doubles some evals. They use IE and IE -. the name check doesn't catch it - Done
Figure out PTO/Utilization - Fixed with ducttape
Troubleshoot edge cases on checkboxes - Mostly fixed
'''

"\nRuntime 3/20:\n24m 24.6s\n* Checkbox was simpler, and more often wrong.\n* Checked this and last week\n\n\n\nTODO:\nLast clinic doubles some evals. They use IE and IE -. the name check doesn't catch it - Done\nFigure out PTO/Utilization - Fixed with ducttape\nTroubleshoot edge cases on checkboxes - Mostly fixed\n"