In [2]:

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.keys import Keys
import pandas as pd
import re
import time
from IPython.display import clear_output
from config import user,pw,clinics


In [None]:
# Constants

main = pd.DataFrame(columns=['Clinic', 'PT', 'Week Ending', 'Initial Examination', 'Follow-Up', 'Discharge', 'Baseline Screen', 'Check-Up'])

eval_counts = []

url = 'https://app.webpt.com/dashboard.php'

response = requests.get(url)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--start-maximized')
driver = webdriver.Chrome(options=chrome_options)

In [22]:
# Check name similarity
def extract_name(title):
    # Pattern 1: Last Name, First Name
    pattern1 = r'([A-Za-z]+),\s+([A-Za-z]+)'
    # Pattern 2: First Name Last Name
    pattern2 = r'([A-Za-z]+)\s+([A-Za-z]+)'

    match1 = re.search(pattern1, title)
    match2 = re.search(pattern2, title)

    if match1:
        return match1.group(1).lower(), match1.group(2).lower()
    elif match2:
        return match2.group(2).lower(), match2.group(1).lower()
    else:
        return None
# Count # of evals, removing duplicate entries
def count_evals():
    evals = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-1 "]') + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-6 "]')
    eval_counts.clear()
    for eval in evals:
        title = eval.find_element(By.CLASS_NAME, 'title').text
        name = extract_name(title)
        if name is None:
            continue

        similar_title_found = False
        for existing_title in eval_counts:
            existing_name = extract_name(existing_title)
            if existing_name is None:
                continue

            if name == existing_name:
                similar_title_found = True
                break

        if not similar_title_found:
            eval_counts.append(title)

    eval_count = len(eval_counts)
    
    return eval_count
# Count # of html instances corresponded to visit type
def visit_count(g,pt):
    global main  # add this line to use the global variable 'main'
    follow_up = (driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-17"]') 
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-11"]')  
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-10"]')        
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-32"]')
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-19"]')
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-18"]')        
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-5"]')        )

    grad = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-2 "]')
    
    checkup = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-9 "]')

    baseline = []
    friday = driver.find_element(By.XPATH, "//div[contains(text(), 'Fri ')]").text.replace('Fri ','')

    df = pd.DataFrame([[g, pt, friday, count_evals(), len(follow_up) ,len(grad), len(baseline),len(checkup)]] , columns = main.columns)
    
    main = pd.concat([main,df])
    return main
# Get value from dict
def get_checkbox_id(pt, checkbox_dict):
    return checkbox_dict.get(pt.strip())

In [5]:
driver.get(url)

# Navigate the new page layout
username = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, 'username')))
username.send_keys(user)

cont = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "c972fd828")))
cont.click()

password = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, 'password')))
password.send_keys(pw)

cont = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "c972fd828")))
cont.click()

# There is an occasional pop up.
try:
    button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//button[@class='eviction-option ok'][contains(@onclick, '/eviction/evict')]")))
    button.click()
except:
    pass


for g in clinics:
    search_input = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.CLASS_NAME, 'chosen-search-input')))
    search_input.send_keys(g, Keys.ENTER)
    
    # During normal navigation, the site always resets back to main clinic page
    # When navigating with selenium, it sometimes stays on the schedule page.
    try:
        view_schedule = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.LINK_TEXT, 'View Schedule')))
        view_schedule.click()
    except:
        pass
    
    # Build a list of PTs from the schedule
    scheduler_calendar_list = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.ID, "SchedulerCalendarList")))
    white_text_spans = scheduler_calendar_list.find_elements(By.CSS_SELECTOR, "span.white-text")

    try:
        pt_list = [span.text for span in white_text_spans]
    except:
        pass

    pt_list.remove('All Calendars ')

    checkbox_dict = {}
    for pt in pt_list:
        while True:
            try:
                checkboxes = driver.find_elements(By.CLASS_NAME, 'dnd')
                for checkbox in checkboxes:
                    if pt in checkbox.text:
                        checkbox_id = checkbox.find_element(By.TAG_NAME, 'input').get_attribute('id')
                        checkbox_dict[pt.strip()] = checkbox_id
                        break  # move on to the next name
                break  # exit the while loop if all checkboxes have been processed
            except StaleElementReferenceException:
                continue
        # checkboxes = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'dnd')))
        # for checkbox in checkboxes:
        #     if pt in checkbox.text:
        #         checkbox_id = checkbox.find_element(By.TAG_NAME, 'input').get_attribute('id')
        #         checkbox_dict[pt.strip()] = checkbox_id
        #         break  # move on to the next name

    
    for pt in pt_list:
        while True:
            try:
                checkboxes = driver.find_elements(By.CLASS_NAME, 'dnd')
                for checkbox in checkboxes:
                    if checkbox.is_selected():
                        checkbox.click()
                
                # Select the checkbox for the current PT name
                checkbox = driver.find_element(By.ID, checkbox_dict[pt.strip()])
                checkbox.click()
                break  # exit the while loop if the checkbox has been selected
            except StaleElementReferenceException:
                continue  # try again if the checkbox has become stale or c

        checkbox = driver.find_element(By.ID, get_checkbox_id(pt, checkbox_dict))
        if checkbox.is_selected():
            pass
        else:
            checkbox.click()

        time.sleep(3)
        last_week = driver.find_element(By.ID, "app-calendar-tb-prev")
        last_week.click()
        time.sleep(3)
        visit_count(g,pt)

        this_week = driver.find_element(By.ID, "app-calendar-tb-next")
        this_week.click()
        time.sleep(3)
        visit_count(g,pt)

        # next_week = driver.find_element(By.ID, "app-calendar-tb-next")
        # next_week.click()
        # time.sleep(3)
        # visit_count(g,pt)
        
        # time.sleep(1)
        # # driver.find_element(By.ID, "app-calendar-tb-prev").click()
        # time.sleep(1)

        checkbox = driver.find_element(By.ID, get_checkbox_id(pt, checkbox_dict))
        if checkbox.is_selected():
            checkbox.click()
        clear_output(wait=True)
        print(main)

main.to_csv('finally3.csv')


NameError: name 'evals' is not defined

In [54]:
main.to_csv('finally.csv')

In [55]:
main.to_csv('finally3.csv')


In [77]:
eval_counts = []

def extract_name(title):
    # Pattern 1: Last Name, First Name
    pattern1 = r'([A-Za-z]+),\s+([A-Za-z]+)'
    # Pattern 2: First Name Last Name
    pattern2 = r'([A-Za-z]+)\s+([A-Za-z]+)'

    match1 = re.search(pattern1, title)
    match2 = re.search(pattern2, title)

    if match1:
        return match1.group(1).lower(), match1.group(2).lower()
    elif match2:
        return match2.group(2).lower(), match2.group(1).lower()
    else:
        return None

def count_evals():
    
    evals = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-1 "]') + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-6 "]')
    eval_counts.clear()
    for eval in evals:
        title = eval.find_element(By.CLASS_NAME, 'title').text
        name = extract_name(title)
        if name is None:
            continue

        similar_title_found = False
        for existing_title in eval_counts:
            existing_name = extract_name(existing_title)
            if existing_name is None:
                continue

            if name == existing_name:
                similar_title_found = True
                break

        if not similar_title_found:
            eval_counts.append(title)

    eval_count = len(eval_counts)
    
    return eval_count

def visit_count():
    global main  # add this line to use the global variable 'main'
    follow_up = (driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-17"]') 
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-11"]')  
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-10"]')        
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-32"]')
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-19"]')
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-18"]')        
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-5"]')        
    )

    eval = (driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-1 "]')
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-6 "]')
    )

    grad = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-2 "]')
    
    checkup = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-9 "]')

    baseline = []
    friday = driver.find_element(By.XPATH, "//div[contains(text(), 'Fri ')]").text.replace('Fri ','')

    # df = pd.DataFrame([[g, pt, friday, len(eval), len(follow_up) ,len(grad), len(baseline),len(checkup)]] , columns = main.columns)
    
    # main = pd.concat([main,df])
    return print(friday, len(eval), len(follow_up) ,len(grad), len(baseline),len(checkup))

visit_count()

3/24 6 17 0 0 0


In [21]:
import jellyfish

eval_counts = []

def count_evals():
    
    evals = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-1 "]') + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-6 "]')
    eval_counts.clear()
    for eval in evals:
        title = eval.find_element(By.CLASS_NAME, 'title').text
        similar_title_found = False
        for existing_title in eval_counts:
            if jellyfish.jaro_winkler(title, existing_title) >= 0.62:  # Check if titles are similar with a similarity threshold of 0.85
                similar_title_found = True
                break
        if not similar_title_found:
            eval_counts.append(title)

    eval_count = len(eval_counts)
    
    return eval_count


count_evals()

4

In [26]:
eval_counts = []

def visit_count(g,pt):
    global main  # add this line to use the global variable 'main'
    follow_up = (driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-17"]') 
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-11"]')  
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-10"]')        
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-32"]')
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-19"]')
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-18"]')        
            + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-5"]')        )


    evals = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-1 "]') + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-6 "]')
    eval_counts.clear()
    for eval in evals:
        title = eval.find_element(By.CLASS_NAME, 'title').text
        similar_title_found = False
        for existing_title in eval_counts:
            if jellyfish.jaro_winkler(title, existing_title) >= 0.62:  # Check if titles are similar with a similarity threshold of 0.85
                similar_title_found = True
                break
        if not similar_title_found:
            eval_counts.append(title)

    eval_count = len(eval_counts)


    grad = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-2 "]')
    
    checkup = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-9 "]')

    baseline = []
    friday = driver.find_element(By.XPATH, "//div[contains(text(), 'Fri ')]").text.replace('Fri ','')

    df = pd.DataFrame([[g, pt, friday, eval_count, len(follow_up) ,len(grad), len(baseline),len(checkup)]] , columns = main.columns)
    
    main = pd.concat([main,df])
    return main

visit_count(g,pt)

Unnamed: 0,Clinic,PT,Week Ending,Initial Examination,Follow-Up,Discharge,Baseline Screen,Check-Up
0,Saratoga,Robert Lucente,3/17,8,33,1,0,2
0,Saratoga,Robert Lucente,3/17,8,33,1,0,2
0,Saratoga,Robert Lucente,3/17,8,33,1,0,2
0,Saratoga,Robert Lucente,3/17,8,33,1,0,2
0,Saratoga,Robert Lucente,3/17,8,33,1,0,2
0,Saratoga,Robert Lucente,3/17,4,33,1,0,2
0,Saratoga,Robert Lucente,3/24,3,39,4,0,0
0,Saratoga,Robert Lucente,3/17,5,25,1,0,1


In [30]:
import re

eval_counts = []

def extract_name(title):
    # Pattern 1: Last Name, First Name
    pattern1 = r'([A-Za-z]+),\s+([A-Za-z]+)'
    # Pattern 2: First Name Last Name
    pattern2 = r'([A-Za-z]+)\s+([A-Za-z]+)'

    match1 = re.search(pattern1, title)
    match2 = re.search(pattern2, title)

    if match1:
        return match1.group(1).lower(), match1.group(2).lower()
    elif match2:
        return match2.group(2).lower(), match2.group(1).lower()
    else:
        return None

def count_evals():
    
    evals = driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-1 "]') + driver.find_elements(By.CSS_SELECTOR, '[class^="x-cal-6 "]')
    eval_counts.clear()
    for eval in evals:
        title = eval.find_element(By.CLASS_NAME, 'title').text
        name = extract_name(title)
        if name is None:
            continue

        similar_title_found = False
        for existing_title in eval_counts:
            existing_name = extract_name(existing_title)
            if existing_name is None:
                continue

            if name == existing_name:
                similar_title_found = True
                break

        if not similar_title_found:
            eval_counts.append(title)

    eval_count = len(eval_counts)
    
    return eval_count

count_evals()

4