<a href="https://colab.research.google.com/github/david-c-brown/clinic_kpi_monthly/blob/main/load.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from tqdm import tqdm
import pandas as pd

In [None]:
# custom functions
def modify_name(name):
    """Normalize a raw therapist name into "First Last" order.

    Everything from the first hyphen onward is discarded. A name written as
    "Last‚ First" is flipped around its "‚" separator, and the separator is
    removed.

    Args:
        name: Raw name string.

    Returns:
        The cleaned name with single spaces between words, or None when the
        text left after hyphen removal is entirely lowercase.
    """
    # NOTE: the separator this code matches is the "‚" character, which only
    # looks like an ASCII comma.
    separator = "‚"

    # Keep only the text in front of the first hyphen.
    name = name.split("-")[0]

    # All-lowercase values are rejected; callers filter out the None.
    if name.islower():
        return None

    if separator in name:
        # Swap the two halves around the separator rather than reversing
        # every word, so "Van Dyke‚ John" becomes "John Van Dyke" instead of
        # "John Dyke Van".
        last, _, first = name.partition(separator)
        name = f"{first} {last}".replace(separator, "")

    # Collapse any run of whitespace into single spaces.
    return " ".join(name.split())

def names_to_initials(names):
    """Build a mapping from each name to its initials.

    Names with two or more words map to the first letter of the first word
    followed by the first letter of the last word. A one-word name maps to
    its single first letter and a notice is printed.

    Args:
        names: Iterable of name strings.

    Returns:
        dict of {name: initials}.
    """
    lookup = {}
    for name in names:
        words = name.split()
        if len(words) <= 1:
            # Despite the wording of the message, the name is still recorded,
            # just with a single initial.
            lookup[name] = words[0][0]

            print(f'{name} is being skipped')
            continue
        lookup[name] = words[0][0] + words[-1][0]
    return lookup

def update_web_table(df):
    """Type every usable cell of ``df`` into the matching web-table input.

    Rows of ``df`` are treated as metrics and columns as individuals. The
    cell at position (i, j) is written to the input found in table row i+1,
    column j+1 of the page currently loaded in the module-level ``driver``.

    Args:
        df: DataFrame whose index holds the metrics and whose columns hold
            the individuals, in the same order as the web table.

    Returns:
        None. Cells whose input cannot be filled are reported with a printed
        message and skipped.
    """
    # Iterate over the rows (metrics)
    for i, metric in enumerate(df.index):
        # Iterate over the columns (individuals)
        for j, individual in enumerate(df.columns):
            value = df.iloc[i, j]

            # Skip missing values and bare "$" / "%" placeholders
            if pd.isna(value) or value == "$" or value == "%":
                continue

            try:
                # XPath positions are 1-based, hence i+1 / j+1.
                # Assumes the metrics are in the same order as in the df.
                xpath = f'//tr[@class="c-table__row"][{i+1}]/td[@class="c-table__column c-table__column--f2"][{j+1}]//input[@class="revise-metric__input"]'
                web_cell = driver.find_element(By.XPATH, xpath)

                # Replace whatever is in the input with the DataFrame value
                web_cell.clear()
                web_cell.send_keys(str(value))

            except Exception:
                # Best effort: report the cell and keep going. Catching
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit still stop the run.
                print(f"Input field for {metric} and {individual} not found")

# Monthly CPT breakout export. The file name is month-specific and has to be
# changed on subsequent months; rows containing any missing value are dropped.
df = pd.read_csv('clinic_cpt_breakout_may_2023.csv').dropna()
# pulled all data from the quarter to capture all possible persons treating
name_df = pd.read_csv('../clinic_master/last_month.csv')
per_pt_clinic_master = pd.read_csv('per_pt_clinic_master.csv')

# every level of this process has a slightly different name or amount of info for each person. Below we normalize to the CRM UI
name_mapping = {
    'PT1': 'Physical Therapist 1',
    'PT with Maiden Name': 'PT Married Name',
    'Hyphenated last': 'Last matched to UI',
    'etc': 'etc.'
}

name_mapping_clinic = {
    'Clinic': 'Cl.in.ic',
}

name_mapping_cm = {
    'Nickname': 'Full name',
}

# Clean every therapist name in place; missing names are left untouched
# (na_action='ignore') and rejected names come back from modify_name as None.
name_df['Treating Therapist'] = name_df['Treating Therapist'].map(modify_name, na_action='ignore')
# Keys of name_mapping_clinic are applied as regular expressions.
name_df['Clinic Name'] = name_df['Clinic Name'].replace(name_mapping_clinic, regex=True)
# Drop the rows without a usable name. Filtering only the values (and not the
# rows) would leave fewer values than rows and make the assignment fail.
name_df = name_df.dropna(subset=['Treating Therapist'])

# Align the clinic master column names with the ones used for the web table.
per_pt_clinic_master = per_pt_clinic_master.rename(columns={'Total Visits': 'Visits', 'Visits/Eval': 'Visits per eval', 'Rev/Visit': 'Rev per visit'})
per_pt_clinic_master['First Name'] = per_pt_clinic_master['First Name'].replace(name_mapping_cm)

# building a dictionary to match to the cpt output:
# {clinic name: {therapist initials: therapist name}}
clinic_dict = {}
for clinic_name, therapist_name in zip(name_df['Clinic Name'], name_df['Treating Therapist']):
    initials = names_to_initials([therapist_name])[therapist_name]
    # NOTE: two therapists with the same initials in one clinic share a key
    # here, so the later row replaces the earlier one.
    clinic_dict.setdefault(clinic_name, {})[initials] = therapist_name

# error catching, making sure we aren't missing anyone
unmatched_dict = {}
for clinic_name, initials_dict in clinic_dict.items():
    in_clinic = df['Clinic'] == clinic_name
    clinic_codes = df.loc[in_clinic, 'PT']

    # 'Practice' is left out of the unmatched report. The previous guard
    # compared the whole set to the string 'Practice', which is always
    # unequal, so it never filtered anything.
    # NOTE(review): presumably 'Practice' is a non-therapist row — confirm.
    unmatched_dict[clinic_name] = list(set(clinic_codes) - set(initials_dict) - {'Practice'})

    # Swap initials for full names; values with no match are kept as they are.
    df.loc[in_clinic, 'PT'] = clinic_codes.map(lambda code: initials_dict.get(code, code))

df['PT'] = df['PT'].replace(name_mapping)
df['Name'] = df['PT']
# The first word of the name is half of the merge key against the clinic master.
df['First Name'] = df['PT'].str.split().str[0]
# Strip percent signs from every string cell.
df = df.replace('%', '', regex=True)

# dataframe manipulation to insert into web table
merged_df = pd.merge(df, per_pt_clinic_master, on=['First Name', 'Clinic'], how='left', indicator=True)
# Keep only the columns used downstream; .copy() makes the result an
# independent frame so the column assignments below are unambiguous.
merged_df = merged_df[['Clinic', 'Name', 'Code mix', 'Codes per eval',
                        'Codes per non eval', 'Re eval per eval', 'Visits',
                        'Utilization', 'Visits per eval', 'Rev per visit', 'Revenue']].copy()

# Convert the utilization fraction to a whole-number percentage. Round before
# converting: int() alone truncates, and 0.29 * 100 is 28.999999999999996 in
# floating point, which would become 28. Missing values are passed through.
merged_df['Utilization'] = [value if pd.isna(value) else int(round(value * 100)) for value in merged_df['Utilization']]
merged_df['Visits per eval'] = merged_df['Visits per eval'].round(2)

# make sure the output looks right
merged_df.head(5)

In [None]:
# login to site
url = 'https://url.company.com'
# NOTE(review): this response is never read; presumably a reachability check
# before the browser starts — confirm or remove.
response = requests.get(url)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--start-maximized')

driver = webdriver.Chrome(options=chrome_options)
driver.get(url)

# site is old and has strong opinions, so lookups that follow a page change go
# through an explicit wait (10 second timeout) instead of an immediate find
wait = WebDriverWait(driver, 10)

username = wait.until(EC.presence_of_element_located((By.ID, 'user_email')))
password = driver.find_element(By.ID, 'user_password')

# enter the login credentials
# NOTE(review): credentials are literal strings in this cell; keep real ones
# out of version control.
username.send_keys("login")
password.send_keys("password")

login_button = driver.find_element(By.NAME, 'commit')
login_button.click()

# the clinic switcher only exists once the post-login page has rendered
dropdown = wait.until(EC.element_to_be_clickable((By.ID, 'user-usurp')))
dropdown.click()

# generates a list of clickable elements in the dropdown; find_elements returns
# an empty list until the menu is populated, so poll until it is non-empty
options = wait.until(lambda _driver: dropdown.find_elements(By.PARTIAL_LINK_TEXT, 'Company Name'))

# converts those elements to text to: be more iterable in the loop, and more usable as an entry.
link_texts = [option.text for option in options]

# removes headquarters & inactive clinics from list (first entry and last four)
link_texts = link_texts[1:-4]

In [None]:

from io import StringIO

# Tip: iterate link_texts[::-1] when troubleshooting; the reverse order starts
# with the smaller clinics.
for link_text in link_texts:
    # navigate to the correct clinic (the clinic dropdown is already open:
    # it is clicked after login and again at the end of every iteration)
    location = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, link_text)))
    location.click()

    # locate the dropdown for modifying kpi; wait until it can actually be
    # clicked, not merely until it exists in the DOM
    revise_menu = wait.until(EC.element_to_be_clickable((By.ID, 'console')))
    revise_menu.click()

    # locate the hyperlink within dropdown
    revise_metrics = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, 'Revise metrics')))
    revise_metrics.click()

    # we use this to map to our merged dataframe
    clinic_name = driver.find_element(By.CLASS_NAME, 'dropdown-toggle').text

    # The UI text carries a leading prefix that merged_df['Clinic'] does not:
    # 11 characters for every clinic except the one named differently (10).
    if clinic_name == 'The one Clinic with a different naming convention in the UI':
        prefix_length = 10
    else:
        prefix_length = 11
    idf = merged_df[merged_df['Clinic'] == clinic_name[prefix_length:]]

    # we transpose a filtered dataframe to match the web table
    # (metrics become rows, therapist names become columns)
    idf = idf[['Name', 'Code mix', 'Codes per eval', 'Codes per non eval', 'Re eval per eval', 'Rev per visit', 'Revenue', 'Utilization', 'Visits', 'Visits per eval']].set_index('Name').transpose()

    # locate the table on the page once it has been rendered
    table_element = wait.until(EC.presence_of_element_located((By.XPATH, '//table')))

    # convert the table to a dataframe and extract it to our program;
    # read_html wants a file-like object, literal HTML strings are deprecated
    table_html = table_element.get_attribute('outerHTML')
    table_data = pd.read_html(StringIO(table_html))[0]

    # convert the table into a more usable style and insert our filtered df;
    # update() overwrites in place wherever index and column labels match
    df2 = table_data.set_index('Name')
    df2.update(idf)

    # function defined above
    update_web_table(df2)

    # locate the save button and click it
    button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '[value="Save and calculate metrics"]')))
    button.click()

    # on to the next one: reopen the clinic dropdown once the page is ready
    dropdown = wait.until(EC.element_to_be_clickable((By.ID, 'user-usurp')))
    dropdown.click()
