In [43]:
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.graph_objects as go
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

<pre>
+---------------------------------------------------------------------------------------------------------+
|+-------------------------------------------------------------------------------------------------------+|
||[1. logo] 2. Louisville Metro Government Salary tracker   [3. Department drop down][ 4. year drop down]|| 
|+-------------------------------------------------------------------------------------------------------+|
|+--------+     6. salary spend 8. actual salary spend +-----+     +-------------------------------------+|
||5. plot |     7. salary data  9. salary data         |12. %|     |  11. horizontal bar plot department || 
||        |                                            +-----+     |                                     || 
|+--------+                                                        |                                     || 
|                                                                  |                                     || 
|+---------------------------------------------------------------+ |                                     || 
|| 10. Top employees salary deviation data frame                 | |                                     || 
||                                                               | |                                     || 
||                                                               | |                                     || 
||                                                               | |                                     || 
||                                                               | |                                     || 
||                                                               | |                                     || 
||                                                               | |                                     || 
|+---------------------------------------------------------------+ +-------------------------------------+|
+---------------------------------------------------------------------------------------------------------+
</pre>


In [44]:
def check_data_updated():
    '''
    Look up when the Louisville Metro salary dataset was last updated.

    Opens the dataset page in a Selenium-driven Chrome browser, reads the
    "modified" metadata item, prints its date and label, and returns the
    date text so it can be handed to process_salary_data instead of being
    copied by hand.

    Returns:
        str: The date text shown on the page, e.g. 'November 27, 2024'.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the metadata
            item is not found before the implicit wait expires.
        IndexError: If the metadata item holds fewer than two <div> elements.
    '''
    # URL of the page containing the data update info
    url = "https://data.louisvilleky.gov/datasets/8bd82421c9b94c37925fb37edaa1c5e8_0/explore"

    # Set up Selenium WebDriver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        # Element lookups will poll for up to 5 seconds, which gives the
        # page time to render the metadata panel
        driver.implicitly_wait(5)

        # Open the URL with Selenium
        driver.get(url)

        # Targeting the specific list item that contains the "Data Updated" date and label
        data_updated_item = driver.find_element(By.CSS_SELECTOR, "li.metadata-item[data-test='modified']")

        # Query the child <div>s once: the first holds the date, the second the label
        divs = data_updated_item.find_elements(By.TAG_NAME, 'div')
        date_div = divs[0].text
        label_div = divs[1].text

        # Print the extracted information
        print(f"Data Updated Date: {date_div}")
        print(f"Label: {label_div}")

        return date_div
    finally:
        # Always close the browser, even when a lookup above fails
        driver.quit()

# Run the scraper once; it prints the "Data Updated" date and its label as read from the dataset page
check_data_updated()


Data Updated Date: November 27, 2024
Label: Data Updated


In [45]:
# Load the raw salary records; the relative path is resolved against the notebook's working directory
data = pd.read_csv("../data/salary.csv")

In [46]:
# Hand-copied from the "Data Updated Date" printed by check_data_updated() above;
# update this value whenever the dataset is refreshed
date_div = 'November 27, 2024'

In [47]:
def _weeks_elapsed_in_year(date_object):
    '''
    Return how many weeks (1-52) of date_object's calendar year have elapsed.

    ISO week numbers belong to the ISO year, which can differ from the
    calendar year around New Year, so the edge cases are mapped explicitly.
    '''
    iso_year, iso_week, _ = date_object.isocalendar()
    if iso_year > date_object.year:
        # Late December already counted as week 1 of next ISO year: the calendar year is complete
        return 52
    if iso_year < date_object.year:
        # Early January still counted as week 52/53 of last ISO year: treat as the first week
        return 1
    # ISO years may have 53 weeks; cap so the prorated rate never exceeds the annual rate
    return min(iso_week, 52)


def process_salary_data(data, date_div):
    '''
    This function processes the salary data by performing several transformations:
    - Drops unnecessary columns
    - Fills missing values with 0
    - Calculates total expected salary
    - Merges department names
    - Calculates total weekly hours worked
    - Converts date to datetime and computes week and year for adjustments
    - Adjusts the annual rate for the specified year and week

    The input frame is not modified; a new frame is returned.

    Args:
        data (pd.DataFrame): The input DataFrame with salary data
        date_div (str): The date string (from the check_data_updated function),
            formatted like 'November 27, 2024', to determine the week and year

    Returns:
        pd.DataFrame: The processed salary data

    Raises:
        KeyError: If any of the columns to drop is missing (for example when
            the function is applied to an already processed frame).
        ValueError: If date_div does not match the "%B %d, %Y" format.
    '''
    # Drops columns we don't need
    cols_to_drop = ['jobTitle', 'Other', 'ObjectId']
    data = data.drop(columns=cols_to_drop)

    # Filling missing data with 0 BEFORE any arithmetic, so a missing
    # Incentive_Allowance does not wipe out the whole expected salary
    data = data.fillna(0)

    # Calculates total expected salary
    # NOTE(review): Salary_Total uses the full-year Annual_Rate; the proration
    # applied to Annual_Rate further down never reaches Salary_Total, which is
    # the column the reporting functions in this notebook read. Confirm
    # whether the current-year budget is meant to be prorated as well.
    data['Salary_Total'] = data['Annual_Rate'] + data['Incentive_Allowance']

    # Merging the departments together
    data['Department'] = data['Department'].replace({
        'Louisville Metro Police': 'Louisville Metro Police Department',
        'Department of Corrections': 'Metro Corrections',
    })

    # Calculates total weekly hours worked
    data['Hr_Rate'] = data['Regular_Rate'] / 2080
    data['Ot_Rate'] = data['Hr_Rate'] * 1.5
    hours = data['Overtime_Rate'] / data['Ot_Rate'] / 52 + 40
    # A zero overtime rate yields inf (x/0) or NaN (0/0); fall back to 40 hours
    # for both, and only in this column
    data['Hr_Worked'] = hours.replace([np.inf, -np.inf], np.nan).fillna(40)

    # Convert the string to a datetime object
    date_object = datetime.strptime(date_div, "%B %d, %Y")

    # Get the number of elapsed weeks of the calendar year
    week = _weeks_elapsed_in_year(date_object)

    # Get the calendar year
    c_year = date_object.year

    # Check if there are any rows where the CalYear is equal to the specified c_year
    current_year = data['CalYear'] == c_year
    if current_year.any():
        # Convert Annual_Rate to a weekly rate, then scale it to the elapsed weeks
        data.loc[current_year, 'Annual_Rate'] = data.loc[current_year, 'Annual_Rate'] / 52 * week

    return data


In [48]:
# Replace the raw frame with its processed version and preview the first rows.
# NOTE: this cell is not re-runnable on its own: `data` is overwritten, so a second
# run raises KeyError because the dropped columns are already gone. Reload the CSV first.
data = process_salary_data(data, date_div)
data.head()

Unnamed: 0,CalYear,Employee_Name,Department,Annual_Rate,Regular_Rate,Overtime_Rate,Incentive_Allowance,YTD_Total,Salary_Total,Hr_Rate,Ot_Rate,Hr_Worked
0,2019,"Pallast, Kirk",Parks & Recreation,43035.2,42527.35,399.6,0.0,42926.95,43035.2,20.445841,30.668762,40.250568
1,2019,"Williams, Arthur",Parks & Recreation,43014.4,42545.13,0.0,0.0,42545.13,43014.4,20.454389,30.681584,40.0
2,2019,"Addison, Joseph",Library,41974.4,41974.4,9.08,0.0,42040.3,41974.4,20.18,30.27,40.005769
3,2019,"Flint, Ruth",Library,54870.4,54870.4,0.0,0.0,54956.16,54870.4,26.38,39.57,40.0
4,2019,"Probus, Kelly",Develop Louisville,43014.4,42555.26,15.2,0.0,42570.46,43014.4,20.45926,30.688889,40.009525


# code for 5



In [49]:
# Code for 5
def plot_info(year, data) -> None:
    '''
    Show a gauge comparing actual salary spend with budgeted salary for one year.

    The dark blue band spans the budgeted amount (sum of Salary_Total), the
    red threshold line marks the actual spend (sum of YTD_Total), and a
    yellow band highlights any overspend beyond the budget.

    Args:
        year: Value of CalYear to report on.
        data (pd.DataFrame): Frame with CalYear, YTD_Total and Salary_Total columns.

    Raises:
        ValueError: If data holds no rows for the given year.
    '''
    # Filter the data for the given year
    year_filter = data[data['CalYear'] == year]
    if year_filter.empty:
        raise ValueError(f"No salary data found for year {year}.")

    # Extract actual and expected values
    actual = year_filter['YTD_Total'].sum()
    expected = year_filter['Salary_Total'].sum()

    # Create the gauge plot
    steps = [
        {'range': [0, expected], 'color': '#004080'}  # Dark blue for expected salary
    ]

    # Add yellow step if actual exceeds expected
    if actual > expected:
        steps.append({'range': [expected, actual], 'color': 'yellow'})  # Yellow for actual salary

    # Leave 10% headroom above whichever value is larger so an overspend
    # (yellow band and threshold line) is never pushed off the scale
    axis_max = max(actual, expected) * 1.1

    fig = go.Figure(go.Indicator(
        mode="gauge+number+delta",
        value=actual,
        delta={
            'reference': expected,
            'increasing': {'color': "red"},
            'decreasing': {'color': "green"}
        },
        gauge={
            'axis': {'range': [0, axis_max]},
            # Transparent bar: the value is conveyed by the threshold line instead
            'bar': {'color': 'rgba(0,0,0,0)'},
            'steps': steps,
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': .95,
                'value': actual  # Place the red line at the actual
            }
        },
        title={'text': f"{year} Budgeted Salary Spend vs. Salary Spend Actual", 'font': {'size': 20}},
    ))

    fig.update_layout(
        title_font={'size': 20},  # Set font size for the overall layout title
    )

    # Show the figure
    fig.show()


# Example usage: render the budget-vs-actual gauge for 2024
plot_info(2024, data)


In [50]:
# Example usage: render the budget-vs-actual gauge for 2022
plot_info(2022, data)

In [51]:
# code for 7
def calculate_total_spend(year, data) -> str:
    '''
    Return the actual salary spend for one year as display text.

    Args:
        year: Value of CalYear to total.
        data (pd.DataFrame): Frame with CalYear and YTD_Total columns.

    Returns:
        str: Sum of YTD_Total for the year, rounded to two decimals and
        formatted with thousands separators (e.g. '3,000.75').
    '''
    # Pick the year's rows and the spend column in a single selection
    spend_for_year = data.loc[data['CalYear'] == year, 'YTD_Total']

    # Round first, then render with thousands separators and two decimals
    rounded_spend = spend_for_year.sum().round(2)
    return f"{rounded_spend:,.2f}"

In [52]:
# code for 9 
def calculate_total_budget(year, data) -> str:
    '''
    Return the budgeted salary for one year as display text.

    Args:
        year: Value of CalYear to total.
        data (pd.DataFrame): Frame with CalYear and Salary_Total columns.

    Returns:
        str: Sum of Salary_Total for the year, rounded to two decimals and
        formatted with thousands separators (e.g. '1,234,500.50').
    '''
    # Boolean mask for the requested year
    in_year = data['CalYear'] == year

    # Total the budget column over the matching rows and round to cents
    total_budget = data[in_year]['Salary_Total'].sum().round(2)

    # Render with thousands separators and two decimals
    return format(total_budget, ",.2f")

In [53]:
#code for 12
def calculate_budget_difference(year, data) -> str:
    '''
    Return how far actual salary spend is from budget for one year, in percent.

    Args:
        year: Value of CalYear to evaluate.
        data (pd.DataFrame): Frame with CalYear, YTD_Total and Salary_Total columns.

    Returns:
        str: Signed percentage with two decimals (e.g. '-17.23%'), or an
        explanatory message when the year has no rows or no budgeted salary.
    '''
    # Filter the data for the specified year
    year_filter = data[data['CalYear'] == year]

    if year_filter.empty:
        return "No data for the specified year."

    # Extract actual and expected values
    actual_spend = year_filter['YTD_Total'].sum()
    budgeted_salary = year_filter['Salary_Total'].sum()

    # A zero budget would make the ratio inf/nan; report it instead of dividing
    if budgeted_salary == 0:
        return "No budgeted salary for the specified year."

    # Calculate the percentage difference
    difference = ((actual_spend - budgeted_salary) / budgeted_salary) * 100

    # Format the difference with a '+' or '-' and thousands separators
    return f"{difference:+,.2f}%"



In [54]:
# Call the function for 7: formatted sum of YTD_Total for 2024
total_sal_spend = calculate_total_spend(2024, data)

print(f'Total Salary Spend: {total_sal_spend}')

Total Salary Spend: 373,062,743.42


In [55]:
# Call the function for 9: formatted sum of Salary_Total for 2024
total_sal_budget = calculate_total_budget(2024, data)

print(f'Total Salary Budgeted: {total_sal_budget}')

Total Salary Budgeted: 450,699,319.02


In [56]:
# Call the function for 12: signed percent gap between 2024 spend and budget
budget_difference = calculate_budget_difference(2024, data)
print(budget_difference)

-17.23%


In [57]:
# code for 10
def top_emp_dev(year, data, min_salary=20000):
    '''
    Return the ten employees whose pay most exceeded their expected salary.

    Args:
        year: Value of CalYear to report on.
        data (pd.DataFrame): Frame with CalYear, Employee_Name, Department,
            YTD_Total and Salary_Total columns.
        min_salary: Smallest Salary_Total for which a percentage is computed.
            Rows below it (and rows with a zero Salary_Total) get NaN so that
            tiny or missing expected salaries do not dominate the ranking.

    Returns:
        pd.DataFrame: Up to ten rows sorted by Discrepancy_Percent in
        descending order (rows without a percentage come last), with a fresh
        0-based index. Empty when the year has no rows.
    '''
    # Filter the data for the given year; copy so the caller's frame is untouched
    top_employee = data[data['CalYear'] == year].copy()

    # Calculate the discrepancy in currency units
    top_employee['Deviation'] = top_employee['YTD_Total'] - top_employee['Salary_Total']

    # Only rows with a meaningful, non-zero expected salary get a percentage
    salary = top_employee['Salary_Total']
    eligible = (salary >= min_salary) & (salary != 0)

    # Vectorised ratio; ineligible rows are blanked to NaN, then rounded to 2 decimals
    percent = (top_employee['Deviation'] / salary) * 100
    top_employee['Discrepancy_Percent'] = percent.where(eligible).round(2)

    # Sort the DataFrame by 'Discrepancy_Percent' in descending order
    top_employee = top_employee.sort_values(by='Discrepancy_Percent', ascending=False)

    # Keep only the specified columns and discard the old index
    top_employee = top_employee[['CalYear', 'Employee_Name', 'Department',
                                 'YTD_Total', 'Salary_Total', 'Deviation',
                                 'Discrepancy_Percent']].reset_index(drop=True)

    return top_employee.head(10)


In [58]:
# Example usage for 10: the ten largest 2024 pay-over-expected-salary percentages
top_employee_result = top_emp_dev(2024, data)
top_employee_result

Unnamed: 0,CalYear,Employee_Name,Department,YTD_Total,Salary_Total,Deviation,Discrepancy_Percent
0,2024,"Standard, Royce Leshawn",Metro Corrections,180770.98,66934.4,113836.58,170.07
1,2024,"Stimphil, Richardson",Metro Corrections,153587.46,60299.2,93288.26,154.71
2,2024,"Jones, Dontorya J",Emergency Management Services,144154.28,62583.6,81570.68,130.34
3,2024,"Jenkins, Todd Barry",Louisville Metro Police Department,251840.31,114357.73,137482.58,120.22
4,2024,"Roberts, Bryan Keith",Metro Corrections,125239.59,58635.2,66604.39,113.59
5,2024,"Whidby, Jacob T",Metro Corrections,156558.72,74256.0,82302.72,110.84
6,2024,"Nicolas-Bates, Marylea Diedra",Metro Corrections,106942.63,50960.0,55982.63,109.86
7,2024,"Starcher, Candice Lynn",Louisville Metro Police Department,96518.09,46248.36,50269.73,108.7
8,2024,"Ashby, Stephanie Renee",ES & MetroSafe,137706.0,66218.31,71487.69,107.96
9,2024,"Salman, Saja Naji",Metro Corrections,101862.7,50835.2,51027.5,100.38


In [59]:
# code for 11
def department_discrepancy(year, data):
    '''
    Summarise, per department, how actual salary spend compares with budget.

    Args:
        year: Value of CalYear to report on.
        data (pd.DataFrame): Frame with CalYear, Department, YTD_Total and
            Salary_Total columns.

    Returns:
        pd.DataFrame: One row per department for the year with the columns
        CalYear, Department, 'Total Salary Spend' and 'Salary Budget' (both
        formatted as text with thousands separators) and 'Discrepancy Percent'
        (float, NaN when the department has no budget). Rows are sorted by
        'Discrepancy Percent' in descending order, and the index is the row
        position within the all-years grouping.
    '''
    # Group by 'CalYear' and 'Department', summing 'YTD_Total' and 'Salary_Total'
    department = data.groupby(['CalYear', 'Department'])[['YTD_Total', 'Salary_Total']].sum().reset_index()

    # Filter the data by the given year; copy so the column assignments below
    # work on an independent frame rather than a slice
    department = department[department['CalYear'] == year].copy()

    # Calculate the percentage difference: ((YTD_Total - Salary_Total) / Salary_Total) * 100
    budget = department['Salary_Total']
    percent = ((department['YTD_Total'] - budget) / budget) * 100

    # A zero budget would give +/-inf and push the department to the top of the
    # ranking; report it as missing instead, then round to 2 decimal places
    department['Discrepancy_Percent'] = percent.where(budget != 0).round(2)

    # Sort the DataFrame by 'Discrepancy_Percent' in descending order (missing values last)
    department = department.sort_values(by='Discrepancy_Percent', ascending=False)

    # Format 'YTD_Total' and 'Salary_Total' with thousands separators
    department['YTD_Total'] = department['YTD_Total'].apply(lambda x: f"{x:,.2f}")
    department['Salary_Total'] = department['Salary_Total'].apply(lambda x: f"{x:,.2f}")

    # Rename columns for final output
    department = department.rename(columns={
        'YTD_Total': 'Total Salary Spend',
        'Salary_Total': 'Salary Budget',
        'Discrepancy_Percent': 'Discrepancy Percent'
    })

    # Return the result
    return department


In [60]:
# Example usage: per-department spend vs. budget for 2024
department_result = department_discrepancy(2024, data)
department_result

Unnamed: 0,CalYear,Department,Total Salary Spend,Salary Budget,Discrepancy Percent
224,2024,Louisville Metro Police Department,123082990.73,124125361.75,-0.84
228,2024,Metro Corrections,28968309.48,30877424.89,-6.18
213,2024,Emergency Management Services,11182592.57,11960136.14,-6.5
211,2024,ES & MetroSafe,11493629.77,12602656.88,-8.8
222,2024,Louisville Fire,42079902.81,46479751.63,-9.47
250,2024,Youth Transitional Services,1464017.29,1663070.33,-11.97
209,2024,Criminal Justice Commission,308243.95,354505.99,-13.05
238,2024,Office of Philanthropy,319561.43,367525.45,-13.05
243,2024,Parking Authority of River City - PARC,1897249.91,2247607.26,-15.59
247,2024,Records Compliance,918726.62,1127444.44,-18.51
