<a href="https://colab.research.google.com/github/abdyraman/hr-deep-learning/blob/main/deep_hr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import hvplot.pandas  # Import hvplot for DataFrame plotting
import holoviews as hv
import panel as pn
import matplotlib.pyplot as plt
pn.extension("tabulator","echarts", "plotly", "vega", "vizzu")

In [2]:
df_full = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')

**Data cleaning**

In [3]:
# remove 4 columns
df = df_full.drop(['Over18', 'EmployeeNumber','EmployeeCount','StandardHours'],axis=1)

In [4]:
df['Department'].unique()

array(['Sales', 'Research & Development', 'Human Resources'], dtype=object)

In [5]:
idf=df.interactive()

**Descriptive statistics**

In [6]:
# Widget for gender selection as a CheckBoxGroup
gender_source = pn.widgets.CheckBoxGroup(
    name='Gender', 
    options=['Female', 'Male'],
    value=['Female', 'Male']  # Sets both options as selected by default
)


## Monthly Income

In [None]:
# Define the pipeline
def create_pipeline(selected_gender):
    return (
        df[df['Gender'] == selected_gender]
        .groupby(['YearsAtCompany','Age'])['MonthlyIncome']
        .mean()
        .round(0) 
        .reset_index()
    )

In [None]:
# Function to create the plot based on selected genders (adapted for multiple selections)
def averagelinearplot_gender_age(selected_genders):
    plots = []
    color_map = {'Female': 'blue', 'Male': 'orange'}
    
    for gender in selected_genders:
        average_data = create_pipeline(gender)
        # Plot 1: Average MonthlyIncome by YearsAtCompany (aggregating across Age)
        avg_income_by_years = average_data.groupby('YearsAtCompany')['MonthlyIncome'].mean().reset_index()
        avg_income_by_years['MonthlyIncome'] = avg_income_by_years['MonthlyIncome'].round(0)
        plot1 = avg_income_by_years.hvplot.line(
            x="YearsAtCompany",
            y='MonthlyIncome',
            line_color=color_map[gender],
            line_width=5,
            title="Average Monthly Income by Years at Company",
            ylabel="Average Monthly Income",
            xlabel="Years at Company",
            ylim=(0, avg_income_by_years['MonthlyIncome'].max() + 10)
        )
        plots.append(plot1)  # Add plot to the list
    
    return hv.Overlay(plots) if plots else hv.Overlay()

# Bind the plotting function to the CheckBoxGroup widget
averages_yearsatcompany_gender_linear_plot = pn.bind(averagelinearplot_gender_age, selected_genders=gender_source)


In [None]:
# Function to create the plot based on selected genders (adapted for multiple selections)
def averagelinearplot_gender_employeeage(selected_genders):
    plots = []
    color_map = {'Female': 'blue', 'Male': 'orange'}
    
    for gender in selected_genders:
        average_data = create_pipeline(gender)
        # Plot 1: Average MonthlyIncome by YearsAtCompany (aggregating across Age)
        avg_income_by_age = average_data.groupby('Age')['MonthlyIncome'].mean().reset_index()
        avg_income_by_age['MonthlyIncome'] = avg_income_by_age['MonthlyIncome'].round(0)
        plot2 = avg_income_by_age.hvplot.line(
            x="Age",
            y='MonthlyIncome',
            line_color=color_map[gender],
            line_width=5,
            title="Average Monthly Income by Employee Age at Company",
            ylabel="Average Monthly Income",
            xlabel="Age",
            ylim=(0, avg_income_by_age['MonthlyIncome'].max() + 10)
        )
        plots.append(plot2)  # Add plot to the list
    
    return hv.Overlay(plots) if plots else hv.Overlay()

# Bind the plotting function to the CheckBoxGroup widget
averages_employeeage_gender_linear_plot = pn.bind(averagelinearplot_gender_employeeage, selected_genders=gender_source)


## Hourly Rate

In [None]:
# Assuming gender_source is a dropdown or radio button
# Ensure that the selected gender is being passed correctly
def create_attrition_pipeline(selected_gender):
    # Check if the selected_gender is a valid entry
    if selected_gender not in df['Gender'].unique():
        raise ValueError(f"Invalid gender selection: {selected_gender}")

    return (
        df[df['Gender'] == selected_gender]
        .groupby(['Attrition'])['MonthlyIncome']  # Only use MonthlyIncome for calculation
        .mean()
        .reset_index()
        .round(0)  # Round the mean to the nearest whole number
    )

def update_averages_plot(selected_gender):
    average_data = create_attrition_pipeline(selected_gender)  # Create the pipeline

    # Define the color based on the selected gender
    color_map = {
        'Female': 'blue',
        'Male': 'orange'
    }
    color = color_map[selected_gender]  # Get the color for the selected gender

    # Create a bar plot for MonthlyIncome grouped by Attrition
    fig = average_data.hvplot.bar(
        x='Attrition',
        y='MonthlyIncome',
        title=f'Average Monthly Income by Gender and Attrition ({selected_gender})',
        ylabel='Average Monthly Income',
        xlabel='Attrition',
        ylim=(0, average_data['MonthlyIncome'].max() + 10),
        color=color,
        legend='top_left'
    )

    # Add data labels on top of the bars
    for i, row in average_data.iterrows():
        fig *= hv.Text(x=row['Attrition'], y=row['MonthlyIncome'] + 2, text=str(int(row['MonthlyIncome'])), halign='center', valign='bottom')

    return fig

# Bind the plot to the selected gender
averages_plot_panel = pn.bind(update_averages_plot, selected_gender=gender_source)


In [None]:
# Define the pipeline
def create_pipeline_2(selected_gender):
    return (
        df[df['Gender'] == selected_gender]
        .groupby(['YearsAtCompany','Age'])['HourlyRate']
        .mean()
        .round(0) 
        .reset_index()
    )

In [None]:
# Function to create the plot based on selected genders (adapted for multiple selections)
def averagelinearplot_gender_age(selected_genders):
    plots = []
    color_map = {'Female': 'blue', 'Male': 'orange'}
    
    for gender in selected_genders:
        average_data = create_pipeline_2(gender)
        # Plot 1: Average HourlyIncome by YearsAtCompany (aggregating across Age)
        avg_income_by_years = average_data.groupby('YearsAtCompany')['HourlyRate'].mean().reset_index()
        avg_income_by_years['HourlyRate'] = avg_income_by_years['HourlyRate'].round(0)
        plot1 = avg_income_by_years.hvplot.line(
            x="YearsAtCompany",
            y='HourlyRate',
            line_color=color_map[gender],
            line_width=5,
            title="Average Hourly Rate by Years at Company",
            ylabel="Average HourlyRate",
            xlabel="Years at Company",
            ylim=(0, avg_income_by_years['HourlyRate'].max() + 10)
        )
        plots.append(plot1)  # Add plot to the list
    
    return hv.Overlay(plots) if plots else hv.Overlay()

# Bind the plotting function to the CheckBoxGroup widget
hourly_yearsatcompany_gender_linear_plot = pn.bind(averagelinearplot_gender_age, selected_genders=gender_source)


In [None]:
# Function to create the plot based on selected genders (adapted for multiple selections)
def averagelinearplot_gender_employeeage(selected_genders):
    plots = []
    color_map = {'Female': 'blue', 'Male': 'orange'}
    
    for gender in selected_genders:
        average_data = create_pipeline_2(gender)
        # Plot 1: Average HourlyIncome by Age
        avg_income_by_age = average_data.groupby('Age')['HourlyRate'].mean().reset_index()
        avg_income_by_age['HourlyRate'] = avg_income_by_age['HourlyRate'].round(0)
        plot2 = avg_income_by_age.hvplot.line(
            x="Age",
            y='HourlyRate',
            line_color=color_map[gender],
            line_width=5,
            title="Average HourlyRate by Employee Age at Company",
            ylabel="Average Hourly Rate",
            xlabel="Age",
            ylim=(0, avg_income_by_age['HourlyRate'].max() + 10)
        )
        plots.append(plot2)  # Add plot to the list
    
    return hv.Overlay(plots) if plots else hv.Overlay()

# Bind the plotting function to the CheckBoxGroup widget
hourly_employeeage_gender_linear_plot = pn.bind(averagelinearplot_gender_employeeage, selected_genders=gender_source)


Panel 1

In [None]:
# Text label above the gender selection widget
gender_selection_text = pn.pane.Markdown("### Check on the box below to display the data for either Females or Males")

# Define the layout for the top row and bottom row
top_row_layout = pn.Row(averages_yearsatcompany_gender_linear_plot,hourly_yearsatcompany_gender_linear_plot)
down_row_layout = pn.Row(averages_employeeage_gender_linear_plot,hourly_employeeage_gender_linear_plot)

# Additional sidebar information
logo = 'accelerateinclusion.png'
sidebar_text = """
Inclusion Accelerator has conducted an analysis of a publicly available IBM HR Analytics dataset from Kaggle, which includes data from nearly 1,500 current and former employees. 
This interactive dashboard offers insights into the relationships between Monthly Income and Hourly Rate, taking into account employees' tenure with the company and age, as well as disaggregating these metrics by gender."""

# Create a Panel layout for the sidebar with the text and the widget
sidebar_layout = pn.Column(sidebar_text, gender_selection_text, gender_source)

# Create the dashboard template
dashboard_template = pn.template.FastListTemplate(
    title="Interactive Dashboard",
    logo=logo,
    sidebar=sidebar_layout,
    sidebar_width=250,
    main=[top_row_layout, down_row_layout]
)

# Display the dashboard
dashboard_template.show()

## Attrition

In [7]:
# Widget for attrition selection
attrition_source = pn.widgets.CheckBoxGroup(
    name='Attrition',
    options=['Yes', 'No'],  # Options for attrition
    value=['Yes', 'No']     # Sets both options selected by default
)


In [8]:
# Widget for attrition selection
department_source = pn.widgets.CheckBoxGroup(
    name='Departments',
    options=['Sales', 'Research & Development', 'Human Resources'],  # Options for attrition
    value=['Sales', 'Research & Development', 'Human Resources']    # Sets both options selected by default
)

In [9]:
# Pipeline function to filter and group data
def create_attrition_pipeline(selected_attrition, selected_department):
    if isinstance(selected_attrition, str):
        selected_attrition = [selected_attrition]
    if isinstance(selected_department, str):
        selected_department = [selected_department]
    
    # Filter and group by Gender, Attrition, and Department
    return (
        df[
            df['Attrition'].isin(selected_attrition) & 
            df['Department'].isin(selected_department)
        ]
        .groupby(['Gender', 'Attrition', 'Department'])['MonthlyIncome']
        .mean()
        .reset_index()
        .round(0)
    )

In [10]:
# Update function for displaying values for females
def update_female_plot(selected_attrition, selected_department):
    if not selected_attrition:
        selected_attrition = ['Yes', 'No']
    if not selected_department:
        selected_department = ['Sales', 'Research & Development', 'Human Resources']
    
    average_data = create_attrition_pipeline(selected_attrition, selected_department)

    # Prepare text output for females
    female_data = average_data[average_data['Gender'] == 'Female']
    
    if female_data.empty:
        # If there's no data for females, handle that case
        female_data = pd.DataFrame({
            'Gender': ['Female'] * 2,
            'Attrition': ['Yes', 'No'],
            'MonthlyIncome': [0, 0],
            'Department': ['N/A', 'N/A']  # Adding a placeholder for the Department
        })
    
    # Calculate the mean of the monthly income for females and round it
    female_mean_income = female_data['MonthlyIncome'].mean().round(0)

    # Constructing the output string
    female_output = f"### Average Monthly Income for Females: ${int(female_mean_income)}\n"

    # Filter data for females who left the company (Attrition = 1)
    left_females = female_data[female_data['Attrition'] == 'Yes']
    # Filter data for females who stayed with the company (Attrition = 0)
    stayed_females = female_data[female_data['Attrition'] == 'No']

    # Writing for females who left the company
    if not left_females.empty:
        female_output += "\n**Females Who Left the Company:**\n"
        for _, row in left_females.iterrows():
            female_output += f"- **{row['Department']}**: ${int(row['MonthlyIncome'])}\n"

    # Writing for females who continued working
    if not stayed_females.empty:
        female_output += "\n**Females Who Stayed with the Company:**\n"
        for _, row in stayed_females.iterrows():
            female_output += f"- **{row['Department']}**: ${int(row['MonthlyIncome'])}\n"

    return pn.pane.Markdown(female_output)

# Binding the update functions with the widget values
female_plot_panel = pn.bind(update_female_plot, selected_attrition=attrition_source, selected_department=department_source)


In [11]:
# Update function for displaying values for females
def update_male_plot(selected_attrition, selected_department):
    if not selected_attrition:
        selected_attrition = ['Yes', 'No']
    if not selected_department:
        selected_department = ['Sales', 'Research & Development', 'Human Resources']
    
    average_data = create_attrition_pipeline(selected_attrition, selected_department)

    # Prepare text output for females
    male_data = average_data[average_data['Gender'] == 'Male']
    
    if male_data.empty:
        # If there's no data for females, handle that case
        male_data = pd.DataFrame({
            'Gender': ['Male'] * 2,
            'Attrition': ['Yes', 'No'],
            'MonthlyIncome': [0, 0],
            'Department': ['N/A', 'N/A']  # Adding a placeholder for the Department
        })
    
    # Calculate the mean of the monthly income for females and round it
    male_mean_income = male_data['MonthlyIncome'].mean().round(0)

    # Constructing the output string
    male_output = f"### Average Monthly Income for Males: ${int(male_mean_income)}\n"

    # Filter data for females who left the company (Attrition = 1)
    left_males = male_data[male_data['Attrition'] == 'Yes']
    # Filter data for females who stayed with the company (Attrition = 0)
    stayed_males = male_data[male_data['Attrition'] == 'No']

    # Writing for females who left the company
    if not left_males.empty:
        male_output += "\n**Males Who Left the Company:**\n"
        for _, row in left_males.iterrows():
            male_output += f"- **{row['Department']}**: ${int(row['MonthlyIncome'])}\n"

    # Writing for females who continued working
    if not stayed_males.empty:
        male_output += "\n**Males Who Stayed with the Company:**\n"
        for _, row in stayed_males.iterrows():
            male_output += f"- **{row['Department']}**: ${int(row['MonthlyIncome'])}\n"

    return pn.pane.Markdown(male_output)

# Binding the update functions with the widget values
male_plot_panel = pn.bind(update_male_plot, selected_attrition=attrition_source, selected_department=department_source)


In [22]:
# Function to create bar plot for females
def create_female_bar_plot(selected_attrition, selected_department):
    average_data = create_attrition_pipeline(selected_attrition, selected_department)

    # Filter female data
    female_data = average_data[average_data['Gender'] == 'Female']
    
    # Handle empty female data case
    if female_data.empty:
        female_data = pd.DataFrame({
            'Department': ['Sales', 'Research & Development', 'Human Resources'],
            'MonthlyIncome': [0, 0, 0],
            'Attrition': ['Yes', 'No', 'Yes']  # Example attrition for the empty DataFrame
        })

    # Prepare the plot data
    female_grouped = female_data.groupby(['Department', 'Attrition'])['MonthlyIncome'].mean().unstack()

    # Create the plot
    fig, ax = plt.subplots(figsize=(6, 4))
    female_grouped.plot(kind='bar', ax=ax, color=['blue', 'lightblue'], edgecolor='black')
    
    # Set title and labels
    ax.set_title('Average Monthly Income for Females by Attrition Status')
    ax.set_xlabel('Department')
    ax.set_ylabel('Average Monthly Income ($)')
    ax.set_ylim(0, female_grouped.max().max() * 1.1)
    plt.xticks(rotation=0)
    ax.legend(title='Attrition', loc='upper left')

    return pn.pane.Matplotlib(fig)

# Function to create bar plot for males
def create_male_bar_plot(selected_attrition, selected_department):
    average_data = create_attrition_pipeline(selected_attrition, selected_department)

    # Filter male data
    male_data = average_data[average_data['Gender'] == 'Male']
    
    # Handle empty male data case
    if male_data.empty:
        male_data = pd.DataFrame({
            'Department': ['Sales', 'Research & Development', 'Human Resources'],
            'MonthlyIncome': [0, 0, 0],
            'Attrition': ['Yes', 'No', 'Yes']  # Example attrition for the empty DataFrame
        })

    # Prepare the plot data
    male_grouped = male_data.groupby(['Department', 'Attrition'])['MonthlyIncome'].mean().unstack()

    # Create the plot
    fig, ax = plt.subplots(figsize=(6, 4))
    male_grouped.plot(kind='bar', ax=ax, color=['orange', 'lightcoral'], edgecolor='black')

    # Set title and labels
    ax.set_title('Average Monthly Income for Males by Attrition Status')
    ax.set_xlabel('Department')
    ax.set_ylabel('Average Monthly Income ($)')
    ax.set_ylim(0, male_grouped.max().max() * 1.1)
    plt.xticks(rotation=0)
    ax.legend(title='Attrition', loc='upper left')

    return pn.pane.Matplotlib(fig)

# Binding the update functions with the widget values
female_barplot_panel = pn.bind(create_female_bar_plot, selected_attrition=attrition_source, selected_department=department_source)
male_barplot_panel = pn.bind(create_male_bar_plot, selected_attrition=attrition_source, selected_department=department_source)


Panel 2

In [23]:
# Text label above the gender selection widget
selection_text_one = pn.pane.Markdown("### Uncheck the box below to display the average monthly income for employees who have left the company.")
selection_text_two = pn.pane.Markdown("### Uncheck the boxes below to filter the departments for which you want to see average income data.")

# Define the layout for the top row and bottom row
top_row_layout = pn.Row(female_plot_panel,female_barplot_panel)
down_row_layout = pn.Row(male_plot_panel,male_barplot_panel)

# Additional sidebar information
logo = 'accelerateinclusion.png'
sidebar_text = """
Employee attrition measures how many workers have left an organization and is a common metric companies use to assess their performance."""

# Create a Panel layout for the sidebar with the text and the widget
sidebar_layout = pn.Column(sidebar_text, selection_text_one, attrition_source,selection_text_two, department_source)

# Create the dashboard template
dashboard_template = pn.template.FastListTemplate(
    title="Interactive Dashboard",
    logo=logo,
    sidebar=sidebar_layout,
    sidebar_width=250,
    main=[top_row_layout, down_row_layout]
)

# Display the dashboard
dashboard_template.show()

Launching server at http://localhost:53058


  ax.set_ylim(0, female_grouped.max().max() * 1.1)
  ax.set_ylim(0, male_grouped.max().max() * 1.1)


<panel.io.server.Server at 0x1739f2050>

In [12]:
# # Function to create bar plot for females
# def create_female_bar_plot(selected_attrition, selected_department):
#     average_data = create_attrition_pipeline(selected_attrition, selected_department)
    
#     female_data = average_data[average_data['Gender'] == 'Female']
#     if female_data.empty:
#         female_data = pd.DataFrame({
#             'Department': ['Sales', 'Research & Development', 'Human Resources'],
#             'MonthlyIncome': [0, 0, 0]
#         })

#     fig, ax = plt.subplots(figsize=(6, 4))
#     ax.bar(female_data['Department'], female_data['MonthlyIncome'], color='blue')
#     ax.set_title('Average Monthly Income for Females')
#     ax.set_xlabel('Department')
#     ax.set_ylabel('Average Monthly Income ($)')
#     ax.set_ylim(0, max(female_data['MonthlyIncome'].max(), 1) * 1.1)
#     plt.xticks(rotation=45)

#     return pn.pane.Matplotlib(fig)

# # Function to create bar plot for males
# def create_male_bar_plot(selected_attrition, selected_department):
#     average_data = create_attrition_pipeline(selected_attrition, selected_department)

#     male_data = average_data[average_data['Gender'] == 'Male']
#     if male_data.empty:
#         male_data = pd.DataFrame({
#             'Department': ['Sales', 'Research & Development', 'Human Resources'],
#             'MonthlyIncome': [0, 0, 0]
#         })

#     fig, ax = plt.subplots(figsize=(6, 4))
#     ax.bar(male_data['Department'], male_data['MonthlyIncome'], color='orange')
#     ax.set_title('Average Monthly Income for Males')
#     ax.set_xlabel('Department')
#     ax.set_ylabel('Average Monthly Income ($)')
#     ax.set_ylim(0, max(male_data['MonthlyIncome'].max(), 1) * 1.1)
#     plt.xticks(rotation=45)

#     return pn.pane.Matplotlib(fig)

# # Binding the update functions with the widget values
# female_barplot_panel = pn.bind(create_female_bar_plot, selected_attrition=attrition_source, selected_department=department_source)
# male_barplot_panel = pn.bind(create_male_bar_plot, selected_attrition=attrition_source, selected_department=department_source)
