<a href="https://colab.research.google.com/github/abdyraman/hr-deep-learning/blob/main/deep_hr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import hvplot.pandas  # Import hvplot for DataFrame plotting
import holoviews as hv
import panel as pn
pn.extension("tabulator","echarts", "plotly", "vega", "vizzu")

In [21]:
df_full = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')

**Data cleaning**

In [22]:
# remove 4 columns
df = df_full.drop(['Over18', 'EmployeeNumber','EmployeeCount','StandardHours'],axis=1)

In [23]:
idf=df.interactive()

**Descriptive statistics**

In [24]:
# Widget for gender selection as a CheckBoxGroup
gender_source = pn.widgets.CheckBoxGroup(
    name='Gender', 
    options=['Female', 'Male'],
    value=['Female', 'Male']  # Sets both options as selected by default
)


Monthly Income

In [25]:
# Define the pipeline
def create_pipeline(selected_gender):
    return (
        df[df['Gender'] == selected_gender]
        .groupby(['YearsAtCompany','Age','Department','Attrition'])['MonthlyIncome']
        .mean()
        .round(0) 
        .reset_index()
    )

In [26]:
# Function to create the plot based on selected genders (adapted for multiple selections)
def averagelinearplot_gender_age(selected_genders):
    plots = []
    color_map = {'Female': 'blue', 'Male': 'orange'}
    
    for gender in selected_genders:
        average_data = create_pipeline(gender)
        # Plot 1: Average MonthlyIncome by YearsAtCompany (aggregating across Age)
        avg_income_by_years = average_data.groupby('YearsAtCompany')['MonthlyIncome'].mean().reset_index()
        avg_income_by_years['MonthlyIncome'] = avg_income_by_years['MonthlyIncome'].round(0)
        plot1 = avg_income_by_years.hvplot.line(
            x="YearsAtCompany",
            y='MonthlyIncome',
            line_color=color_map[gender],
            line_width=5,
            title="Average Monthly Income by Years at Company",
            ylabel="Average Monthly Income",
            xlabel="Years at Company",
            ylim=(0, avg_income_by_years['MonthlyIncome'].max() + 10)
        )
        plots.append(plot1)  # Add plot to the list
    
    return hv.Overlay(plots) if plots else hv.Overlay()

# Bind the plotting function to the CheckBoxGroup widget
averages_yearsatcompany_gender_linear_plot = pn.bind(averagelinearplot_gender_age, selected_genders=gender_source)


In [27]:
# Function to create the plot based on selected genders (adapted for multiple selections)
def averagelinearplot_gender_employeeage(selected_genders):
    plots = []
    color_map = {'Female': 'blue', 'Male': 'orange'}
    
    for gender in selected_genders:
        average_data = create_pipeline(gender)
        # Plot 1: Average MonthlyIncome by YearsAtCompany (aggregating across Age)
        avg_income_by_age = average_data.groupby('Age')['MonthlyIncome'].mean().reset_index()
        avg_income_by_age['MonthlyIncome'] = avg_income_by_age['MonthlyIncome'].round(0)
        plot2 = avg_income_by_age.hvplot.line(
            x="Age",
            y='MonthlyIncome',
            line_color=color_map[gender],
            line_width=5,
            title="Average Monthly Income by Employee Age at Company",
            ylabel="Average Monthly Income",
            xlabel="Age",
            ylim=(0, avg_income_by_age['MonthlyIncome'].max() + 10)
        )
        plots.append(plot2)  # Add plot to the list
    
    return hv.Overlay(plots) if plots else hv.Overlay()

# Bind the plotting function to the CheckBoxGroup widget
averages_employeeage_gender_linear_plot = pn.bind(averagelinearplot_gender_employeeage, selected_genders=gender_source)


In [28]:
# Text label above the gender selection widget
gender_selection_text = pn.pane.Markdown("### Check on the box below to display the data for either Females or Males")

# Define the layout for the top row and bottom row
top_row_layout = pn.Row(averages_yearsatcompany_gender_linear_plot)
down_row_layout = pn.Row(averages_employeeage_gender_linear_plot)

# Additional sidebar information
logo = 'accelerateinclusion.png'
sidebar_text = """
Inclusion Accelerator analyzed a publicly available IBM HR Analytics dataset, published on Kaggle, which includes data from nearly 1,500 current and former employees. This dataset offers insights into job satisfaction, work-life balance, tenure, experience, salary, and demographic details.
"""

# Create a Panel layout for the sidebar with the text and the widget
sidebar_layout = pn.Column(sidebar_text, gender_selection_text, gender_source)

# Create the dashboard template
dashboard_template = pn.template.FastListTemplate(
    title="Interactive Dashboard",
    logo=logo,
    sidebar=sidebar_layout,
    sidebar_width=250,
    main=[top_row_layout, down_row_layout]
)

# Display the dashboard
dashboard_template.show()

Launching server at http://localhost:60063


<panel.io.server.Server at 0x17162ce90>



2024-11-05 19:13:57,341 ERROR: panel.reactive - Callback failed for object named 'Gender' changing property {'value': []} 
Traceback (most recent call last):
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/panel/reactive.py", line 462, in _process_events
    self.param.update(**self_params)
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/param/parameterized.py", line 2319, in update
    restore = dict(self_._update(arg, **kwargs))
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/param/parameterized.py", line 2352, in _update
    self_._batch_call_watchers()
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/param/parameterized.py", line 2546, in _batch_call_watchers
    self_._execute_watcher(watcher, events)
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/param/parameterized.py", line 2506, in _execute_watcher
    watc

ERROR:tornado.application:Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <tornado.platform.asyncio.AsyncIOMainLoop object at 0x10f863e90>>, <Task finished name='Task-803' coro=<ServerSession.with_document_locked() done, defined at /Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/bokeh/server/session.py:77> exception=SkipRendering('bokeh backend could not plot any Elements in the Overlay.')>)
Traceback (most recent call last):
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/tornado/ioloop.py", line 738, in _run_callback
    ret = callback()
          ^^^^^^^^^^
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/tornado/ioloop.py", line 762, in _discard_future_result
    future.result()
  File "/Users/zhibekabdyramanova/anaconda3/lib/python3.11/site-packages/bokeh/server/session.py", line 98, in _needs_document_lock_wrapper
    result = await result
             ^^^^^^^^^^^^
  File "/Us

In [20]:
# Assuming gender_source is a dropdown or radio button
# Ensure that the selected gender is being passed correctly
def create_attrition_pipeline(selected_gender):
    # Check if the selected_gender is a valid entry
    if selected_gender not in df['Gender'].unique():
        raise ValueError(f"Invalid gender selection: {selected_gender}")

    return (
        df[df['Gender'] == selected_gender]
        .groupby(['Attrition'])['MonthlyIncome']  # Only use MonthlyIncome for calculation
        .mean()
        .reset_index()
        .round(0)  # Round the mean to the nearest whole number
    )

def update_averages_plot(selected_gender):
    average_data = create_attrition_pipeline(selected_gender)  # Create the pipeline

    # Define the color based on the selected gender
    color_map = {
        'Female': 'blue',
        'Male': 'orange'
    }
    color = color_map[selected_gender]  # Get the color for the selected gender

    # Create a bar plot for MonthlyIncome grouped by Attrition
    fig = average_data.hvplot.bar(
        x='Attrition',
        y='MonthlyIncome',
        title=f'Average Monthly Income by Gender and Attrition ({selected_gender})',
        ylabel='Average Monthly Income',
        xlabel='Attrition',
        ylim=(0, average_data['MonthlyIncome'].max() + 10),
        color=color,
        legend='top_left'
    )

    # Add data labels on top of the bars
    for i, row in average_data.iterrows():
        fig *= hv.Text(x=row['Attrition'], y=row['MonthlyIncome'] + 2, text=str(int(row['MonthlyIncome'])), halign='center', valign='bottom')

    return fig

# Bind the plot to the selected gender
averages_plot_panel = pn.bind(update_averages_plot, selected_gender=gender_source)


Old Panel

In [None]:
# Widget for gender selection as a ToggleGroup
gender_source = pn.widgets.ToggleGroup(
    name='Gender', 
    options=['Female', 'Male'], 
    button_type='success',
     behavior='radio'  # This makes it behave like a radio button group
)
gender_source

Years at company

In [None]:
# Define the pipeline
def create_pipeline(selected_gender):
    return (
        df[df['Gender'] == selected_gender]
        .groupby(['YearsAtCompany'])['MonthlyIncome']
        .mean()
        .round(0) 
        .reset_index()
    )

In [None]:


# Function to create the plot based on the selected gender
def averagelinearplot_gender_age(selected_gender):
    # Create the pipeline based on selected gender
    average_data = create_pipeline(selected_gender)
    
    # Define colors for each gender
    color_map = {
        'Female': 'blue',  # Color for Female
        'Male': 'orange'   # Color for Male
    }

    # Plot the average values by YearsAtCompany, only for the selected Gender
    fig = average_data.hvplot.line(
        x="YearsAtCompany",                # Use YearsAtCompany as the x-axis
        y='MonthlyIncome',                 # Always plot MonthlyIncome
        line_color=color_map[selected_gender],  # Set color based on selected gender
        line_width=5,                       # Line width
        title=f"Average Monthly Income by Years at Company ({selected_gender})",
        ylabel="Average Monthly Income",
        xlabel="Years at Company",
        ylim=(0, average_data['MonthlyIncome'].max() + 10)  # Set y-axis limit based on max of selected variable
    )

    return fig

# Create a Panel layout to include the plot
averages_yearsatcompany_gender_linear_plot = pn.bind(averagelinearplot_gender_age, selected_gender=gender_source)

Employee Age

In [None]:
# Define the pipeline for Age
def create_age_pipeline(selected_gender):
    return (
        df[df['Gender'] == selected_gender]
        .groupby(['Age'])['MonthlyIncome']
        .mean()
        .round(0) 
        .reset_index()
    )

# Function to create the plot based on Age
def plot_average_income_by_age(selected_gender):
    average_data = create_age_pipeline(selected_gender)
    
    color_map = {
        'Female': 'blue',
        'Male': 'orange'
    }

    fig = average_data.hvplot.line(
        x="Age",
        y='MonthlyIncome',
        line_color=color_map[selected_gender],
        line_width=5,
        title=f"Average Monthly Income by Employee Age ({selected_gender})",
        ylabel="Average Monthly Income",
        xlabel="Employee Age",
        ylim=(0, average_data['MonthlyIncome'].max() + 10)
    )

    return fig

# Create a Panel layout to include both plots
averages_age_gender_linear_plot = pn.Row(
    pn.bind(plot_average_income_by_age, selected_gender=gender_source)
)


Attrition

In [None]:
# Define the pipeline for calculating averages based on selected gender
def create_attrition_pipeline(selected_gender):
    return (
        df[df['Gender'] == selected_gender]
        .groupby(['Attrition'])['MonthlyIncome']  # Only use MonthlyIncome for calculation
        .mean()
        .reset_index()
        .round(0)  # Round the mean to the nearest whole number
    )

# Function to create the plot based on the selected gender
def update_averages_plot(selected_gender):
    average_data = create_attrition_pipeline(selected_gender)  # Create the pipeline

    # Define the color based on the selected gender
    color_map = {
        'Female': 'blue',
        'Male': 'orange'
    }
    color = color_map[selected_gender]  # Get the color for the selected gender

    # Create a bar plot for MonthlyIncome grouped by Attrition
    fig = average_data.hvplot.bar(
        x='Attrition',          # Use Attrition status as the x-axis
        y='MonthlyIncome',      # Fixed to MonthlyIncome
        title=f'Average Monthly Income by Gender and Attrition ({selected_gender})',
        ylabel='Average Monthly Income',
        xlabel='Attrition',
        ylim=(0, average_data['MonthlyIncome'].max() + 10),  # Set y-axis limit based on max of selected variable
        color=color,           # Set color based on selected gender
        legend='top_left'      # Position of the legend
    )

    # Add data labels on top of the bars, positioned closer to the edge
    for i, row in average_data.iterrows():
        fig = fig * hv.Text(x=row['Attrition'], y=row['MonthlyIncome'] + 2, text=str(int(row['MonthlyIncome'])), halign='center', valign='bottom')

    return fig

# Create a Panel layout to include the plot and the gender widget
averages_plot_panel = pn.bind(update_averages_plot, selected_gender=gender_source)


Department

In [None]:
# Define the pipeline for calculating averages based on selected gender
def create_department_pipeline(selected_gender):
    return (
        df[df['Gender'] == selected_gender]
        .groupby(['Department'])['MonthlyIncome']  # Change to Department for calculation
        .mean()
        .reset_index()
        .round(0)  # Round the mean to the nearest whole number
    )

# Function to create the plot based on the selected gender
def update_averages_plot(selected_gender):
    average_data = create_department_pipeline(selected_gender)  # Create the pipeline

    # Define the color based on the selected gender
    color_map = {
        'Female': 'blue',
        'Male': 'orange'
    }
    color = color_map[selected_gender]  # Get the color for the selected gender

    # Create a bar plot for MonthlyIncome grouped by Department
    fig = average_data.hvplot.bar(
        x='Department',         # Use Department as the x-axis
        y='MonthlyIncome',      # Fixed to MonthlyIncome
        title=f'Average Monthly Income by Gender and Department ({selected_gender})',
        ylabel='Average Monthly Income',
        xlabel='Department',
        ylim=(0, average_data['MonthlyIncome'].max() + 10),  # Set y-axis limit based on max of selected variable
        color=color,            # Set color based on selected gender
        legend='top_left'       # Position of the legend
    )

    # Add data labels on top of the bars, positioned closer to the edge
    for i, row in average_data.iterrows():
        fig = fig * hv.Text(x=row['Department'], y=row['MonthlyIncome'] + 2, text=str(int(row['MonthlyIncome'])), halign='center', valign='bottom')

    return fig

# Create a Panel layout to include the plot and the gender widget
averages_department_plot_panel = pn.bind(update_averages_plot, selected_gender=gender_source)


Dashboard

In [None]:

# Text label above the gender selection widget
gender_selection_text = pn.pane.Markdown("### Click on the button below to display the data for either Females or Males")

# Define the top row layout with two plots
top_row_layout = pn.Row(
    averages_yearsatcompany_gender_linear_plot,  # First plot
    averages_plot_panel                          # Second plot
)

# Define the bottom row layout with two additional plots
down_row_layout = pn.Row(
    averages_age_gender_linear_plot,             # Third plot
    averages_department_plot_panel               # Fourth plot
)

# Additional sidebar information
logo = 'accelerateinclusion.png'
sidebar_text = """
Inclusion Accelerator analyzed a publicly available IBM HR Analytics dataset, published on Kaggle, which includes data from nearly 1,500 current and former employees. This dataset offers insights into job satisfaction, work-life balance, tenure, experience, salary, and demographic details.
"""

# Create a Panel layout for the sidebar with the text and the widget
sidebar_layout = pn.Column(sidebar_text, gender_selection_text, gender_source)

# Creating the dashboard template with a specified sidebar width
dashboard_template = pn.template.FastListTemplate(
    title="Interactive Dashboard",
    logo=logo,
    sidebar=sidebar_layout,  # Add the widget to the sidebar
    sidebar_width=250,       # Set the sidebar width to 250 pixels
    main=[top_row_layout, down_row_layout],  # Combine the rows in the main area
)

# Display the dashboard
dashboard_template.show()
