<a href="https://colab.research.google.com/github/cbonnin88/people-analytics/blob/main/hrmanager_toolkit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
!pip install streamlit pyngrok
!pip install ngrok



In [82]:
%%writefile app.py

import streamlit as st
import polars as pl
import plotly.express as px
import plotly.graph_objects as go
import joblib


# Page Configuration
# Configure the page before any other Streamlit call so that the error path
# below (st.error / st.stop) is rendered with the same layout as the app.
st.set_page_config(layout='wide',page_title='ConnectSphere HR Toolkit')


# Load Models and Columns
@st.cache_resource
def load_artifacts():
  """Load the trained models, the attrition feature columns and the dataset.

  Streamlit re-executes this script on every widget interaction; caching the
  result avoids re-reading the .joblib and CSV files on each rerun.

  Returns:
    Tuple of (attrition_model, attrition_model_cols, salary_model, df_people).

  Raises:
    FileNotFoundError: if one of the required files is missing.
  """
  # NOTE: joblib.load unpickles its input, so only trusted artifact files
  # should ever be placed next to this app.
  return (
      joblib.load('attrition_model.joblib'),
      joblib.load('attrition_model_columns.joblib'),
      joblib.load('salary_model.joblib'),
      # Load the main dataset with Polars for visualization
      pl.read_csv('df_people.csv'),
  )


try:
  attrition_model, attrition_model_cols, salary_model, df_people = load_artifacts()
except FileNotFoundError as e:
  # e.filename may be None depending on which loader raised; fall back to the
  # exception text so the message never reads "None".
  st.error(f'A required file was not found: {e.filename or e}. Please ensure all .joblib and df_people.csv files are in the same directory')
  st.stop()

st.title('ConnectSphere HR Analytics Toolkit 🛠️')
st.markdown('---')


# 1. Salary Estimator
st.header('💰 New Hire Salary Estimator')

# Job titles offered for each department in the "Job Title" selector.
# NOTE(review): 'Leadership' is not among the Department options below, so its
# titles can currently never be selected -- confirm whether that is intended.
job_titles_by_dept = {
    'Engineering':['Frontend Engineer','Backend Engineer','DevOps Engineer','QA Engineer','Engineering Manager','Cloud Engineer'],
    'Product':['Product Manager','Product Analyst','UX/UI Designer','Product Engineer','Head of Product'],
    'Sales':['Account Executive','Sales Development Representative','Sales Manager','Customer Success Manager','Sales Analyst','Head of Sales'],
    'Human Resources':['Talent Acquisition Specialist','HR Business Partner','People Analyst','HR Machine Learning Engineer','HR Data Scientist','HR Manager'],
    'Finance':['Credit Analyst','Accountant','Financial Analyst','Financial Planner','Auditor','Financial Lead'],
    'Data':['Data Analyst','Data Scientist','Analytics Engineer','Data Engineer','Machine Learning Engineer','Cloud Data Engineer','Head of Data'],
    'Leadership':['Chief Technicological Officer','Director of Product','Head of Global Sales','Chief People Officer','Chief Financial Officer','Chief Data Officer','Chief Executive Officer']
}

with st.expander('Click here to estimate a salary for a new role'):
  col1,col2,col3,col4 = st.columns(4)
  with col1:
    loc = st.selectbox('Location', options=['Paris','London','Copenhagen','Berlin'],key='sal_loc')
  with col2:
    dept = st.selectbox('Department',options=['Engineering','Product','Sales','Data','Finance','Human Resources'],key='sal_dept')
  with col3:
    # The available titles depend on the department picked in the previous column.
    title = st.selectbox('Job Title',options=job_titles_by_dept.get(dept,[]))
  with col4:
    level = st.selectbox('Level',options=['T5','T4','T3','T2'],key='sal_level')

  if st.button('Predict Salary'):
    if not title:
      # The selectbox yields no value when the department has no titles.
      st.warning('Please select a job title')
    else:
      input_data = pl.DataFrame({
          'job_title':[title],'level':[level],
          'location':[loc],'department':[dept]
      })
      # The model is fed a pandas DataFrame, so convert the single-row Polars frame.
      prediction = salary_model.predict(input_data.to_pandas())[0]
      st.success(f'Predicted Annual Salary (EUR): €{prediction:,.0f}')

st.markdown('---')

# 2. Salary Distribution Explorer
st.header('📊 Salary Distribution Explorer')
with st.expander('Click here to explore salary distributions'):
  col_a,col_b = st.columns(2)
  # unique() does not guarantee any ordering, so sort the values to keep the
  # selector options stable from one rerun to the next.
  with col_a:
    dist_dept = st.selectbox('Select Department',options=df_people['department'].unique().sort().to_list(),key='dist_dept')
  with col_b:
    dist_level = st.selectbox('Select Level',options=df_people['level'].unique().sort().to_list(),key='dist_level')

  if st.button('Generate Salary Plot'):
    # Keep only rows for the chosen department/level where is_churn equals 0.
    filtered_df = df_people.filter(
        (pl.col('department')== dist_dept) &
        (pl.col('level')== dist_level) &
        (pl.col('is_churn')==0)
    )

    if filtered_df.is_empty():
      st.warning('No data available for this combination. Please try another selection.')
    else:
      # One box per job title, split by location.
      fig_salary = px.box(
          filtered_df.to_pandas(),
          x='job_title',
          y='latest_salary_eur',
          color='location',
          title=f'Salary Distribution for {dist_level} {dist_dept}',
          labels={'latest_salary_eur':'Annual Salary (EUR)','job_title':'Job Title'}
      )
      st.plotly_chart(fig_salary, use_container_width=True)
st.markdown('---')


# 3. Attrition Risk Identifier

# Probability cut-offs used to label the predicted attrition risk.
HIGH_RISK_THRESHOLD = 0.6
MEDIUM_RISK_THRESHOLD = 0.3


def build_attrition_features(model_cols, tenure_days, performance_score, satisfaction_score, level, department, location):
  """Build a single-row feature mapping for the attrition model.

  Every column in `model_cols` starts at 0; the three numeric features are
  filled in, and the one-hot column matching each categorical choice is set to
  1 when such a column exists.

  Args:
    model_cols: iterable of feature column names, in the order the model uses.
    tenure_days: value for the 'tenure_days' column.
    performance_score: value for the 'performance_score' column.
    satisfaction_score: value for the 'satisfaction_score' column.
    level: level label, looked up as the column f'level_{level}'.
    department: department label, looked up as f'department_{department}'.
    location: location label, looked up as f'location_{location}'.

  Returns:
    Dict mapping each name in `model_cols` (same order) to a one-element list.
  """
  row = {col: 0 for col in model_cols}
  row.update({
      'tenure_days': tenure_days,
      'performance_score': performance_score,
      'satisfaction_score': satisfaction_score,
  })
  # Setting the one-hot encoded columns
  for dummy in (f'level_{level}', f'department_{department}', f'location_{location}'):
    if dummy in row:
      row[dummy] = 1
  # Emit exactly the model's columns, in the model's order.
  return {col: [row[col]] for col in model_cols}


st.header('🚨 Attrition Risk Identifier')
with st.expander("Click here to assess an employee's attrition risk"):
  col1a,col2b = st.columns([1,2])
  with col1a:
    tenure = st.number_input('Tenure (Days)',min_value=0,value=730,step=30)
    perf_score = st.slider('Last Performance Score',1,5,3)
    sat_score = st.slider('Last Satisfaction Score',1,5,3)
  with col2b:
    level_risk = st.selectbox('Employee Level',["T5", "T4", "T3", "T2"],key='risk_level')
    dept_risk = st.selectbox('Employee Department',["Engineering", "Product", "Sales", "Data", "Finance", "Human Resources"],key='risk_dept')
    loc_risk = st.selectbox('Employee Location',["Paris", "London", "Copenhagen",'Berlin'], key='risk_loc')

  if st.button('Calculate Attrition Risk'):
    input_df = pl.DataFrame(build_attrition_features(
        attrition_model_cols, tenure, perf_score, sat_score,
        level_risk, dept_risk, loc_risk
    ))

    # Predict probability (second entry of the first row of predict_proba)
    risk_proba = attrition_model.predict_proba(input_df.to_pandas())[0][1]

    if risk_proba > HIGH_RISK_THRESHOLD:
      st.error(f'High Risk of Attrition: {risk_proba:.1%} probability')
    elif risk_proba > MEDIUM_RISK_THRESHOLD:
      st.warning(f'Medium Risk of Attrition: {risk_proba:.1%} probability')
    else:
      st.success(f'Low Risk of Attrition: {risk_proba:.1%} probability')

Overwriting app.py


In [83]:
import socket
import time

from google.colab import userdata
from pyngrok import ngrok

In [84]:
# Fetch the ngrok auth token from Colab Secrets and hand it to pyngrok.
# Any failure is reported on stdout and the notebook keeps running.
try:
  ngrok_token = userdata.get('NGROK_AUTH_TOKEN')
  ngrok.set_auth_token(ngrok_token)
except Exception as token_error:
  print(f'Could not set ngrok token. Make sure its saved in Colab Secrets. Error: {token_error}')

In [85]:
!streamlit run app.py &>/dev/null&

In [86]:
# Poll until Streamlit accepts connections instead of sleeping a fixed 5 seconds,
# which may be too short on a cold start and needlessly long otherwise.
STREAMLIT_PORT = 8501
STARTUP_TIMEOUT_S = 30

deadline = time.monotonic() + STARTUP_TIMEOUT_S
while time.monotonic() < deadline:
  try:
    with socket.create_connection(('localhost', STREAMLIT_PORT), timeout=1):
      break
  except OSError:
    # Port not open yet; retry shortly.
    time.sleep(0.5)
else:
  print(f'Streamlit did not start listening on port {STREAMLIT_PORT} within {STARTUP_TIMEOUT_S} seconds.')

In [87]:
# Open an ngrok tunnel to the local Streamlit port and show its public URL.
try:
    public_url = ngrok.connect(8501)
    # Print the tunnel's URL string rather than the NgrokTunnel repr.
    print(f"Click the URL to open your Streamlit app: {public_url.public_url}")
except Exception as e:
    print(f"Could not connect to ngrok. Error: {e}")
    print("Please check your ngrok token and make sure no other tunnels are running on your account.")

Click the URL to open your Streamlit app: NgrokTunnel: "https://3574119185b8.ngrok-free.app" -> "http://localhost:8501"


In [79]:
# List the process ID(s) of any running process named "streamlit".
!pgrep streamlit

18292


In [80]:
!kill 18292

In [81]:
# Ask pyngrok to terminate its ngrok process once the app is no longer needed.
ngrok.kill()