In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import HBox, Label, Layout, IntSlider

In [28]:
#zaimportowanie pliku z danymi
df = pd.read_csv("WA_Fn-UseC_-HR-Employee-Attrition.csv") 
#df

In [29]:
# wybranie przydzielonych kolumn + Attrition
df_ds = df[["Attrition", "TrainingTimesLastYear", "TotalWorkingYears",
       "YearsAtCompany", "YearsInCurrentRole", "YearsSinceLastPromotion",
       "YearsWithCurrManager", "StockOptionLevel"]] 
df_ds

Unnamed: 0,Attrition,TrainingTimesLastYear,TotalWorkingYears,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,StockOptionLevel
0,Yes,0,8,6,4,0,5,0
1,No,3,10,10,7,1,7,1
2,Yes,3,7,0,0,0,0,0
3,No,3,8,8,7,3,0,0
4,No,3,6,2,2,2,2,1
...,...,...,...,...,...,...,...,...
1465,No,3,17,5,2,0,3,1
1466,No,5,9,7,7,1,7,1
1467,No,0,6,6,2,0,3,1
1468,No,3,17,9,6,0,8,0


# TrainingTimesLastYear vs. Attrition #

In [30]:
# Tabela przestawna dla liczby szkoleń w ostatnim roku
TTLY = pd.crosstab(df_ds["Attrition"], df_ds["TrainingTimesLastYear"]) 
TTLY

TrainingTimesLastYear,0,1,2,3,4,5,6
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
No,39,62,449,422,97,105,59
Yes,15,9,98,69,26,14,6


In [31]:
# Number of employees that left a company vs. the number of trainings in the last year
# Create a copy of dataframe with Attrition=Yes
df_ds_TTLY_YES = df_ds[df_ds["Attrition"]=="Yes"].copy()
# Count the employees that left the company
df_ds_TTLY_YES_only = df_ds_TTLY_YES.groupby(by="TrainingTimesLastYear").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_YES
df_ds_TTLY_YES_only = df_ds_TTLY_YES_only.rename(columns = {"Attrition":"Attrition_YES"})
# df_ds_TTLY_YES_only

In [32]:
# Number of employees that stayed in a company vs. the number of trainings in the last year
# Create a copy of dataframe with Attrition=No
df_ds_TTLY_NO = df_ds[df_ds["Attrition"]=="No"].copy()
# Count the employees that stayed in a company
df_ds_TTLY_NO_only = df_ds_TTLY_NO.groupby(by="TrainingTimesLastYear").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_NO
df_ds_TTLY_NO_only = df_ds_TTLY_NO_only.rename(columns = {"Attrition":"Attrition_NO"})
# df_ds_TTLY_NO_only

In [33]:
# Merge tables with trainings and attrition both YES and NO
df_ds_TTLY_Y_N = pd.merge(df_ds_TTLY_YES_only, df_ds_TTLY_NO_only, on="TrainingTimesLastYear", how="left")
# Reset index
df_ds_TTLY_Y_N.reset_index()
# Add a column with total number of employees
df_ds_TTLY_Y_N["Yes+No"] = df_ds_TTLY_Y_N["Attrition_YES"] + df_ds_TTLY_Y_N["Attrition_NO"]
# Add a column with probability of leaving a company
df_ds_TTLY_Y_N["Probability_YES"] = df_ds_TTLY_Y_N["Attrition_YES"]/df_ds_TTLY_Y_N["Yes+No"]
print(df_ds_TTLY_Y_N)

   TrainingTimesLastYear  Attrition_YES  Attrition_NO  Yes+No  Probability_YES
0                      0             15            39      54         0.277778
1                      1              9            62      71         0.126761
2                      2             98           449     547         0.179159
3                      3             69           422     491         0.140530
4                      4             26            97     123         0.211382
5                      5             14           105     119         0.117647
6                      6              6            59      65         0.092308


In [34]:
# Characteristics of slider for number of trainings in the last year

from IPython.display import display


style = {'description_width': 'initial'}
TTLY_slider = IntSlider(
    value=0,
    min=0,
    max=6,
    step=1,
    description='Number of trainings in last year:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    style = style,
    layout = Layout(width='70%')
)

# display(TTLY_slider)
HBox([TTLY_slider])

HBox(children=(IntSlider(value=0, continuous_update=False, description='Number of trainings in last year:', la…

In [35]:
# Function checking the attrition probability for number of trainings in the last year
def TTLY_probability(input_TTLY):  
    TrainNo = input_TTLY.value
    Attr_Y_prob = df_ds_TTLY_Y_N.loc[df_ds_TTLY_Y_N["TrainingTimesLastYear"]==TrainNo, "Probability_YES"]
    pTTLY = Attr_Y_prob[TrainNo]
    return pTTLY
    # print(pTTLY)
    # print("pTTLY: {}".format(Attr_Y_prob[TrainNo]))
    # print(type(pTTLY))

In [36]:
# Slider connected with value_handler function returning probability
def value_handler(v):
    TrainNo = TTLY_slider.value
    Attr_Y_prob = df_ds_TTLY_Y_N.loc[df_ds_TTLY_Y_N["TrainingTimesLastYear"]==TrainNo, "Probability_YES"]
    print("Prawdopodobieństwo odejścia z firmy wynosi: {}".format(Attr_Y_prob[TrainNo]))


widgets.interact(value_handler, v=TTLY_slider)

interactive(children=(IntSlider(value=0, continuous_update=False, description='Number of trainings in last yea…

<function __main__.value_handler(v)>

# YearsAtCompany vs. Attrition #

In [37]:
YAC = pd.crosstab(df_ds["Attrition"], df_ds["YearsAtCompany"]) # Tabela przestawna dla lat przepracowanych w firmie
YAC

YearsAtCompany,0,1,2,3,4,5,6,7,8,9,...,27,29,30,31,32,33,34,36,37,40
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
No,28,112,100,108,91,175,67,79,71,74,...,2,2,1,2,2,4,1,2,1,0
Yes,16,59,27,20,19,21,9,11,9,8,...,0,0,0,1,1,1,0,0,0,1


In [38]:
df_ds["YearsAtCompany"].max()

40

In [39]:
print(df_ds.groupby(by=["YearsAtCompany", "Attrition"]).count()["StockOptionLevel"])

YearsAtCompany  Attrition
0               No            28
                Yes           16
1               No           112
                Yes           59
2               No           100
                            ... 
33              Yes            1
34              No             1
36              No             2
37              No             1
40              Yes            1
Name: StockOptionLevel, Length: 64, dtype: int64


In [40]:
# Create copy of df_ds with Attrition and YearsAtCompany only
df_ds_YAC_YES = df_ds.loc[df_ds["Attrition"]=="Yes", ["Attrition", "YearsAtCompany"]].copy()
# Gather data in bins to avoid empty values
df_ds_YAC_YES["YearsAtCompany_bins"] = pd.cut(df_ds_YAC_YES["YearsAtCompany"], bins=[-1,3,7,10,30])
df_ds_YAC_YES["YearsAtCompany_bins"].value_counts()

(-1, 3]     122
(3, 7]       60
(7, 10]      35
(10, 30]     16
Name: YearsAtCompany_bins, dtype: int64

In [41]:
# Number of employees that left a company vs. the number of years worked in a company
# Create a copy of dataframe with Attrition=Yes
df_ds_YAC_YES = df_ds[df_ds["Attrition"]=="Yes"].copy()
df_ds_YAC_YES["YearsAtCompany_bins"] = pd.cut(df_ds_YAC_YES["YearsAtCompany"], bins=[-1,3,7,10,30])
# Count the employees that left the company
df_ds_YAC_YES_only = df_ds_YAC_YES.groupby(by="YearsAtCompany_bins").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_YES
df_ds_YAC_YES_only = df_ds_YAC_YES_only.rename(columns = {"Attrition":"Attrition_YES"})
# df_ds_YAC_YES_only

In [42]:
# Number of employees that stayed in a company vs. the number of years worked in a company
# Create a copy of dataframe with Attrition=No
df_ds_YAC_NO = df_ds[df_ds["Attrition"]=="No"].copy()
df_ds_YAC_NO["YearsAtCompany_bins"] = pd.cut(df_ds_YAC_NO["YearsAtCompany"], bins=[-1,3,7,10,30])
# Count the employees that stayed in a company
df_ds_YAC_NO_only = df_ds_YAC_NO.groupby(by="YearsAtCompany_bins").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_NO
df_ds_YAC_NO_only = df_ds_YAC_NO_only.rename(columns = {"Attrition":"Attrition_NO"})
# df_ds_YAC_NO_only

In [43]:
# Merge tables with trainings and attrition both YES and NO
df_ds_YAC_Y_N = pd.merge(df_ds_YAC_YES_only, df_ds_YAC_NO_only, on="YearsAtCompany_bins", how="left")
# Reset index
df_ds_YAC_Y_N.reset_index()
# Add a column with total number of employees
df_ds_YAC_Y_N["Yes+No"] = df_ds_YAC_Y_N["Attrition_YES"] + df_ds_YAC_Y_N["Attrition_NO"]
# Add a column with probability of leaving a company
df_ds_YAC_Y_N["Probability_YES"] = df_ds_YAC_Y_N["Attrition_YES"]/df_ds_YAC_Y_N["Yes+No"]
df_ds_YAC_Y_N

Unnamed: 0,YearsAtCompany_bins,Attrition_YES,Attrition_NO,Yes+No,Probability_YES
0,"(-1, 3]",122,348,470,0.259574
1,"(3, 7]",60,412,472,0.127119
2,"(7, 10]",35,247,282,0.124113
3,"(10, 30]",16,214,230,0.069565


In [44]:
# Characteristics of slider for the number of years worked in a company
from IPython.display import display
from ipywidgets import HBox, Label

style = {'description_width': 'initial'}
YAC_toggle = widgets.ToggleButtons(
    options=['(0-3>', '(3-7>', '(7-10>', '(10-30)'],
    description=' Number of years in a company:',
    disabled=False,
    button_style='',
    style = style,
    layout = Layout(width='80%')
)

# display(YAC_slider)
HBox([YAC_toggle])

HBox(children=(ToggleButtons(description=' Number of years in a company:', layout=Layout(width='80%'), options…

In [45]:
# Function checking the attrition probability for numbers spent in a company 
def YAC_probability(input_YAC):
    if input_YAC.value == "(0-3>":
        CompYearsNo = 0 #"(-1, 3]"
    elif input_YAC.value == "(3-7>":
        CompYearsNo = 1 #"(3, 7]"
    elif input_YAC.value == "(7-10>":
        CompYearsNo = 2 #"(7, 10]"
    else:
        CompYearsNo = 3 #"(10, 30]"
    pYAC = df_ds_YAC_Y_N.loc[int(CompYearsNo), "Probability_YES"]
    return pYAC
# print(pYAC)
# print("pYAC: {}".format(df_ds_YAC_Y_N.loc[int(CompYearsNo), "Probability_YES"]))
# print(type(pYAC))

In [46]:
# Nie mogłem się odwołać do tego pogrupowanego indexu, więc zamieniłem na wartości z indexu podstawowego
def value_handler(v):
    if YAC_toggle.value == "(0-3>":
        CYearsNo = 0 #"(-1, 3]"
    elif YAC_toggle.value == "(3-7>":
        CYearsNo = 1 #"(3, 7]"
    elif YAC_toggle.value == "(7-10>":
        CYearsNo = 2 #"(7, 10]"
    else:
        CYearsNo = 3 #"(10, 30]"
    
    print("Prawdopodobieństwo odejścia z firmy wynosi: {}".format(df_ds_YAC_Y_N.loc[int(CYearsNo), "Probability_YES"]))


widgets.interact(value_handler, v=YAC_toggle)

interactive(children=(ToggleButtons(description=' Number of years in a company:', layout=Layout(width='80%'), …

<function __main__.value_handler(v)>

# YearsInCurrentRole vs. Attrition #

In [47]:
YICR = pd.crosstab(df_ds["Attrition"], df_ds["YearsInCurrentRole"]) # Tabela przestawna dla lat przepracowanych na obecnym stanowisku
YICR

YearsInCurrentRole,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
No,171,46,304,119,89,35,35,191,82,61,27,22,9,13,10,6,7,4,2
Yes,73,11,68,16,15,1,2,31,7,6,2,0,1,1,1,2,0,0,0


In [48]:
print(df_ds.groupby(by=["YearsInCurrentRole", "Attrition"]).count()["StockOptionLevel"])

YearsInCurrentRole  Attrition
0                   No           171
                    Yes           73
1                   No            46
                    Yes           11
2                   No           304
                    Yes           68
3                   No           119
                    Yes           16
4                   No            89
                    Yes           15
5                   No            35
                    Yes            1
6                   No            35
                    Yes            2
7                   No           191
                    Yes           31
8                   No            82
                    Yes            7
9                   No            61
                    Yes            6
10                  No            27
                    Yes            2
11                  No            22
12                  No             9
                    Yes            1
13                  No            13
        

In [49]:
# Create copy of df_ds with Attrition and YearsInCurrentRole only
df_ds_YICR_YES = df_ds.loc[df_ds["Attrition"]=="Yes", ["Attrition", "YearsInCurrentRole"]].copy()
# Gather data in bins to avoid empty values
df_ds_YICR_YES["YearsInCurrentRole_bins"] = pd.cut(df_ds_YICR_YES["YearsInCurrentRole"], bins=[-1,3,7,18])
df_ds_YICR_YES["YearsInCurrentRole_bins"].value_counts()

(-1, 3]    168
(3, 7]      49
(7, 18]     20
Name: YearsInCurrentRole_bins, dtype: int64

In [50]:
# Number of employees that left a company vs. the number of years worked at current position
# Create a copy of dataframe with Attrition=Yes
df_ds_YICR_YES = df_ds[df_ds["Attrition"]=="Yes"].copy()
df_ds_YICR_YES["YearsInCurrentRole_bins"] = pd.cut(df_ds_YICR_YES["YearsInCurrentRole"], bins=[-1,3,7,18])
# Count the employees that left the company
df_ds_YICR_YES_only = df_ds_YICR_YES.groupby(by="YearsInCurrentRole_bins").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_YES
df_ds_YICR_YES_only = df_ds_YICR_YES_only.rename(columns = {"Attrition":"Attrition_YES"})
# df_ds_YAC_YES_only

In [51]:
# Number of employees that stayed in a company vs. the number of years worked in a company
# Create a copy of dataframe with Attrition=No
df_ds_YICR_NO = df_ds[df_ds["Attrition"]=="No"].copy()
df_ds_YICR_NO["YearsInCurrentRole_bins"] = pd.cut(df_ds_YICR_NO["YearsInCurrentRole"], bins=[-1,3,7,18])
# Count the employees that stayed in a company
df_ds_YICR_NO_only = df_ds_YICR_NO.groupby(by="YearsInCurrentRole_bins").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_NO
df_ds_YICR_NO_only = df_ds_YICR_NO_only.rename(columns = {"Attrition":"Attrition_NO"})
# df_ds_YAC_NO_only

In [52]:
# Merge tables with trainings and attrition both YES and NO
df_ds_YICR_Y_N = pd.merge(df_ds_YICR_YES_only, df_ds_YICR_NO_only, on="YearsInCurrentRole_bins", how="left")
# Reset index
df_ds_YICR_Y_N.reset_index()
# Add a column with total number of employees
df_ds_YICR_Y_N["Yes+No"] = df_ds_YICR_Y_N["Attrition_YES"] + df_ds_YICR_Y_N["Attrition_NO"]
# Add a column with probability of leaving a company
df_ds_YICR_Y_N["Probability_YES"] = df_ds_YICR_Y_N["Attrition_YES"]/df_ds_YICR_Y_N["Yes+No"]
df_ds_YICR_Y_N

Unnamed: 0,YearsInCurrentRole_bins,Attrition_YES,Attrition_NO,Yes+No,Probability_YES
0,"(-1, 3]",168,640,808,0.207921
1,"(3, 7]",49,350,399,0.122807
2,"(7, 18]",20,243,263,0.076046


In [53]:
# Characteristics of slider for the number of years worked at current position
from IPython.display import display
from ipywidgets import HBox, Label

style = {'description_width': 'initial'}
YICR_toggle = widgets.ToggleButtons(
    options=['(0-3>', '(3-7>', '(7-18>'],
    description='Number of years at current role:',
    disabled=False,
    button_style='',
    style = style,
    layout = Layout(width='80%')
)

# display(YICR_slider)
HBox([YICR_toggle])

HBox(children=(ToggleButtons(description='Number of years at current role:', layout=Layout(width='80%'), optio…

In [54]:
# Function checking the attrition probability for number of years in current role
def YICR_probability(input_YICR):
    if input_YICR.value == "(0-3>":
        RoleYearsNo = 0 #"(-1, 3]"
    elif input_YICR.value == "(3-7>":
        RoleYearsNo = 1 #"(3, 7]"
    else:
        RoleYearsNo = 2 #"(7, 18]"
    pYICR = df_ds_YICR_Y_N.loc[int(RoleYearsNo), "Probability_YES"]
    return pYICR
    # print(pYICR)
    # print("pYICR: {}".format(df_ds_YICR_Y_N.loc[int(RoleYearsNo), "Probability_YES"]))

In [55]:
# Nie mogłem się odwołać do tego pogrupowanego indexu, więc zamieniłem na wartości z indexu podstawowego
def value_handler(v):
    if YICR_toggle.value == "(0-3>":
        CYearsNo = 0 #"(-1, 3]"
    elif YICR_toggle.value == "(3-7>":
        CYearsNo = 1 #"(3, 7]"
    else:
        CYearsNo = 2 #"(7, 18]"
    
    print("Prawdopodobieństwo odejścia z firmy wynosi: {}".format(df_ds_YICR_Y_N.loc[int(CYearsNo), "Probability_YES"]))


widgets.interact(value_handler, v=YICR_toggle)

interactive(children=(ToggleButtons(description='Number of years at current role:', layout=Layout(width='80%')…

<function __main__.value_handler(v)>

# TotalWorkingYears vs. Attrition #

In [56]:
TWY = pd.crosstab(df_ds["Attrition"], df_ds["TotalWorkingYears"]) # Tabela przestawna dla lat przepracowanych na obecnym stanowisku
TWY

TotalWorkingYears,0,1,2,3,4,5,6,7,8,9,...,30,31,32,33,34,35,36,37,38,40
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
No,6,41,22,33,51,72,103,63,87,86,...,7,8,9,6,4,3,6,4,1,0
Yes,5,40,9,9,12,16,22,18,16,10,...,0,1,0,1,1,0,0,0,0,2


In [57]:
# Settings to display full dataframe date
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 12)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

print(df_ds.groupby(by=["TotalWorkingYears", "Attrition"]).count()["YearsAtCompany"])

TotalWorkingYears  Attrition
0                  No           6 
                   Yes          5 
1                  No           41
                   Yes          40
2                  No           22
                                ..
35                 No           3 
36                 No           6 
37                 No           4 
38                 No           1 
40                 Yes          2 
Name: YearsAtCompany, Length: 71, dtype: int64
  pd.set_option('display.max_colwidth', -1)


In [58]:
# Create copy of df_ds with Attrition and YearsInCurrentRole only
df_ds_TWY_YES = df_ds.loc[df_ds["Attrition"]=="Yes", ["Attrition", "TotalWorkingYears"]].copy()
# Gather data in bins to avoid empty values
df_ds_TWY_YES["TotalWorkingYears_bins"] = pd.cut(df_ds_TWY_YES["TotalWorkingYears"], bins=[-1,3,7,12,20,40])
df_ds_TWY_YES["TotalWorkingYears_bins"].value_counts()

(3, 7]      68
(7, 12]     63
(-1, 3]     63
(12, 20]    27
(20, 40]    16
Name: TotalWorkingYears_bins, dtype: int64

In [59]:
# Number of employees that left a company vs. the number of years worked at current position
# Create a copy of dataframe with Attrition=Yes
df_ds_TWY_YES = df_ds[df_ds["Attrition"]=="Yes"].copy()
df_ds_TWY_YES["TotalWorkingYears_bins"] = pd.cut(df_ds_TWY_YES["TotalWorkingYears"], bins=[-1,3,7,12,20,40])
# Count the employees that left the company
df_ds_TWY_YES_only = df_ds_TWY_YES.groupby(by="TotalWorkingYears_bins").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_YES
df_ds_TWY_YES_only = df_ds_TWY_YES_only.rename(columns = {"Attrition":"Attrition_YES"})
# df_ds_YAC_YES_only

In [60]:
# Number of employees that stayed in a company vs. the number of years worked in a company
# Create a copy of dataframe with Attrition=No
df_ds_TWY_NO = df_ds[df_ds["Attrition"]=="No"].copy()
df_ds_TWY_NO["TotalWorkingYears_bins"] = pd.cut(df_ds_TWY_NO["TotalWorkingYears"], bins=[-1,3,7,12,20,40])
# Count the employees that stayed in a company
df_ds_TWY_NO_only = df_ds_TWY_NO.groupby(by="TotalWorkingYears_bins").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_NO
df_ds_TWY_NO_only = df_ds_TWY_NO_only.rename(columns = {"Attrition":"Attrition_NO"})
# df_ds_YAC_NO_only

In [61]:
# Merge tables with trainings and attrition both YES and NO
df_ds_TWY_Y_N = pd.merge(df_ds_TWY_YES_only, df_ds_TWY_NO_only, on="TotalWorkingYears_bins", how="left")
# Reset index
df_ds_TWY_Y_N.reset_index()
# Add a column with total number of employees
df_ds_TWY_Y_N["Yes+No"] = df_ds_TWY_Y_N["Attrition_YES"] + df_ds_TWY_Y_N["Attrition_NO"]
# Add a column with probability of leaving a company
df_ds_TWY_Y_N["Probability_YES"] = df_ds_TWY_Y_N["Attrition_YES"]/df_ds_TWY_Y_N["Yes+No"]
df_ds_TWY_Y_N

Unnamed: 0,TotalWorkingYears_bins,Attrition_YES,Attrition_NO,Yes+No,Probability_YES
0,"(-1, 3]",63,102,165,0.381818
1,"(3, 7]",68,289,357,0.190476
2,"(7, 12]",63,422,485,0.129897
3,"(12, 20]",27,229,256,0.105469
4,"(20, 40]",16,191,207,0.077295


In [62]:
# Characteristics of slider for the number of years worked in total
from IPython.display import display
from ipywidgets import HBox, Label

style = {'description_width': 'initial'}
TWY_toggle = widgets.ToggleButtons(
    options=['(0-3>', '(3-7>', '(7-12>', '(12-20>', '(20-40>'],
    description='Number of working years: ',
    disabled=False,
    button_style='',
    style = style,
    layout = Layout(width='90%')
)

# display(TWS_slider)
HBox([TWY_toggle])

HBox(children=(ToggleButtons(description='Number of working years: ', layout=Layout(width='90%'), options=('(0…

In [63]:
# FUnction checking the attrition probability for total working years
def TWY_probability(input_TWY):
    if input_TWY.value == "(0-3>":
        WorkYearsNo = 0 #"(-1, 3]"
    elif input_TWY.value == "(3-7>":
        WorkYearsNo = 1 #"(3, 7]"
    elif input_TWY.value == "(7-12>":
        WorkYearsNo = 2 #"(7, 12]"
    elif input_TWY.value == "(12-20>":
        WorkYearsNo = 3 #"(12, 20]"  
    else:
        WorkYearsNo = 4 #"(20, 40]"
    pTWY = df_ds_TWY_Y_N.loc[int(WorkYearsNo), "Probability_YES"]
    return pTWY
    # print(pTWY)
    # print("pTWY: {}".format(df_ds_TWY_Y_N.loc[int(WorkYearsNo), "Probability_YES"]))

In [64]:
# Nie mogłem się odwołać do tego pogrupowanego indexu, więc zamieniłem na wartości z indexu podstawowego
def TWY_prob(v):
    if TWY_toggle.value == "(0-3>":
        CYearsNo = 0 #"(-1, 3]"
    elif TWY_toggle.value == "(3-7>":
        CYearsNo = 1 #"(3, 7]"
    elif TWY_toggle.value == "(7-12>":
        CYearsNo = 2 #"(7, 12]"
    elif TWY_toggle.value == "(12-20>":
        CYearsNo = 3 #"(12, 20]"  
    else:
        CYearsNo = 4 #"(20, 40]"
    
    print("Prawdopodobieństwo odejścia z firmy wynosi: {}".format(df_ds_TWY_Y_N.loc[int(CYearsNo), "Probability_YES"]))


widgets.interact(TWY_prob, v=TWY_toggle)

interactive(children=(ToggleButtons(description='Number of working years: ', layout=Layout(width='90%'), optio…

<function __main__.TWY_prob(v)>

# StockOptionLevel vs. Attrition #

In [65]:
# Pivot table for the level of stock options
SOL = pd.crosstab(df_ds["Attrition"], df_ds["StockOptionLevel"]) 
SOL

StockOptionLevel,0,1,2,3
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
No,477,540,146,70
Yes,154,56,12,15


In [66]:
# Number of employees that left a company vs. the level of options for stock
# Create a copy of dataframe with Attrition=Yes
df_ds_SOL_YES = df_ds[df_ds["Attrition"]=="Yes"].copy()
# Count the employees that left the company
df_ds_SOL_YES_only = df_ds_SOL_YES.groupby(by="StockOptionLevel").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_YES
df_ds_SOL_YES_only = df_ds_SOL_YES_only.rename(columns = {"Attrition":"Attrition_YES"})
# df_ds_SOL_YES_only

In [67]:
# Number of employees that stayed in a company vs. the level of options for stock
# Create a copy of dataframe with Attrition=No
df_ds_SOL_NO = df_ds[df_ds["Attrition"]=="No"].copy()
# Count the employees that stayed in a company
df_ds_SOL_NO_only = df_ds_SOL_NO.groupby(by="StockOptionLevel").count() ["Attrition"].reset_index()
# Change the name of column Attrition -> Atttrition_NO
df_ds_SOL_NO_only = df_ds_SOL_NO_only.rename(columns = {"Attrition":"Attrition_NO"})
# df_ds_TTLY_NO_only

In [68]:
# Merge tables with trainings and attrition both YES and NO
df_ds_SOL_Y_N = pd.merge(df_ds_SOL_YES_only, df_ds_SOL_NO_only, on="StockOptionLevel", how="left")
# Reset index
df_ds_SOL_Y_N.reset_index()
# Add a column with total number of employees
df_ds_SOL_Y_N["Yes+No"] = df_ds_SOL_Y_N["Attrition_YES"] + df_ds_SOL_Y_N["Attrition_NO"]
# Add a column with probability of leaving a company
df_ds_SOL_Y_N["Probability_YES"] = df_ds_SOL_Y_N["Attrition_YES"]/df_ds_SOL_Y_N["Yes+No"]
print(df_ds_SOL_Y_N)

   StockOptionLevel  Attrition_YES  Attrition_NO  Yes+No  Probability_YES
0  0                 154            477           631     0.244057       
1  1                 56             540           596     0.093960       
2  2                 12             146           158     0.075949       
3  3                 15             70            85      0.176471       


In [75]:
# Characteristics of slider for the level of options for stock

from IPython.display import display
from ipywidgets import HBox, Label

style = {'description_width': 'initial'}
SOL_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=3,
    step=1,
    description='Stock level option: ',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    style=style,
    layout=Layout(width='50%')
)

# display(TTLY_slider)
HBox([SOL_slider])

HBox(children=(IntSlider(value=0, continuous_update=False, description='Stock level option: ', layout=Layout(w…

In [70]:
# Function checking the attrition probability for stock option level
def SOL_probability(input_SOL):
    StockL = input_SOL.value
    Attr_Y_prob = df_ds_SOL_Y_N.loc[df_ds_SOL_Y_N["StockOptionLevel"]==StockL, "Probability_YES"]
    pSOL = Attr_Y_prob[StockL]
    return pSOL
    # print(pSOL)
    # print("pSOL: {}".format(Attr_Y_prob[StockL]))

In [71]:
# Slider connected with value_handler function returning probability
def SOL_prob(v):
    TrainNo = SOL_slider.value
    Attr_Y_prob = df_ds_SOL_Y_N.loc[df_ds_SOL_Y_N["StockOptionLevel"]==TrainNo, "Probability_YES"]
    print("Prawdopodobieństwo odejścia z firmy wynosi: {}".format(Attr_Y_prob[TrainNo]))


widgets.interactive(SOL_prob, v=SOL_slider)

interactive(children=(IntSlider(value=0, continuous_update=False, description='Stock level option: ', layout=L…

# PROBABILITIES #

In [76]:
# All widgets together in VBox
from ipywidgets import Box 
def probabilities(input_TTLY, input_YAC, input_YICR, input_TWY, input_SOL):

#TTLY
    pTTLY = TTLY_probability(TTLY_slider)
    print("pTTLY: {}".format(pTTLY))
#YAC
    pYAC = YAC_probability(YAC_toggle)
    print("pYAC: {}".format(pYAC))
#YICR
    pYICR = YICR_probability(YICR_toggle)
    print("pYICR: {}".format(pYICR))
#TWY
    pTWY = TWY_probability(TWY_toggle)
    print("pTWY: {}".format(pTWY))
#SOL
    pSOL = SOL_probability(SOL_slider)
    print("pSOL: {}".format(pSOL))

# Display widgets
ui = widgets.VBox([TTLY_slider, YAC_toggle, YICR_toggle, TWY_toggle, SOL_slider])
# Send inputs to function reading probabilities
out = widgets.interactive_output(probabilities, {'input_TTLY':TTLY_slider, 'input_YAC':YAC_toggle, 'input_YICR':YICR_toggle, 'input_TWY':TWY_toggle, 'input_SOL':SOL_slider})

display(ui, out)


VBox(children=(IntSlider(value=0, continuous_update=False, description='Number of trainings in last year:', la…

Output()

In [73]:
from ipywidgets import Layout, Button, Box, FloatText, Textarea, Dropdown, Label, IntSlider

def SOL_prob(v):
    TrainNo = SOL_slider.value
    # Attr_Y_prob = df_ds_SOL_Y_N.loc[df_ds_SOL_Y_N["StockOptionLevel"]==TrainNo, "Probability_YES"]
    #print("Prawdopodobieństwo odejścia z firmy wynosi: {}".format(Attr_Y_prob[TrainNo]))
    return df_ds_SOL_Y_N.loc[df_ds_SOL_Y_N["StockOptionLevel"]==TrainNo, "Probability_YES"]

def TWY_prob(v):
    if TWY_toggle.value == "(0-3>":
        CYearsNo = 0 #"(-1, 3]"
    elif TWY_toggle.value == "(3-7>":
        CYearsNo = 1 #"(3, 7]"
    elif TWY_toggle.value == "(7-12>":
        CYearsNo = 2 #"(7, 12]"
    elif TWY_toggle.value == "(12-20>":
        CYearsNo = 3 #"(12, 20]"  
    else:
        CYearsNo = 4 #"(20, 40]"
    #print("Prawdopodobieństwo odejścia z firmy wynosi: {}".format(df_ds_TWY_Y_N.loc[int(CYearsNo), "Probability_YES"]))
    return df_ds_TWY_Y_N.loc[int(CYearsNo), "Probability_YES"] 

form_item_layout = Layout(
    display='flex',
    flex_flow='row',
    justify_content='space-between'
)

form_items = [
    Box([Label(value='Liczba szkoleń w ostatnim roku: '), TTLY_slider], layout=form_item_layout),
    Box([Label(value='Liczba lat w firmie: '), YAC_toggle], layout=form_item_layout),
    Box([Label(value='Liczba lat na obecnym stanowisku: '), YICR_toggle], layout=form_item_layout),
    Box([Label(value='Lata doświadczenia zawodowego: '), TWY_toggle], layout=form_item_layout),
    Box([Label(value='Poziom opcji na akcje: '), SOL_slider], layout=form_item_layout)
]

form = Box(form_items, layout=Layout(
    display='flex',
    flex_flow='column',
    border='solid 2px',
    align_items='stretch',
    width=''
))
form



0    0.244057
Name: Probability_YES, dtype: float64
0.38181818181818183


Box(children=(Box(children=(Label(value='Liczba szkoleń w ostatnim roku: '), IntSlider(value=0, continuous_upd…