#### Scenario
A healthcare provider initiated an attrition analysis. They wish to know their current attrition rate and, if the rate is greater than ten percent, what are the contributing factors and potential solutions.

#### Problem Statement
How can we increase employee retention while minimizing costs?

In [4]:
from pathlib import Path

import pandas as pd
from pandasql import sqldf

import utils

P_SQL = lambda q: sqldf(q, globals())

In [5]:
# Import file
filename = 'data/healthcare_attrition.csv'
df = pd.read_csv(Path(utils.root(), filename), sep=',')

In [15]:
# Attrition Rate
attrition_sr = df['Attrition'].value_counts()
attrition_rate = round(attrition_sr['Yes'] / attrition_sr['No'], 2)
print(attrition_rate)

# Attrition rate is 13%

0.13


In [16]:
# Ages most affected by attrition

attrition_by_age_query = '''
SELECT employeeid, age, attrition
FROM df
WHERE attrition = 'Yes'
'''

resultset = P_SQL(attrition_by_age_query)
attrition_age_sr = resultset['Age']

population_size = resultset['EmployeeID'].count()

#
empirical_rule_68 = utils.stats(attrition_age_sr)['empirical_rule_68']
count = sum(i >= 22 and i <= 40 for i in attrition_age_sr)

print(count)
print(population_size)
print(count / population_size)

print()


145
199
0.7286432160804021
(22, 40)
