In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.offline import offline, iplot
from datetime import datetime
from wordcloud import WordCloud, ImageColorGenerator

# Set Default Options
# pd.set_option("display.max_columns", None)
pd.options.display.float_format = '{:,.1f}'.format
used_color = ["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3", "#98EECC"]

### __Custome Visualization Functions__

In [2]:
# Adding Line to Plotly Figure
def add_line(
    x0 = 0, 
    y0=0,
    x1=0, 
    y1=0, 
    line_color='#ED1C24', 
    font_color='000000',
    xposition = 'right',
    text = 'Text'):
    
    fig.add_shape(type='line', x0=x0, y0=y0, x1=x1,y1=y1,
                  line = {
                      'color' : line_color,
                      'width' : 3,
                      'dash' : 'dashdot'
                  },
                  label = {
                      'text' : f'{text} : {x1: 0.1f}\t',
                      'textposition' : 'end',
                      'yanchor' : 'top',
                      'xanchor' : xposition,
                      'textangle' : 0,
                      'font' : {
                          'size' : 15,
                          'color' : font_color,
                          'family' : 'tahoma'
                      },
                  })

In [3]:
def custom_layout(title_size = 28, showlegend = False):
    fig.update_layout(
        showlegend = showlegend,
        title = {
            'font' : {
                'size' : title_size,
                'family' : 'tahoma'
            }
        },
        
        hoverlabel = {
            'bgcolor' : '#111',
            'font_size' : 16,
            'font_family' : 'arial'
        }
    )

In [4]:
df_emp = pd.read_excel('./EmployeeInformation.xlsx')
print(f'Number of Employess: {df_emp.shape[0]}')
print(f'Number of Features: {df_emp.shape[1]}')

Number of Employess: 90
Number of Features: 6


In [5]:
df_emp

Unnamed: 0,ID,Performance Review,City,Last Promotion Date,Salary,Overdue Vacation?
0,1,10,Alabama,NaT,4539,No
1,2,10,Arizona,NaT,3698,Yes
2,3,8,Colorado,NaT,4157,Yes
3,4,10,Missouri,NaT,4360,Yes
4,5,7,New York,NaT,4144,Yes
...,...,...,...,...,...,...
85,86,10,Alabama,NaT,4430,No
86,87,9,Arizona,NaT,4183,No
87,88,10,Colorado,NaT,4308,No
88,89,7,Maryland,NaT,3920,No


In [6]:
df_emp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   ID                   90 non-null     int64         
 1   Performance Review   90 non-null     int64         
 2   City                 90 non-null     object        
 3   Last Promotion Date  10 non-null     datetime64[ns]
 4   Salary               90 non-null     int64         
 5   Overdue Vacation?    89 non-null     object        
dtypes: datetime64[ns](1), int64(3), object(2)
memory usage: 4.3+ KB


In [7]:
# Clean the columns name from any spaces
df_emp.columns = df_emp.columns.str.replace(' ','_', regex=False).str.replace('?', '', regex=False)
df_emp.head(15)

Unnamed: 0,ID,Performance_Review,City,Last_Promotion_Date,Salary,Overdue_Vacation
0,1,10,Alabama,NaT,4539,No
1,2,10,Arizona,NaT,3698,Yes
2,3,8,Colorado,NaT,4157,Yes
3,4,10,Missouri,NaT,4360,Yes
4,5,7,New York,NaT,4144,Yes
5,6,5,Ohio,NaT,4257,No
6,7,8,Oregon,NaT,4534,No
7,8,9,Arizona,2017-08-12,4094,Yes
8,9,9,Montana,NaT,4289,Yes
9,10,9,Missouri,NaT,3834,Yes


### __Loading Departments Data__

In [8]:
df_dep = pd.read_excel('./DepartmentInformation.xlsx')
print(f'Number of Departments : {df_dep.shape[0]}')
print(f'Number of Columns : {df_dep.shape[1]}')

Number of Departments : 7
Number of Columns : 2


In [9]:
df_dep

Unnamed: 0,Department,Manager
0,Finance / Accounting,Phelipp
1,Legal,Anna
2,Strategy,Antonella
3,Marketing,Phelipp
4,Development,Leyla
5,Sales,Gabriela
6,Technology and Equipment,Sidney


In [10]:
df_dep.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Department  7 non-null      object
 1   Manager     7 non-null      object
dtypes: object(2)
memory usage: 244.0+ bytes


### __Load HR Fact Data__

In [11]:
df_hr = pd.read_excel('./HRDatabase.xlsx')
print(f'Number of Records: {df_hr.shape[0]}')
print(f'Number of Columns: {df_hr.shape[1]}')

Number of Records: 90
Number of Columns: 10


In [12]:
df_hr

Unnamed: 0,ID,Employee,Gender,Birth Date,Hire Date,Termination Date,Termination Reason,Education,Position,Department
0,1,Harley Matthews,Female,1989-08-17,2011-10-02,NaT,,Higher Certificate,Administrator,Finance / Accounting
1,2,Aliyah Thomas,Female,1988-03-27,2012-03-22,2015-05-01,Unfair Dismissal,Higher Certificate,Lawyer,Legal
2,3,Madeleine Bradley,Female,1981-04-18,2012-08-24,2016-06-19,Resignation,Bachelor's Incompleted,Administrative Analyst,Finance / Accounting
3,4,Gabrielle Gardner,Female,2002-10-31,2012-11-12,NaT,,Bachelor's Completed,Accounting Analyst,Finance / Accounting
4,5,Molly Owen,Female,1979-10-22,2013-09-18,NaT,,Bachelor's Completed,Database Analyst,Strategy
...,...,...,...,...,...,...,...,...,...,...
85,86,Aryan Reynolds,Male,1963-11-21,2018-12-17,2019-03-17,Resignation,Higher Certificate,Commercial Assistant,Sales
86,87,Grayson Lowe,Male,1986-04-15,2019-05-05,NaT,,Bachelor's Incompleted,Developer,Development
87,88,Antonio Ball,Male,1984-08-17,2019-07-25,NaT,,Bachelor's Completed,Pogrammer Analyst,Development
88,89,Ethan Simpson,Male,1969-07-14,2019-08-04,NaT,,Bachelor's Completed,Salesperson,Sales


In [13]:
df_hr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   ID                  90 non-null     int64         
 1   Employee            90 non-null     object        
 2   Gender              90 non-null     object        
 3   Birth Date          90 non-null     datetime64[ns]
 4   Hire Date           90 non-null     datetime64[ns]
 5   Termination Date    26 non-null     datetime64[ns]
 6   Termination Reason  26 non-null     object        
 7   Education           90 non-null     object        
 8   Position            90 non-null     object        
 9   Department          90 non-null     object        
dtypes: datetime64[ns](3), int64(1), object(6)
memory usage: 7.2+ KB


In [14]:
df_hr.columns = df_hr.columns.str.replace(' ','_', regex=False)

In [15]:
df_hr.head(10)

Unnamed: 0,ID,Employee,Gender,Birth_Date,Hire_Date,Termination_Date,Termination_Reason,Education,Position,Department
0,1,Harley Matthews,Female,1989-08-17,2011-10-02,NaT,,Higher Certificate,Administrator,Finance / Accounting
1,2,Aliyah Thomas,Female,1988-03-27,2012-03-22,2015-05-01,Unfair Dismissal,Higher Certificate,Lawyer,Legal
2,3,Madeleine Bradley,Female,1981-04-18,2012-08-24,2016-06-19,Resignation,Bachelor's Incompleted,Administrative Analyst,Finance / Accounting
3,4,Gabrielle Gardner,Female,2002-10-31,2012-11-12,NaT,,Bachelor's Completed,Accounting Analyst,Finance / Accounting
4,5,Molly Owen,Female,1979-10-22,2013-09-18,NaT,,Bachelor's Completed,Database Analyst,Strategy
5,6,Lilah Gill,Female,1980-10-05,2014-01-16,2017-10-17,Resignation,Bachelor's Completed,Business Intelligence Analyst,Strategy
6,7,Winnie May,Female,1992-11-16,2014-04-26,2017-10-07,Unfair Dismissal,Bachelor's Completed,Communication Analyst,Marketing
7,8,Blossom Howard,Female,1996-09-20,2014-05-08,NaT,,Major's,Personnel Analyst,Finance / Accounting
8,9,Anayah Armstrong,Female,1975-03-01,2014-06-15,NaT,,Major's,Marketing Analyst,Marketing
9,10,Isabel Wood,Female,1974-10-13,2014-11-30,NaT,,Major's,Business Analyst,Development


##### __Important Step: Merging (Joining) Tables__

In [16]:
df = df_emp.merge(right=df_hr).merge(right=df_dep)
df.set_index('ID', inplace=True)
df.sort_index(inplace=True)

In [17]:
df

Unnamed: 0_level_0,Performance_Review,City,Last_Promotion_Date,Salary,Overdue_Vacation,Employee,Gender,Birth_Date,Hire_Date,Termination_Date,Termination_Reason,Education,Position,Department,Manager
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,10,Alabama,NaT,4539,No,Harley Matthews,Female,1989-08-17,2011-10-02,NaT,,Higher Certificate,Administrator,Finance / Accounting,Phelipp
2,10,Arizona,NaT,3698,Yes,Aliyah Thomas,Female,1988-03-27,2012-03-22,2015-05-01,Unfair Dismissal,Higher Certificate,Lawyer,Legal,Anna
3,8,Colorado,NaT,4157,Yes,Madeleine Bradley,Female,1981-04-18,2012-08-24,2016-06-19,Resignation,Bachelor's Incompleted,Administrative Analyst,Finance / Accounting,Phelipp
4,10,Missouri,NaT,4360,Yes,Gabrielle Gardner,Female,2002-10-31,2012-11-12,NaT,,Bachelor's Completed,Accounting Analyst,Finance / Accounting,Phelipp
5,7,New York,NaT,4144,Yes,Molly Owen,Female,1979-10-22,2013-09-18,NaT,,Bachelor's Completed,Database Analyst,Strategy,Antonella
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,10,Alabama,NaT,4430,No,Aryan Reynolds,Male,1963-11-21,2018-12-17,2019-03-17,Resignation,Higher Certificate,Commercial Assistant,Sales,Gabriela
87,9,Arizona,NaT,4183,No,Grayson Lowe,Male,1986-04-15,2019-05-05,NaT,,Bachelor's Incompleted,Developer,Development,Leyla
88,10,Colorado,NaT,4308,No,Antonio Ball,Male,1984-08-17,2019-07-25,NaT,,Bachelor's Completed,Pogrammer Analyst,Development,Leyla
89,7,Maryland,NaT,3920,No,Ethan Simpson,Male,1969-07-14,2019-08-04,NaT,,Bachelor's Completed,Salesperson,Sales,Gabriela


### __Data Cleaning and Preprocessing__

In [18]:
df.columns

Index(['Performance_Review', 'City', 'Last_Promotion_Date', 'Salary',
       'Overdue_Vacation', 'Employee', 'Gender', 'Birth_Date', 'Hire_Date',
       'Termination_Date', 'Termination_Reason', 'Education', 'Position',
       'Department', 'Manager'],
      dtype='object')

##### Firstly, we are going to rearrange the order of the columns

In [19]:
df = df[['Employee', 'Gender', 'Birth_Date', 'Education',
         'City', 'Position', 'Performance_Review', 'Salary', 'Hire_Date', 'Last_Promotion_Date',
         'Termination_Date', 'Termination_Reason', 'Overdue_Vacation', 'Department', 'Manager']].copy()

In [20]:
df['Department'] = df['Department'].replace('Technology and Equipment', 'Technology')

In [21]:
df.head()

Unnamed: 0_level_0,Employee,Gender,Birth_Date,Education,City,Position,Performance_Review,Salary,Hire_Date,Last_Promotion_Date,Termination_Date,Termination_Reason,Overdue_Vacation,Department,Manager
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,Harley Matthews,Female,1989-08-17,Higher Certificate,Alabama,Administrator,10,4539,2011-10-02,NaT,NaT,,No,Finance / Accounting,Phelipp
2,Aliyah Thomas,Female,1988-03-27,Higher Certificate,Arizona,Lawyer,10,3698,2012-03-22,NaT,2015-05-01,Unfair Dismissal,Yes,Legal,Anna
3,Madeleine Bradley,Female,1981-04-18,Bachelor's Incompleted,Colorado,Administrative Analyst,8,4157,2012-08-24,NaT,2016-06-19,Resignation,Yes,Finance / Accounting,Phelipp
4,Gabrielle Gardner,Female,2002-10-31,Bachelor's Completed,Missouri,Accounting Analyst,10,4360,2012-11-12,NaT,NaT,,Yes,Finance / Accounting,Phelipp
5,Molly Owen,Female,1979-10-22,Bachelor's Completed,New York,Database Analyst,7,4144,2013-09-18,NaT,NaT,,Yes,Strategy,Antonella


### __Gender Column__

In [22]:
gender = df['Gender'].value_counts(normalize=1)*100
gender

Gender
Male     62.2
Female   37.8
Name: proportion, dtype: float64

In [23]:
fig = px.bar(
    data_frame=gender,
    x=gender.index,
    y=gender,
    color= gender.index,
    title= '<b>Gender Frequency (PCT)',
    color_discrete_sequence= ['red', 'lightgrey'],
    labels= {'index' : '<b>Gender', 'y': '<b>Frequency in PCT (%)'},
    template= 'plotly_white',
    text=gender.apply(lambda x: f'{x:0.0f}%')
)

custom_layout()

fig.update_traces(
    textfont={
        'size': 16,
        'family': 'arial',
        'color': '#222'
    },
    hovertemplate = 'Gender: %{x}<br>Percentange: %{y:0.1f}%',
    marker = dict(line=dict(color='#666', width=2))
)

iplot(fig)

### __Age Column__

In [24]:
birth = df['Birth_Date'].dt.year.value_counts()
birth.head()

Birth_Date
1990    6
1991    5
1996    5
1975    5
1984    4
Name: count, dtype: int64

In [25]:
current_year = datetime.now().year

age_mean = np.mean((current_year - birth.index).to_list())
age_median = np.median((current_year - birth.index).to_list())
print(f'Average Age: {age_mean:0.2f}')
print(f'Median Age: {age_median}')

Average Age: 42.82
Median Age: 43.0


In [49]:
fig = px.histogram(
    data_frame= birth, 
    x= (current_year - birth.index), # Calc the Age from Year of Birth
    y= birth,
    nbins= 15,
    color_discrete_sequence= ['#ED1C24'],
    labels={'x': '<b>Age', 'sum of y': '<b>Frequency'},
    title= '<b>The Distribution of Age of Our Employees',
    template= 'plotly_white'
    )

custom_layout()

fig.update_traces(
    textfont = {
        'size': 20,
        'family': 'tahoma',
        'color': '#fff'
    },
    hovertemplate = 'Age: %{x}<br>Frequency: %{y}',
    marker = dict(line=dict(color='#111', width=1))
)

# Adding Mean Line
add_line(
    x0=age_mean, y0=0,
    x1=age_mean, y1=birth.max()+15,
    line_color='#F2F0EB',
    font_color='#000000',
    text='Mean'
)

# Adding Median Line
add_line(
    x0= age_median, y0= 0,
    x1= age_median, y1= birth.max()+15,
    line_color= '#133955',
    font_color= '#000000',
    xposition= 'left',
    text='Median'
)

iplot(fig)

### __Education Column__

In [27]:
education = (df['Education'].value_counts(normalize=1) * 100)
education.apply(lambda x:f'{x:0.1f}%')

Education
Bachelor's Completed      28.9%
Major's                   20.0%
Higher Certificate        15.6%
Postgraduate              13.3%
Bachelor's Incompleted     7.8%
Doctoral                   7.8%
Master's                   6.7%
Name: proportion, dtype: object

In [51]:
fig = px.bar(
    data_frame=education,
    orientation='h',
    y=education.index,
    x=education,
    color=education.index,
    title='<b>Education Level Frequency (PCT)',
    template='plotly_white',
    labels= {'index':'<b>Education', 'x':'<b>Frequency in PCT (%)'},
    text=education.apply(lambda x: f'{x:0.0f}%')
)

custom_layout()

fig.update_traces(
    textfont = {
        'size':14,
        'family':'tahoma',
        'color':'#000'
    },
    hovertemplate = 'Education Level: %{y}<br>Percentage: %{x:0.01f}%',
    marker= dict(line=dict(color='#000', width=1))
)

iplot(fig)

### __City Column__

In [29]:
# Remove Space from City
df['City'] = df['City'].str.replace('\xa0', '', )

In [30]:
city = df['City'].nunique()

In [31]:
(df['City'].value_counts(normalize=1)*100).apply(lambda x: f'{x:0.1f}%')

City
Washington      10.0%
Texas           10.0%
Oregon           8.9%
Montana          8.9%
Missouri         7.8%
New York         7.8%
Ohio             7.8%
Colorado         6.7%
Hawaii           6.7%
Alabama          5.6%
Arizona          5.6%
Mississippi      5.6%
Maryland         5.6%
Rhode Island     3.3%
Name: proportion, dtype: object

### __Performance Review Column__

In [32]:
pd.DataFrame(df['Performance_Review'].describe()).T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Performance_Review,90.0,7.7,1.7,5.0,6.2,8.0,9.0,10.0


In [33]:
fig = px.box(
    x= df['Performance_Review'],
    labels={'x':'<b>Performance Review'},
    template='plotly_white',
    title='<b>Performance 5-Summary',
    color_discrete_sequence=['#B31942']
)

custom_layout()
iplot(fig)

From Box Plot, we can say that __50%__ of our employees has performance between __6:9__

### __Hiring Date Column__

In [34]:
hiring_year = df['Hire_Date'].dt.year.value_counts().sort_index()

In [52]:
fig = px.line(
    x= hiring_year.index,
    y= hiring_year,
    markers= True,
    color_discrete_sequence= [used_color[5]],
    labels= {'x':'<b>Year', 'y':'<b>Count of Hired Employee'},
    template='plotly_white',
    title= '<b>Hired Employees Through Years'
)

custom_layout()
iplot(fig)

### __Termination Reason Column__

In [36]:
df['Termination_Reason'].value_counts()

Termination_Reason
Resignation                   11
Unfair Dismissal              10
Dismissal                      2
End of Internishp Contract     2
Mutual Agreement               1
Name: count, dtype: int64

In [37]:
reasons = df['Termination_Reason'].dropna()
reasons = reasons.str.replace(' ','_')

In [38]:
stopwords = ['End', 'of']
wordcloud = WordCloud(
    width = 500,
    height = 400,
    colormap = 'Greens',
    stopwords=stopwords,
    random_state=10
).generate(' '.join(reasons))

fig = px.imshow(wordcloud, template='plotly_dark')
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False)

iplot(fig)

### __Question & Insights__

##### __What is the average salary for each department?__

In [39]:
dep_salary = df.groupby(df['Department'])['Salary'].mean().sort_values(ascending=False)
dep_salary

Department
Marketing              4,313.8
Finance / Accounting   4,248.2
Development            4,091.0
Sales                  4,082.1
Strategy               4,075.1
Technology             3,999.4
Legal                  3,698.0
Name: Salary, dtype: float64

In [40]:
fig = px.bar(
    data_frame = dep_salary,
    orientation = 'h',
    x = dep_salary,
    y = dep_salary.index,
    color=dep_salary.index,
    template = 'plotly_white',
    color_discrete_sequence= [used_color[3]],
    text_auto = '0.4s',
    labels= {'x':'<b>Average Salary'},
    title= '<b>Average Salary For Each Deparment'
)

custom_layout()

fig.update_traces(
    textfont = {
        'size':14,
        'family':'poppins',
        'color':'#000'
    },
    hovertemplate = 'Department: %{y}<br>Percentage: %{x:0.3s}',
    marker = dict(line=dict(color='#222', width=2))
)

iplot(fig)

##### __What is the average performance for each department?__

In [41]:
perf_dept = df.groupby(df['Department'])['Performance_Review'].mean().sort_values(ascending=False).apply(lambda x: f'{x:0.1f}')
perf_dept

Department
Legal                   10.0
Finance / Accounting     7.9
Technology               7.9
Strategy                 7.8
Marketing                7.8
Development              7.6
Sales                    7.1
Name: Performance_Review, dtype: object

In [54]:
fig = px.bar(
    data_frame=perf_dept,
    orientation='h',
    x= perf_dept,
    y= perf_dept.index,
    color=perf_dept.index,
    template='plotly_white',
    color_discrete_sequence=[used_color[3]],
    text_auto='0.4s',
    labels= {'x':'Average Performance'},
    title='<b>Average Performance For Each Department'
)

custom_layout()

fig.update_traces(
    textfont = {
        'size':14,
        'family':'poppins',
        'color':'#000'
    },
    hovertemplate = 'Department: %{y}<br>Average Salary: %{x:0.3s}',
    marker = dict(line=dict(color='#222', width=2))
)

iplot(fig)

##### __What is the average salary for each jon position?__

In [43]:
position_salary = df.groupby(df['Position'])['Salary'].mean().sort_values(ascending=False)
top_10_position = position_salary.nlargest(10)
pd.DataFrame(top_10_position)

Unnamed: 0_level_0,Salary
Position,Unnamed: 1_level_1
Administrator,4539.0
Commercial Assistant,4361.7
Communication Analyst,4333.5
Administrative Analyst,4325.0
Financial Analyst,4304.0
Marketing Analyst,4294.0
Administrative Assistant,4234.0
Developer,4200.1
Accounting Analyst,4183.0
Business Intelligence Analyst,4140.6


##### __For each department, what is the popularity of each gender?__

In [44]:
gender_dept = df.pivot_table(
    index='Department',
    columns='Gender',
    values='Employee',
    aggfunc='count'
).fillna(0)

gender_dept = gender_dept.sort_values('Male', ascending=False)
gender_dept = gender_dept.iloc[:, [1, 0]]
gender_dept

Gender,Male,Female
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Development,32.0,15.0
Finance / Accounting,7.0,6.0
Sales,5.0,3.0
Strategy,5.0,4.0
Technology,5.0,3.0
Marketing,2.0,2.0
Legal,0.0,1.0


In [45]:
fig = px.bar(
    data_frame=gender_dept,
    barmode='group',
    template='plotly_white',
    color_discrete_sequence=used_color,
    labels= {'value':'Gender Frequency'},
    title='<b>Popularity of Gender via Department',
    text_auto=True
)

custom_layout(showlegend=True)

fig.update_traces(
    textposition = 'outside',
    textfont = {
        'size':14,
        'family':'poppins',
        'color':'#000'
    },
    hovertemplate = 'Department: %{x}<br>Gender Frequency: %{y}',
    marker = dict(line=dict(color='#222', width=2))
)

iplot(fig)

##### __Important KPI : What is the Performance Review Completion Rate?__

In [67]:
# First we will get the count of review of Completion
completed_review = df.loc[df['Performance_Review'] == 10, 'Performance_Review'].count()
perf_rate = completed_review / len(df) * 100
print(f'Performance Review Completion Rate: {perf_rate:0.2f}%')

Performance Review Completion Rate: 17.78%


##### I think 17.78% is low. So the recommendation is:
* Offer training for Employees
* Reward and Punishment : To Encourage all employees to get high performance reviews
* Visual Performance Tracking : We can share the performance tracking for all employees which will be extremely effective in motivating them