In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.offline import offline, iplot
from datetime import datetime
from wordcloud import WordCloud, ImageColorGenerator

# Notebook-wide display defaults and the shared colour palette.
# Optional: uncomment to show every column of wide frames.
# pd.set_option("display.max_columns", None)

# Render floats with a thousands separator and one decimal place.
pd.set_option("display.float_format", '{:,.1f}'.format)

# Hex colours reused by the charts in this notebook.
used_color = [
    "#ADA2FF",
    "#C0DEFF",
    "#FCDDB0",
    "#FF9F9F",
    "#EDD2F3",
    "#98EECC",
]

### __Custom Visualization Functions__

In [2]:
# Adding Line to Plotly Figure
def add_line(
    x0=0,
    y0=0,
    x1=0,
    y1=0,
    line_color='#ED1C24',
    font_color='#000000',
    xposition='right',
    text='Text',
    figure=None):
    """Draw a labelled dash-dot line shape on a Plotly figure.

    Parameters
    ----------
    x0, y0 : start coordinates of the line.
    x1, y1 : end coordinates of the line; ``x1`` is also printed in the label.
    line_color : colour of the line.
    font_color : colour of the label text. The default is now ``'#000000'``;
        the previous ``'000000'`` lacked the leading ``#`` and is not a valid
        Plotly hex colour string.
    xposition : value forwarded as the label's ``xanchor``.
    text : label prefix; the label reads ``"<text> : <x1 with one decimal>"``.
    figure : object exposing ``add_shape`` to draw on. When omitted, the
        notebook-level variable ``fig`` is used, which keeps existing calls
        working unchanged.

    Returns
    -------
    None. The target figure is modified through ``add_shape``.
    """
    # Only touch the global `fig` when the caller did not pass a figure, so the
    # helper also works before any global figure exists.
    target = fig if figure is None else figure

    line_style = {
        'color': line_color,
        'width': 3,
        'dash': 'dashdot',
    }
    label_style = {
        'text': f'{text} : {x1: 0.1f}\t',
        'textposition': 'end',
        'yanchor': 'top',
        'xanchor': xposition,
        'textangle': 0,
        'font': {
            'size': 15,
            'color': font_color,
            'family': 'tahoma',
        },
    }

    target.add_shape(type='line', x0=x0, y0=y0, x1=x1, y1=y1,
                     line=line_style,
                     label=label_style)

In [3]:
def custom_layout(title_size=28, showlegend=False, figure=None):
    """Apply the notebook's shared title and hover-label styling to a figure.

    Parameters
    ----------
    title_size : font size of the figure title.
    showlegend : forwarded to the layout's ``showlegend`` setting.
    figure : object exposing ``update_layout`` to style. When omitted, the
        notebook-level variable ``fig`` is used, which keeps existing calls
        working unchanged.

    Returns
    -------
    None. The target figure is modified through ``update_layout``.
    """
    # Only touch the global `fig` when the caller did not pass a figure, so the
    # helper does not depend on cell execution order when a figure is supplied.
    target = fig if figure is None else figure

    title_style = {
        'font': {
            'size': title_size,
            'family': 'tahoma',
        }
    }
    hover_style = {
        'bgcolor': '#111',
        'font_size': 16,
        'font_family': 'arial',
    }

    target.update_layout(
        showlegend=showlegend,
        title=title_style,
        hoverlabel=hover_style,
    )

In [4]:
# Read the employee workbook, then report its row and column counts.
df_emp = pd.read_excel('./EmployeeInformation.xlsx')
print(
    f'Number of Employess: {len(df_emp.index)}',
    f'Number of Features: {len(df_emp.columns)}',
    sep='\n',
)

Number of Employess: 90
Number of Features: 6


In [5]:
# Display the employee frame as this cell's output.
df_emp

Unnamed: 0,ID,Performance Review,City,Last Promotion Date,Salary,Overdue Vacation?
0,1,10,Alabama,NaT,4539,No
1,2,10,Arizona,NaT,3698,Yes
2,3,8,Colorado,NaT,4157,Yes
3,4,10,Missouri,NaT,4360,Yes
4,5,7,New York,NaT,4144,Yes
...,...,...,...,...,...,...
85,86,10,Alabama,NaT,4430,No
86,87,9,Arizona,NaT,4183,No
87,88,10,Colorado,NaT,4308,No
88,89,7,Maryland,NaT,3920,No


In [6]:
# Print the column dtypes and non-null counts of the employee frame.
df_emp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   ID                   90 non-null     int64         
 1   Performance Review   90 non-null     int64         
 2   City                 90 non-null     object        
 3   Last Promotion Date  10 non-null     datetime64[ns]
 4   Salary               90 non-null     int64         
 5   Overdue Vacation?    89 non-null     object        
dtypes: datetime64[ns](1), int64(3), object(2)
memory usage: 4.3+ KB


In [7]:
# Normalise the column labels: spaces become underscores and question marks
# are dropped (both replacements are literal, not regex).
df_emp.columns = (
    df_emp.columns
    .str.replace(' ', '_', regex=False)
    .str.replace('?', '', regex=False)
)

# Preview the first 15 rows with the cleaned column names.
df_emp.head(15)

Unnamed: 0,ID,Performance_Review,City,Last_Promotion_Date,Salary,Overdue_Vacation
0,1,10,Alabama,NaT,4539,No
1,2,10,Arizona,NaT,3698,Yes
2,3,8,Colorado,NaT,4157,Yes
3,4,10,Missouri,NaT,4360,Yes
4,5,7,New York,NaT,4144,Yes
5,6,5,Ohio,NaT,4257,No
6,7,8,Oregon,NaT,4534,No
7,8,9,Arizona,2017-08-12,4094,Yes
8,9,9,Montana,NaT,4289,Yes
9,10,9,Missouri,NaT,3834,Yes


### __Loading Departments Data__

In [8]:
# Read the department workbook, then report its row and column counts.
df_dep = pd.read_excel('./DepartmentInformation.xlsx')
print(
    f'Number of Departments : {len(df_dep.index)}',
    f'Number of Columns : {len(df_dep.columns)}',
    sep='\n',
)

Number of Departments : 7
Number of Columns : 2


In [9]:
# Display the department frame as this cell's output.
df_dep

Unnamed: 0,Department,Manager
0,Finance / Accounting,Phelipp
1,Legal,Anna
2,Strategy,Antonella
3,Marketing,Phelipp
4,Development,Leyla
5,Sales,Gabriela
6,Technology and Equipment,Sidney


In [10]:
# Print the column dtypes and non-null counts of the department frame.
df_dep.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Department  7 non-null      object
 1   Manager     7 non-null      object
dtypes: object(2)
memory usage: 244.0+ bytes


### __Load HR Fact Data__

In [11]:
# Read the HR fact workbook, then report its row and column counts.
df_hr = pd.read_excel('./HRDatabase.xlsx')
print(
    f'Number of Records: {len(df_hr.index)}',
    f'Number of Columns: {len(df_hr.columns)}',
    sep='\n',
)

Number of Records: 90
Number of Columns: 10


In [12]:
# Display the HR fact frame as this cell's output.
df_hr

Unnamed: 0,ID,Employee,Gender,Birth Date,Hire Date,Termination Date,Termination Reason,Education,Position,Department
0,1,Harley Matthews,Female,1989-08-17,2011-10-02,NaT,,Higher Certificate,Administrator,Finance / Accounting
1,2,Aliyah Thomas,Female,1988-03-27,2012-03-22,2015-05-01,Unfair Dismissal,Higher Certificate,Lawyer,Legal
2,3,Madeleine Bradley,Female,1981-04-18,2012-08-24,2016-06-19,Resignation,Bachelor's Incompleted,Administrative Analyst,Finance / Accounting
3,4,Gabrielle Gardner,Female,2002-10-31,2012-11-12,NaT,,Bachelor's Completed,Accounting Analyst,Finance / Accounting
4,5,Molly Owen,Female,1979-10-22,2013-09-18,NaT,,Bachelor's Completed,Database Analyst,Strategy
...,...,...,...,...,...,...,...,...,...,...
85,86,Aryan Reynolds,Male,1963-11-21,2018-12-17,2019-03-17,Resignation,Higher Certificate,Commercial Assistant,Sales
86,87,Grayson Lowe,Male,1986-04-15,2019-05-05,NaT,,Bachelor's Incompleted,Developer,Development
87,88,Antonio Ball,Male,1984-08-17,2019-07-25,NaT,,Bachelor's Completed,Pogrammer Analyst,Development
88,89,Ethan Simpson,Male,1969-07-14,2019-08-04,NaT,,Bachelor's Completed,Salesperson,Sales


In [13]:
# Print the column dtypes and non-null counts of the HR fact frame.
df_hr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   ID                  90 non-null     int64         
 1   Employee            90 non-null     object        
 2   Gender              90 non-null     object        
 3   Birth Date          90 non-null     datetime64[ns]
 4   Hire Date           90 non-null     datetime64[ns]
 5   Termination Date    26 non-null     datetime64[ns]
 6   Termination Reason  26 non-null     object        
 7   Education           90 non-null     object        
 8   Position            90 non-null     object        
 9   Department          90 non-null     object        
dtypes: datetime64[ns](3), int64(1), object(6)
memory usage: 7.2+ KB
