In [10]:
import pandas as pd
import numpy as np
from faker import Faker
import random
import plotly.express as px
# Initialize Faker
fake = Faker()


In [6]:
# Generate data
num_employees_per_location = 100
total_employees = num_employees_per_location * 5
organizations = [f"Organization {i+1}" for i in range(7)]
locations = ["Belfast", "Northern", "South Eastern", "Southern", "Western"]
departments = ["HR", "Finance", "IT", "Marketing", "Sales", "R&D"]
positions = ["Manager", "Analyst", "Specialist", "Coordinator", "Executive"]

data = []

for location in locations:
    for _ in range(num_employees_per_location):
        employee_id = fake.unique.random_number(digits=5, fix_len=True)
        name = fake.name()
        age = random.randint(22, 65)
        gender = random.choice(["Male", "Female"])
        organization = random.choice(organizations)
        department = random.choice(departments)
        position = random.choice(positions)
        salary = round(random.uniform(30000, 120000), 2)
        hire_date = fake.date_between(start_date='-10y', end_date='today')
        performance_score = round(random.uniform(1, 5), 2)
        
        data.append([
            employee_id, name, age, gender, organization, 
            location, department, position, salary, hire_date, performance_score
        ])

# Create DataFrame
columns = [
    "Employee ID", "Name", "Age", "Gender", "Organization", 
    "Location", "Department", "Position", "Salary", "Hire Date", "Performance Score"
]
df = pd.DataFrame(data, columns=columns)

In [7]:
df

Unnamed: 0,Employee ID,Name,Age,Gender,Organization,Location,Department,Position,Salary,Hire Date,Performance Score
0,34320,Nancy Pitts,40,Male,Organization 2,Belfast,R&D,Specialist,65150.97,2016-01-17,4.65
1,50153,William Page,24,Female,Organization 4,Belfast,Marketing,Coordinator,35717.94,2014-07-30,2.15
2,45735,Jesse Spence,38,Male,Organization 5,Belfast,Finance,Executive,36945.19,2017-08-20,2.60
3,11261,Carlos Taylor,62,Female,Organization 2,Belfast,R&D,Specialist,75171.14,2017-03-18,3.36
4,16556,Kathryn Martin,42,Male,Organization 6,Belfast,Sales,Specialist,71200.21,2022-04-15,2.27
...,...,...,...,...,...,...,...,...,...,...,...
495,67128,Cheryl Gentry,36,Female,Organization 4,Western,R&D,Coordinator,108598.24,2017-02-04,2.58
496,96983,Nathan Howe,47,Female,Organization 2,Western,R&D,Analyst,72398.33,2021-10-07,3.82
497,23651,Michael Johnson,36,Male,Organization 7,Western,Marketing,Executive,115360.09,2022-04-11,3.82
498,24431,Mr. Brent Gallegos DDS,48,Male,Organization 5,Western,Finance,Executive,110548.50,2022-12-18,4.07


In [8]:
# Save to Excel
file_path = "workforce_management_data.xlsx"
df.to_excel(file_path, index=False)



In [15]:
# Add dummy data for Training Hours and Overtime Hours
df['Training Hours'] = [random.randint(10, 50) for _ in range(len(df))]
df['Overtime Hours'] = [random.randint(0, 20) for _ in range(len(df))]

# Calculate KPIs

# 1. Employee Turnover Rate (Assuming turnover can be inferred from performance score for this dummy dataset)
turnover_rate = (df['Performance Score'] < 2).mean() * 100

# 2. Average Tenure (in years)
df['Hire Date'] = pd.to_datetime(df['Hire Date'])
df['Tenure'] = (pd.Timestamp('today') - df['Hire Date']).dt.days / 365.25
average_tenure = df['Tenure'].mean()

# 3. Absenteeism Rate (Assuming absenteeism can be inferred from performance score for this dummy dataset)
absenteeism_rate = (df['Performance Score'] < 3).mean() * 100

# 4. Employee Satisfaction Score (Assuming performance score represents satisfaction)
average_satisfaction_score = df['Performance Score'].mean()

# 5. Average Salary
average_salary = df['Salary'].mean()

# 6. Performance Score Analysis
average_performance_score = df['Performance Score'].mean()

# 7. Headcount by Department and Location
headcount_by_department = df.groupby('Department')['Employee ID'].count().reset_index()
headcount_by_location = df.groupby('Location')['Employee ID'].count().reset_index()

# 8. Average Training Hours per Employee
average_training_hours = df['Training Hours'].mean()

# 9. Average Overtime Hours per Employee
average_overtime_hours = df['Overtime Hours'].mean()

# 10. Employee Productivity (Performance Score / Salary)
df['Productivity'] = df['Performance Score'] / df['Salary']
average_productivity = df['Productivity'].mean()

# 11. Promotions Rate (Assuming employees with Performance Score > 4 are promoted)
promotions_rate = (df['Performance Score'] > 4).mean() * 100

# Display the results
kpis = {
    "Employee Turnover Rate (%)": turnover_rate,
    "Average Tenure (years)": average_tenure,
    "Absenteeism Rate (%)": absenteeism_rate,
    "Average Satisfaction Score": average_satisfaction_score,
    "Average Salary ($)": average_salary,
    "Average Performance Score": average_performance_score,
    "Average Training Hours": average_training_hours,
    "Average Overtime Hours": average_overtime_hours,
    "Average Productivity": average_productivity,
    "Promotions Rate (%)": promotions_rate
}

# Creating a dataframe for better visualization
kpis_df = pd.DataFrame(list(kpis.items()), columns=['KPI', 'Value'])
print("Key Performance Indicators (KPIs):")
print(kpis_df)

# Plotting the KPIs using Plotly Express
fig = px.bar(kpis_df, x='KPI', y='Value', title='Workforce Management KPIs', labels={'Value': 'Value'})
fig.show()

# Display Headcount by Department and Location
print("\nHeadcount by Department:")
print(headcount_by_department)

print("\nHeadcount by Location:")
print(headcount_by_location)

# Plotting Headcount by Department using Plotly Express
fig_department = px.bar(headcount_by_department, x='Department', y='Employee ID', title='Headcount by Department', labels={'Employee ID': 'Headcount'})
fig_department.show()

# Plotting Headcount by Location using Plotly Express
fig_location = px.bar(headcount_by_location, x='Location', y='Employee ID', title='Headcount by Location', labels={'Employee ID': 'Headcount'})
fig_location.show()

Key Performance Indicators (KPIs):
                          KPI         Value
0  Employee Turnover Rate (%)     24.000000
1      Average Tenure (years)      4.876370
2        Absenteeism Rate (%)     49.400000
3  Average Satisfaction Score      3.027160
4          Average Salary ($)  73679.444220
5   Average Performance Score      3.027160
6      Average Training Hours     30.024000
7      Average Overtime Hours     10.478000
8        Average Productivity      0.000048
9         Promotions Rate (%)     26.600000



Headcount by Department:
  Department  Employee ID
0    Finance           77
1         HR           80
2         IT           88
3  Marketing           86
4        R&D           87
5      Sales           82

Headcount by Location:
        Location  Employee ID
0        Belfast          100
1       Northern          100
2  South Eastern          100
3       Southern          100
4        Western          100


In [16]:
# Add dummy data for Training Hours, Overtime Hours, and Diversity
df['Training Hours'] = [random.randint(10, 50) for _ in range(len(df))]
df['Overtime Hours'] = [random.randint(0, 20) for _ in range(len(df))]
df['Diverse Background'] = [random.choice([True, False]) for _ in range(len(df))]
df['Leadership Position'] = [random.choice([True, False]) for _ in range(len(df))]

# Calculate KPIs

# 1. Employee Turnover Rate (Assuming turnover can be inferred from performance score for this dummy dataset)
turnover_rate = (df['Performance Score'] < 2).mean() * 100

# 2. Average Tenure (in years)
df['Hire Date'] = pd.to_datetime(df['Hire Date'])
df['Tenure'] = (pd.Timestamp('today') - df['Hire Date']).dt.days / 365.25
average_tenure = df['Tenure'].mean()

# 3. Absenteeism Rate (Assuming absenteeism can be inferred from performance score for this dummy dataset)
absenteeism_rate = (df['Performance Score'] < 3).mean() * 100

# 4. Employee Satisfaction Score (Assuming performance score represents satisfaction)
average_satisfaction_score = df['Performance Score'].mean()

# 5. Average Salary
average_salary = df['Salary'].mean()

# 6. Performance Score Analysis
average_performance_score = df['Performance Score'].mean()

# 7. Headcount by Department and Location
headcount_by_department = df.groupby('Department')['Employee ID'].count().reset_index()
headcount_by_location = df.groupby('Location')['Employee ID'].count().reset_index()

# 8. Average Training Hours per Employee
average_training_hours = df['Training Hours'].mean()

# 9. Average Overtime Hours per Employee
average_overtime_hours = df['Overtime Hours'].mean()

# 10. Employee Productivity (Performance Score / Salary)
df['Productivity'] = df['Performance Score'] / df['Salary']
average_productivity = df['Productivity'].mean()

# 11. Promotions Rate (Assuming employees with Performance Score > 4 are promoted)
promotions_rate = (df['Performance Score'] > 4).mean() * 100

# Strategic KPIs

# 1. Employee Engagement Rate (Assuming engagement can be inferred from performance score > 3.5)
engagement_rate = (df['Performance Score'] > 3.5).mean() * 100

# 2. Diversity Rate
diversity_rate = df['Diverse Background'].mean() * 100

# 3. Leadership Diversity
leadership_diversity_rate = df[df['Leadership Position']]['Diverse Background'].mean() * 100

# 4. Talent Retention Rate (Assuming top talent is represented by performance score > 4)
talent_retention_rate = (df['Performance Score'] > 4).mean() * 100

# 5. Training Effectiveness (Improvement in performance scores, assuming improvement is significant if score > 3)
training_effectiveness = (df['Performance Score'] > 3).mean() * 100

# 6. Internal Promotion Rate (Assuming leadership positions filled internally)
internal_promotion_rate = df['Leadership Position'].mean() * 100

# Display the results
kpis = {
    "Employee Turnover Rate (%)": turnover_rate,
    "Average Tenure (years)": average_tenure,
    "Absenteeism Rate (%)": absenteeism_rate,
    "Average Satisfaction Score": average_satisfaction_score,
    "Average Salary ($)": average_salary,
    "Average Performance Score": average_performance_score,
    "Average Training Hours": average_training_hours,
    "Average Overtime Hours": average_overtime_hours,
    "Average Productivity": average_productivity,
    "Promotions Rate (%)": promotions_rate,
    "Employee Engagement Rate (%)": engagement_rate,
    "Diversity Rate (%)": diversity_rate,
    "Leadership Diversity Rate (%)": leadership_diversity_rate,
    "Talent Retention Rate (%)": talent_retention_rate,
    "Training Effectiveness (%)": training_effectiveness,
    "Internal Promotion Rate (%)": internal_promotion_rate
}

# Creating a dataframe for better visualization
kpis_df = pd.DataFrame(list(kpis.items()), columns=['KPI', 'Value'])
print("Key Performance Indicators (KPIs):")
print(kpis_df)

# Plotting the KPIs using Plotly Express
fig = px.bar(kpis_df, x='KPI', y='Value', title='Workforce Management KPIs', labels={'Value': 'Value'})
fig.show()

# Display Headcount by Department and Location
print("\nHeadcount by Department:")
print(headcount_by_department)

print("\nHeadcount by Location:")
print(headcount_by_location)

# Plotting Headcount by Department using Plotly Express
fig_department = px.bar(headcount_by_department, x='Department', y='Employee ID', title='Headcount by Department', labels={'Employee ID': 'Headcount'})
fig_department.show()

# Plotting Headcount by Location using Plotly Express
fig_location = px.bar(headcount_by_location, x='Location', y='Employee ID', title='Headcount by Location', labels={'Employee ID': 'Headcount'})
fig_location.show()

Key Performance Indicators (KPIs):
                              KPI         Value
0      Employee Turnover Rate (%)     24.000000
1          Average Tenure (years)      4.876370
2            Absenteeism Rate (%)     49.400000
3      Average Satisfaction Score      3.027160
4              Average Salary ($)  73679.444220
5       Average Performance Score      3.027160
6          Average Training Hours     30.152000
7          Average Overtime Hours      9.758000
8            Average Productivity      0.000048
9             Promotions Rate (%)     26.600000
10   Employee Engagement Rate (%)     38.000000
11             Diversity Rate (%)     50.200000
12  Leadership Diversity Rate (%)     52.789700
13      Talent Retention Rate (%)     26.600000
14     Training Effectiveness (%)     50.400000
15    Internal Promotion Rate (%)     46.600000



Headcount by Department:
  Department  Employee ID
0    Finance           77
1         HR           80
2         IT           88
3  Marketing           86
4        R&D           87
5      Sales           82

Headcount by Location:
        Location  Employee ID
0        Belfast          100
1       Northern          100
2  South Eastern          100
3       Southern          100
4        Western          100
