In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import time
import random

# Simulation Project (FA23):
We simulate gender bias in the workplace: workers are hired, promoted by seniority, and leave after a random, gender-dependent stay time.




## Setup:

In [2]:
# Next worker id to hand out. Ids grow with hire order, so a smaller id
# means a more senior worker.
worker_id = 1

# Lookup tables used only for pretty printing.
work_levels = {1: "J", 2: "M", 3: "S", 4: "E"}
worker_genders = {0: "M", 1: "F"}

# Target head-count for each level.
level_sizes = {1: 400, 2: 100, 3: 25, 4: 5}

# Extra stay time granted per level (higher levels stay longer).
# Use negative values for the opposite effect.
level_ext_stay_times = {1: 0, 2: 10, 3: 20, 4: 30}

# Base stay time per gender; the lower value for gender 1 models women
# being pushed out sooner.
gender_stay_times = {0: 30, 1: 28}

# Live workforce: one (initially empty) list of workers per level.
worker_db = {level: [] for level in work_levels}

In [3]:
class worker(object):
    """A single employee in the simulated workforce.

    Attributes:
        level: integer level of the worker (key into level_ext_stay_times).
        gender: 0 or 1 (key into gender_stay_times; 0 is treated as male).
        idx: worker id; workers compare by idx, smaller sorts first.
        start_time: time the worker was created at.
        end_time: time at which the worker leaves, or None until
            set_end_time() has been called.
    """

    def __init__(self, level, gender, idx, start_time):
        self.level = level
        self.gender = gender
        self.idx = idx
        self.start_time = start_time
        # Defined up front so __str__ / get_end_time never raise
        # AttributeError on a worker whose end time was not drawn yet.
        self.end_time = None

    def __lt__(self, other):
        """Order workers by id so that list.sort() puts low ids first."""
        return self.idx < other.get_index()

    def __str__(self):
        """Return a one-line description; end_time shows 'unset' if None."""
        end = "unset" if self.end_time is None else "%f" % self.end_time
        return "[level: %s, gender: %s, id: %i, end_time: %s]" \
            % (self.level, self.gender, self.idx, end)

    def get_index(self):
        """Return the worker id."""
        return self.idx

    def get_level(self):
        """Return the worker's current level."""
        return self.level

    def set_end_time(self):
        """Draw a random stay duration and store start_time + duration.

        The duration is exponential with mean
        gender_stay_times[gender] + level_ext_stay_times[level].
        For gender != 0 the draw is additionally capped at that mean.
        """
        mean_stay = gender_stay_times[self.gender] \
            + level_ext_stay_times[self.level]
        stay = np.random.exponential(mean_stay)

        if self.gender != 0:
            # Female employees: stay is the min of the exponential draw and
            # the (constant) mean, i.e. they never outlast the mean stay.
            stay = min(stay, mean_stay)

        # NOTE(review): measured from the original start_time, so a re-draw
        # after promotion can land in the past -- confirm this is intended.
        self.end_time = self.start_time + stay

    def get_end_time(self):
        """Return the departure time, or None if it was never set."""
        return self.end_time

    def get_gender(self):
        """Return the worker's gender code."""
        return self.gender

    def promote(self):
        """Move the worker up one level and re-draw the end time."""
        self.level += 1
        self.set_end_time()

In [4]:
def hire_worker(level, idx, all_male=False, all_female=False):
    """Create one worker at `level` with id `idx` and add it to worker_db.

    Gender is forced to 1 when `all_female` is set, to 0 when `all_male`
    is set, and is otherwise drawn uniformly from {0, 1}. Setting both
    flags at once is rejected by an assertion.
    """
    assert not (all_female and all_male)

    # Pick the gender (a third value could later cover a non-binary case).
    if all_female or all_male:
        gender = 1 if all_female else 0
    else:
        gender = random.randint(0, 1)

    # Build the worker, stamped with the current wall-clock time, and draw
    # its departure time.
    new_worker = worker(level, gender, idx, time.time())
    new_worker.set_end_time()

    # File the worker under its level in the shared database.
    worker_db[new_worker.get_level()].append(new_worker)

def populate_workforce(start_id, all_male = False, all_female = False):
    """Reset worker_db and fill every level to its size in level_sizes.

    Levels are filled from the highest down, so the highest level receives
    the smallest ids.

    Args:
        start_id: id given to the first worker hired; later hires count
            up from it.
        all_male: forwarded to hire_worker; hire only gender 0.
        all_female: forwarded to hire_worker; hire only gender 1.

    Returns:
        The next unused worker id (last id handed out + 1).
    """
    # Start from an empty workforce at every level.
    for key in worker_db:
        worker_db[key] = []

    # Honour the caller's starting id instead of always restarting at 1.
    idx = start_id

    for level in sorted(worker_db, reverse=True):
        for _ in range(level_sizes[level]):
            hire_worker(level, idx, all_male, all_female)
            idx += 1

    return idx

def update_workforce(start_id):
    """Advance the workforce one tick: retire, promote, then hire.

    1. Every worker whose end time is earlier than the current time is
       removed, and each level is sorted so the lowest id comes first.
    2. Working from the top level down, each level above 1 is refilled by
       promoting the lowest-id workers of the level directly below.
    3. Level 1 is refilled with new hires.

    Args:
        start_id: smallest id that may be given to a new hire. Ids already
            present in worker_db are never reused.

    Returns:
        The next unused worker id. Earlier versions returned None; callers
        that ignore the return value keep working.
    """
    now = time.time()

    # Retire expired workers. Building a filtered list checks every worker;
    # removing by index while advancing the index skipped the element that
    # slid into the freed slot.
    for level in worker_db:
        worker_db[level] = [
            wrkr for wrkr in worker_db[level] if wrkr.get_end_time() >= now
        ]
        # Sort to ensure most senior employees are promoted first
        worker_db[level].sort()

    # Promote to fill levels above 1, top level first so each level is
    # topped up before it has to give workers away.
    for level in range(max(worker_db), 1, -1):
        # Stop early if the level below has nobody left to promote.
        while len(worker_db[level]) < level_sizes[level] and worker_db[level - 1]:
            promoted = worker_db[level - 1].pop(0)
            # promote() bumps the level and re-draws the end time itself.
            promoted.promote()
            worker_db[level].append(promoted)

    # Hire new employees to fill level 1, giving each one a fresh id.
    used_ids = [
        wrkr.get_index() for workers in worker_db.values() for wrkr in workers
    ]
    next_id = max([start_id] + [used + 1 for used in used_ids])
    while len(worker_db[1]) < level_sizes[1]:
        hire_worker(1, next_id)
        next_id += 1

    return next_id

In [5]:
def overall_gender_distribution(db):
    """Count workers by gender across every level of `db`.

    Args:
        db: mapping of level -> iterable of workers exposing get_gender().

    Returns:
        (count_male, count_female, total); gender 0 counts as male and any
        other value as female.
    """
    genders = [wrkr.get_gender() for level in db.keys() for wrkr in db[level]]

    total = len(genders)
    count_male = sum(1 for gender in genders if gender == 0)

    return count_male, total - count_male, total

def level_gender_distribution(worker_level):
    """Count workers by gender within a single level.

    Args:
        worker_level: iterable of workers exposing get_gender().

    Returns:
        (count_male, count_female, total); gender 0 counts as male and any
        other value as female.
    """
    count_male = 0
    count_female = 0

    for wrkr in worker_level:
        if wrkr.get_gender() == 0:
            count_male += 1
        else:
            count_female += 1

    return count_male, count_female, count_male + count_female


### Testing / Playground

In [6]:
# populate_workforce(worker_id)

# print([str(i) for i in worker_db[1]])

## Simulation:

### Key to working with final data:

- Workers stored in "worker_db"
    - There are 4 levels (1...4) corresponding to J, M, S, E
    - Each worker object has information on level, gender, start/end times

### Base Simulation:

In [7]:
# Base simulation: start from a randomly gendered workforce and run
# num_trials rounds of simulation_length wall-clock seconds each.
state = 0
simulation_length = 30
num_trials = 5

print("press enter to start!")

input()

# Create initial, randomized workforce
worker_id = populate_workforce(worker_id)

# Enter simulation state
state = 1


beg_time = time.time()

# State method used to allow for retrials in the future-- not currently implemented.
while state == 1:
    print("starting simulation")
    for i in range(num_trials):
        print("starting new round")
        # for duration of simulation length:
        while time.time() <= beg_time + simulation_length:
            update_workforce(worker_id)
            time.sleep(.5)
            if (time.time() - beg_time) % 5.0 < .5:
                # The last tick can land just past the round end, so cap
                # the reported progress at 100.
                print("round %i percent complete" % (min(100, round(((time.time() - beg_time) / simulation_length*100)))))

        print("round complete.")
        print("population male: %i, population female: %i, total: %i \n" % (overall_gender_distribution(worker_db)))

        # Restart the clock for the next round.
        beg_time = time.time()

    state = 0

    print("Simulation complete.")

    print("Final Stats:\n")

    for i in range(1,5):
        print("level %s" % work_levels[i])
        print("population male: %i, population female: %i, total: %i \n" % (level_gender_distribution(worker_db[i])))

press enter to start!

starting simulation
starting new round
round 17 percent complete
round 34 percent complete
round 50 percent complete
round 67 percent complete
round 84 percent complete
round 101 percent complete
round complete.
population male: 295, population female: 235, total: 530 

starting new round
round 17 percent complete
round 34 percent complete
round 50 percent complete
round 67 percent complete
round 84 percent complete
round 101 percent complete
round complete.
population male: 266, population female: 264, total: 530 

starting new round
round 17 percent complete
round 34 percent complete
round 50 percent complete
round 67 percent complete
round 84 percent complete
round 101 percent complete
round complete.
population male: 270, population female: 260, total: 530 

starting new round
round 17 percent complete
round 34 percent complete
round 50 percent complete
round 67 percent complete
round 84 percent complete
round 101 percent complete
round complete.
population m

### All-Female Simulation:

In [8]:
# All-female simulation: start from a workforce in which every worker has
# gender 1 and run num_trials rounds of simulation_length wall-clock
# seconds each. Replacement hires are still randomly gendered.
state = 0
simulation_length = 30
num_trials = 5

print("press enter to start!")

input()

# Create initial, all-female workforce
worker_id = populate_workforce(worker_id, all_female=True)

# Enter simulation state
state = 1


beg_time = time.time()

# State method used to allow for retrials in the future-- not currently implemented.
while state == 1:
    print("starting simulation")
    print("Starting Stats: \npopulation male: %i, population female: %i, total: %i \n" % (overall_gender_distribution(worker_db)))
    for i in range(num_trials):
        print("starting new round")
        # for duration of simulation length:
        while time.time() <= beg_time + simulation_length:
            update_workforce(worker_id)
            time.sleep(.5)
            if (time.time() - beg_time) % 5.0 < .5:
                # The last tick can land just past the round end, so cap
                # the reported progress at 100.
                print("round %i percent complete" % (min(100, round(((time.time() - beg_time) / simulation_length*100)))))

        print("round complete.")
        print("population male: %i, population female: %i, total: %i \n" % (overall_gender_distribution(worker_db)))

        # Restart the clock for the next round.
        beg_time = time.time()

    state = 0

    print("Simulation complete.")

    print("Final Stats:\n")

    for i in range(1,5):
        print("level %s" % work_levels[i])
        print("population male: %i, population female: %i, total: %i \n" % (level_gender_distribution(worker_db[i])))

press enter to start!

starting simulation
Starting Stats: 
population male: 0, population female: 530, total: 530 

starting new round
round 17 percent complete
round 34 percent complete
round 51 percent complete
round 67 percent complete
round 84 percent complete
round 101 percent complete
round complete.
population male: 215, population female: 315, total: 530 

starting new round
round 17 percent complete
round 34 percent complete
round 50 percent complete
round 67 percent complete
round 84 percent complete
round 101 percent complete
round complete.
population male: 268, population female: 262, total: 530 

starting new round
round 17 percent complete
round 34 percent complete
round 51 percent complete
round 67 percent complete
round 84 percent complete
round 101 percent complete
round complete.
population male: 282, population female: 248, total: 530 

starting new round
round 17 percent complete
round 34 percent complete
round 50 percent complete
round 67 percent complete
round 8

In [13]:
# All-male simulation: start from a workforce in which every worker has
# gender 0 and run num_rounds rounds of round_length wall-clock seconds each.
state = 0
round_length = 30
num_rounds = 5

print("press enter to start!")

# Block until the user presses enter.
input()

# Create initial, all-male workforce (populate_workforce is called with all_male=True)
worker_id = populate_workforce(worker_id, all_male=True)

# Enter simulation state
state = 1


# Wall-clock start of the current round; reset after every round.
beg_time = time.time()

# State method used to allow for retrials in the future-- not currently implemented.
# state is set back to 0 inside the body, so the loop body runs exactly once.
while state == 1:
    print("starting simulation")
    
    print("Starting Stats: \npopulation male: %i, population female: %i, total: %i \n" % (overall_gender_distribution(worker_db)))
    
    for i in range(num_rounds):
        print("starting new round")
        # for duration of the round: update the workforce, then pause half a second
        while time.time() <= beg_time + round_length:
            update_workforce(worker_id)
            time.sleep(.5)
            # Report progress when the elapsed time is within .5s past a
            # multiple of a fifth of the round; min() caps the figure at 100.
            if (time.time() - beg_time) % (round_length/5) < .5: 
                print("round %i percent complete" % (min(100,round(((time.time() - beg_time) / round_length*100)))))
                
        print("round complete.")
        print("population male: %i, population female: %i, total: %i \n" % (overall_gender_distribution(worker_db)))
        
        # Restart the clock for the next round.
        beg_time = time.time()
        
    state = 0
        
    
    
    print("Simulation complete.")

    print("Final Stats:\n")

    # Per-level gender breakdown for levels 1 through 4.
    for i in range(1,5):
        print("level %s" % work_levels[i])
        print("population male: %i, population female: %i, total: %i \n" % (level_gender_distribution(worker_db[i])))

press enter to start!

starting simulation
Starting Stats: 
population male: 530, population female: 0, total: 530 

starting new round
round 20 percent complete
round 40 percent complete
round 61 percent complete
round 81 percent complete
round 100 percent complete
round complete.
population male: 378, population female: 152, total: 530 

starting new round
round 20 percent complete
round 40 percent complete
round 61 percent complete
round 81 percent complete
round 100 percent complete
round complete.
population male: 300, population female: 230, total: 530 

starting new round
round 20 percent complete
round 40 percent complete
round 61 percent complete
round 81 percent complete
round 100 percent complete
round complete.
population male: 291, population female: 239, total: 530 

starting new round
round 20 percent complete
round 40 percent complete
round 61 percent complete
round 81 percent complete
round 100 percent complete
round complete.
population male: 274, population female: 2