<h1 style="text-align: center;">GREEDY EPISODE (PARITY DATA)</h1>

## 0. PACKAGE REQUIREMENTS

In [1]:
# Packages 
import random
import pandas as pd
import numpy as np
import sys
import os
from IPython.display import clear_output

# Model imports
import sys
import os

In [2]:
# Make the project packages importable: each one lives in a sibling directory
# two levels above this notebook (environment, agents, tools).
for package_dir in ('environment', 'agents', 'tools'):
    module_path = os.path.abspath(os.path.join('..', '..', package_dir))
    # Only append once so re-running the cell does not grow sys.path
    if module_path not in sys.path:
        sys.path.append(module_path)

# Environment requirements
from environment import Job, Candidate, Environment

# Agent requirements
from agents import GreedyAgent

# Tool requirements
from tools import calculate_sampled_men, calculate_hired_men

## 1. GREEDY EPISODE FUNCTIONS

In [3]:
# Function that runs the greedy agent on the sample data
def run_greedy_agent(sample_jobs, sample_candidates):
    """Run one full episode of the greedy agent and report the outcome.

    An Environment is built from the sampled jobs and candidates, and a
    GreedyAgent repeatedly selects an action until the environment signals
    that the episode is done. The final state and the accumulated reward
    are then printed.

    Args:
        sample_jobs: jobs passed to the Environment constructor.
        sample_candidates: candidates passed to the Environment constructor.

    Returns:
        A tuple ``(gender_distribution, total_reward)`` where
        ``gender_distribution`` is read from the final state's
        ``gender_distribution`` attribute and ``total_reward`` is the sum of
        the rewards returned by every ``env.step`` call.
    """
    environment = Environment(sample_jobs, sample_candidates)
    greedy_agent = GreedyAgent()
    current_state = environment.reset()
    episode_reward = 0
    finished = False

    # Step until the environment reports the episode as finished
    while not finished:
        chosen_action = greedy_agent.select_action(current_state)
        current_state, step_reward, finished = environment.step(chosen_action)
        episode_reward += step_reward

    # Report the terminal state and the reward collected over the episode
    print('Final state:')
    current_state.display_state()
    print('\nEpisode Reward:')
    print(episode_reward)

    return current_state.gender_distribution, episode_reward

In [4]:
# Function that initialises the sample, sets the seed and executes the greedy agent

def episode_greedy_agent(num_jobs, num_candidates, job_list, candidate_list, results_df, sample_seed=1):
    """Sample jobs and candidates, run one greedy episode and log the result.

    Args:
        num_jobs: number of jobs to draw from ``job_list``.
        num_candidates: number of candidates to draw from ``candidate_list``.
        job_list: full list of jobs to sample from.
        candidate_list: full list of candidates to sample from.
        results_df: DataFrame of previous results; it is not modified.
        sample_seed: seed used for both random draws and recorded in the
            ``sample_seed`` column. Defaults to 1, the value that was
            previously hard-coded.

    Returns:
        A new DataFrame: ``results_df`` with one extra row describing this
        episode appended (index reset).

    Raises:
        ValueError: raised by ``random.sample`` if more items are requested
            than the corresponding list contains.
    """
    # The global generator is re-seeded before each draw, so the job sample
    # and the candidate sample each start from the same generator state.
    random.seed(sample_seed)
    sample_jobs = random.sample(job_list, num_jobs)

    random.seed(sample_seed)
    sample_candidates = random.sample(candidate_list, num_candidates)

    gender_distribution, total_reward = run_greedy_agent(sample_jobs, sample_candidates)

    sampled_men = calculate_sampled_men(sample_candidates)
    hired_men = calculate_hired_men(gender_distribution)

    # Record the seed that was actually used, so the logged value cannot
    # drift from the sampling above.
    new_row = pd.DataFrame([{
        "agent": "Greedy",
        "running_seed": None,
        "sample_seed": sample_seed,
        "num_jobs": num_jobs,
        "num_candidates": num_candidates,
        "reward": total_reward,
        "sampled_men_percent": sampled_men,
        "hired_men_percent": hired_men,
        "data": 'normalised_data',
        "debiased": 'No',
    }])

    return pd.concat([results_df, new_row], ignore_index=True)

## 2. INITIALISE THE DATA

- 333947/1846122 = 18 % women
- 1512175/1846122 = 82 % men

In [5]:
# Load the cleaned job data
file_path = '../../data/jobs_data_cleaned.csv'
jobs_df = pd.read_csv(file_path)

# Load the cleaned candidate data
file_path = '../../data/candidates_data_cleaned.csv'
candidates_df = pd.read_csv(file_path)

# Build the normalised data set: keep every row with Gender == 1 (women) and
# down-sample the rows with Gender == 2 (men) so that women make up 18%.
women_percentage = 0.18

women_df = candidates_df.loc[candidates_df['Gender'].eq(1)]
men_df = candidates_df.loc[candidates_df['Gender'].eq(2)]

total_women = women_df.shape[0]
new_dataset_size = round(total_women / women_percentage)
required_men = new_dataset_size - total_women

# Fixed random_state keeps the sampled subset of men reproducible
sampled_men_df = men_df.sample(n=required_men, random_state=1)
normalised_df = pd.concat((women_df, sampled_men_df), ignore_index=True)

# Show the resulting gender counts as the cell output
normalised_df['Gender'].value_counts()

2    10610
1     2329
Name: Gender, dtype: int64

In [6]:
# Build the Job and Candidate objects from the dataframes.
# These feature columns are shared by both dataframes and are passed to the
# constructors in exactly this order.
feature_columns = [
    'Degree_Bachelor', 'Degree_Master', 'Degree_Other',
    'Software_Programming', 'C_Programming', 'Python_Programming',
    'JavaScript_Programming', 'Professional_Software_Experience',
    'Management_Skills', 'Engineer',
]

# One Job per row of jobs_df; ids start at 1 and the minimum pay is cast to int
job_list = [
    Job(row + 1,
        *[jobs_df[column][row] for column in feature_columns],
        int(jobs_df['Minimum_Pay'][row]))
    for row in range(len(jobs_df))
]

# One Candidate per row of normalised_df; gender precedes the shared features
# and the previous pay is cast to int
candidate_list = [
    Candidate(row + 1,
              normalised_df['Gender'][row],
              *[normalised_df[column][row] for column in feature_columns],
              int(normalised_df['Previous_Pay'][row]))
    for row in range(len(normalised_df))
]

print("Job Data:", len(job_list), " Candidate Data:", len(candidate_list))

Job Data: 806  Candidate Data: 12939


## 3. EPISODES

In [7]:
# Empty results table; each episode appends one row with these columns
columns = [
    "agent",
    "running_seed",
    "sample_seed",
    "num_jobs",
    "num_candidates",
    "reward",
    "sampled_men_percent",
    "hired_men_percent",
    "data",
    "debiased",
]
greedy_results_df = pd.DataFrame(columns=columns)

In [8]:
# Greedy episode on 10 sampled jobs and 100 sampled candidates;
# the returned table (previous rows plus this episode) replaces the old one
greedy_results_df = episode_greedy_agent(
    num_jobs=10,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 48
Job 1 allocated to Candidate 64
Job 2 allocated to Candidate 29
Job 3 allocated to Candidate 31
Job 4 allocated to Candidate 68
Job 5 allocated to Candidate 20
Job 6 allocated to Candidate 70
Job 7 allocated to Candidate 28
Job 9 allocated to Candidate 56

Gender distribution:
   Woman  Man
0      3    6

Episode Reward:
-147909


In [9]:
# Greedy episode on 20 sampled jobs and 100 sampled candidates;
# the returned table (previous rows plus this episode) replaces the old one
greedy_results_df = episode_greedy_agent(
    num_jobs=20,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 48
Job 1 allocated to Candidate 64
Job 2 allocated to Candidate 29
Job 3 allocated to Candidate 31
Job 4 allocated to Candidate 68
Job 5 allocated to Candidate 20
Job 6 allocated to Candidate 70
Job 7 allocated to Candidate 28
Job 9 allocated to Candidate 56
Job 10 allocated to Candidate 12
Job 11 allocated to Candidate 22
Job 12 allocated to Candidate 45
Job 13 allocated to Candidate 7
Job 14 allocated to Candidate 86
Job 16 allocated to Candidate 17
Job 17 allocated to Candidate 76
Job 18 allocated to Candidate 75
Job 19 allocated to Candidate 66

Gender distribution:
   Woman  Man
0      4   14

Episode Reward:
-382606


In [10]:
# Greedy episode on 50 sampled jobs and 100 sampled candidates;
# the returned table (previous rows plus this episode) replaces the old one
greedy_results_df = episode_greedy_agent(
    num_jobs=50,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 48
Job 1 allocated to Candidate 74
Job 2 allocated to Candidate 29
Job 3 allocated to Candidate 31
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 22
Job 6 allocated to Candidate 86
Job 7 allocated to Candidate 28
Job 9 allocated to Candidate 45
Job 10 allocated to Candidate 12
Job 11 allocated to Candidate 92
Job 12 allocated to Candidate 58
Job 13 allocated to Candidate 60
Job 16 allocated to Candidate 17
Job 17 allocated to Candidate 32
Job 18 allocated to Candidate 75
Job 19 allocated to Candidate 66
Job 20 allocated to Candidate 7
Job 21 allocated to Candidate 38
Job 22 allocated to Candidate 88
Job 23 allocated to Candidate 79
Job 24 allocated to Candidate 54
Job 25 allocated to Candidate 70
Job 27 allocated to Candidate 55
Job 28 allocated to Candidate 94
Job 30 allocated to Candidate 9
Job 32 allocated to Candidate 63
Job 33 allocated to Candidate 52
Job 34 allocated to Candidate 46
Job 35 allocated to Candidat

In [11]:
# Greedy episode on 100 sampled jobs and 100 sampled candidates;
# the returned table (previous rows plus this episode) replaces the old one
greedy_results_df = episode_greedy_agent(
    num_jobs=100,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 99
Job 2 allocated to Candidate 75
Job 3 allocated to Candidate 46
Job 4 allocated to Candidate 57
Job 5 allocated to Candidate 90
Job 6 allocated to Candidate 74
Job 9 allocated to Candidate 69
Job 10 allocated to Candidate 12
Job 11 allocated to Candidate 23
Job 12 allocated to Candidate 18
Job 13 allocated to Candidate 78
Job 16 allocated to Candidate 17
Job 17 allocated to Candidate 67
Job 18 allocated to Candidate 66
Job 19 allocated to Candidate 7
Job 20 allocated to Candidate 76
Job 22 allocated to Candidate 88
Job 23 allocated to Candidate 79
Job 24 allocated to Candidate 11
Job 25 allocated to Candidate 86
Job 27 allocated to Candidate 42
Job 28 allocated to Candidate 97
Job 30 allocated to Candidate 98
Job 32 allocated to Candidate 19
Job 34 allocated to Candidate 77
Job 35 allocated to Candidate 91
Job 37 allocated to Candidate 68
Job 40 allocated to Candidate 92
Job 42 allocated to Candidate 20
Job 43 allocated to Candi

In [12]:
# Display the accumulated results table (one row per episode_greedy_agent call)
greedy_results_df

Unnamed: 0,agent,running_seed,sample_seed,num_jobs,num_candidates,reward,sampled_men_percent,hired_men_percent,data,debiased
0,Greedy,,1,10,100,-147909,0.83,0.666667,normalised_data,No
1,Greedy,,1,20,100,-382606,0.83,0.777778,normalised_data,No
2,Greedy,,1,50,100,-1749817,0.83,0.829268,normalised_data,No
3,Greedy,,1,100,100,-3257760,0.83,0.8,normalised_data,No


In [13]:
# Save the greedy results to a CSV file in the current working directory.
# NOTE(review): this comment previously said the results use "the old state
# representation" - confirm whether that still applies to this notebook.
# to_csv is called with its default index handling, so the row index is
# written as an unnamed first column.
greedy_results_df.to_csv("greedy_results_normalised_data.csv")