<h1 style="text-align: center;">GREEDY EPISODE (PARITY DATA)</h1>

## 0. PACKAGE REQUIREMENTS

In [1]:
# Packages 
import random
import pandas as pd
import numpy as np
import sys
import os
from IPython.display import clear_output

# Model imports
import sys
import os

In [2]:
# Environment requirements
# The project modules live in sibling directories two levels above the current
# working directory, so each directory is appended to sys.path before importing.
module_path = os.path.abspath(os.path.join('..', '..', 'environment'))
if module_path not in sys.path:  # guard: re-running the cell must not add duplicates
    sys.path.append(module_path)
from environment_basic import Job, Candidate, Environment

# Agent requirements
# Same pattern for the directory that holds the agent implementations.
module_path = os.path.abspath(os.path.join('..', '..', 'agents'))
if module_path not in sys.path:
    sys.path.append(module_path)
from agents_basic import GreedyAgent

# Tool requirements
# Same pattern for the helper functions used to compute the gender percentages.
module_path = os.path.abspath(os.path.join('..', '..', 'tools'))
if module_path not in sys.path:
    sys.path.append(module_path)
from tools import calculate_sampled_men, calculate_hired_men

## 1. GREEDY EPISODE FUNCTIONS

In [3]:
# Play one complete greedy episode on the sampled jobs and candidates
def run_greedy_agent(sample_jobs, sample_candidates):
    """Run a single episode with a GreedyAgent and report the outcome.

    An Environment is built from the two samples and a fresh GreedyAgent picks
    one action per step until the environment reports that the episode is
    done. The final state and the accumulated reward are then printed.

    Args:
        sample_jobs: jobs handed to the Environment constructor.
        sample_candidates: candidates handed to the Environment constructor.

    Returns:
        Tuple ``(gender_distribution, total_reward)``: the
        ``gender_distribution`` attribute of the final state and the sum of
        the rewards returned by every step.
    """
    environment = Environment(sample_jobs, sample_candidates)
    greedy = GreedyAgent()

    current = environment.reset()
    total_reward = 0
    finished = False

    # The flag starts as False, so at least one step is always taken
    while not finished:
        current, step_reward, finished = environment.step(greedy.select_action(current))
        total_reward += step_reward

    # Episode is over: report the terminal state and the collected reward
    print('Final state:')
    current.display_state()
    print('\nEpisode Reward:')
    print(total_reward)

    return current.gender_distribution, total_reward

In [4]:
# Function that draws the seeded sample, executes the greedy agent and records the result
def episode_greedy_agent(num_jobs, num_candidates, job_list, candidate_list, results_df, sample_seed=1):
    """Run one greedy episode on a seeded sample and append its result row.

    Args:
        num_jobs: number of jobs to draw from ``job_list``.
        num_candidates: number of candidates to draw from ``candidate_list``.
        job_list: population of jobs to sample from.
        candidate_list: population of candidates to sample from.
        results_df: dataframe that the new result row is appended to.
        sample_seed: seed applied to the global ``random`` module before each
            draw. Defaults to 1, the value that was previously hard-coded, and
            is also the value stored in the ``sample_seed`` result column.

    Returns:
        A new dataframe: ``results_df`` with one extra row describing this
        episode. ``results_df`` itself is not modified.

    Raises:
        ValueError: raised by ``random.sample`` when more items are requested
            than the corresponding list contains.

    Note:
        The global ``random`` state is re-seeded as a side effect.
    """
    # Re-seed before each draw so the candidate sample depends only on the seed
    # and the candidate list, not on how many jobs were drawn first.
    random.seed(sample_seed)
    sample_jobs = random.sample(job_list, num_jobs)

    random.seed(sample_seed)
    sample_candidates = random.sample(candidate_list, num_candidates)

    gender_distribution, total_reward = run_greedy_agent(sample_jobs, sample_candidates)

    sampled_men = calculate_sampled_men(sample_candidates)
    hired_men = calculate_hired_men(gender_distribution)

    # Record the seed that was actually used so the row stays reproducible
    new_row = pd.DataFrame([{"agent": "Greedy", "running_seed": None, "sample_seed": sample_seed,
                             "num_jobs": num_jobs, "num_candidates": num_candidates,
                             "reward": total_reward, "sampled_men_percent": sampled_men,
                             "hired_men_percent": hired_men,
                             "data": 'parity_data', "debiased": 'No'}])

    return pd.concat([results_df, new_row], ignore_index=True)

## 2. INITIALISE THE DATA

In [5]:
# Load the cleaned job data
file_path = '../../data/cleaned_data/jobs_cleaned.csv'
jobs_df = pd.read_csv(file_path)

# Load the cleaned candidate data
file_path = '../../data/cleaned_data/candidates_cleaned.csv'
candidates_df = pd.read_csv(file_path)

# Build the parity data set: keep every Gender == 1 row (women_df) and draw an
# equally sized, seeded random subset of the Gender == 2 rows (men_df)
women_df = candidates_df.loc[candidates_df['Gender'] == 1]
men_df = candidates_df.loc[candidates_df['Gender'] == 2]

sampled_men_df = men_df.sample(n=women_df.shape[0], random_state=1)
parity_df = pd.concat((women_df, sampled_men_df), ignore_index=True)

# Show the class balance of the combined frame
parity_df['Gender'].value_counts()

1    2329
2    2329
Name: Gender, dtype: int64

In [6]:
# Build the Job and Candidate objects from the dataframes.
# The first constructor argument is the 1-based row number.

# Attribute columns shared by jobs and candidates, in constructor-argument order
skill_columns = ['Degree_Bachelor', 'Degree_Master', 'Degree_Other',
                 'Software_Programming', 'C_Programming', 'Python_Programming',
                 'JavaScript_Programming', 'Professional_Software_Experience',
                 'Management_Skills', 'Engineer']

# Jobs: row number, shared attributes, then the minimum pay as a plain int
job_list = [
    Job(row + 1,
        *(jobs_df[column][row] for column in skill_columns),
        int(jobs_df['Minimum_Pay'][row]))
    for row in range(len(jobs_df))
]

# Candidates: row number, gender, shared attributes, then the previous pay as a plain int
candidate_list = [
    Candidate(row + 1,
              parity_df['Gender'][row],
              *(parity_df[column][row] for column in skill_columns),
              int(parity_df['Previous_Pay'][row]))
    for row in range(len(parity_df))
]

print("Job Data:", len(job_list), " Candidate Data:", len(candidate_list))

Job Data: 806  Candidate Data: 4658


## 3. EPISODES

In [7]:
# Create the empty results table; every episode appends one row with these fields
columns = [
    "agent",
    "running_seed",
    "sample_seed",
    "num_jobs",
    "num_candidates",
    "reward",
    "sampled_men_percent",
    "hired_men_percent",
    "data",
    "debiased",
]
greedy_results_df = pd.DataFrame(columns=columns)

In [8]:
# 10 Jobs, 100 Candidates
# Runs one greedy episode on a sample of 10 jobs and 100 candidates and appends
# its result row to greedy_results_df.
# NOTE: re-running this cell appends a duplicate row; re-create greedy_results_df first.
greedy_results_df = episode_greedy_agent(10, 100, job_list, candidate_list, greedy_results_df)

Final state:
Allocations:
Job 0 allocated to Candidate 99
Job 2 allocated to Candidate 23
Job 3 allocated to Candidate 19
Job 4 allocated to Candidate 71
Job 5 allocated to Candidate 7
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 70

Gender distribution:
   Woman  Man
0      0    7

Episode Reward:
-101572


In [9]:
# 20 Jobs, 100 Candidates
# Runs one greedy episode on a sample of 20 jobs and 100 candidates and appends
# its result row to greedy_results_df.
# NOTE: re-running this cell appends a duplicate row; re-create greedy_results_df first.
greedy_results_df = episode_greedy_agent(20, 100, job_list, candidate_list, greedy_results_df)

Final state:
Allocations:
Job 0 allocated to Candidate 99
Job 2 allocated to Candidate 23
Job 3 allocated to Candidate 19
Job 4 allocated to Candidate 71
Job 5 allocated to Candidate 7
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 70
Job 10 allocated to Candidate 55
Job 11 allocated to Candidate 81
Job 12 allocated to Candidate 25
Job 13 allocated to Candidate 11
Job 16 allocated to Candidate 88
Job 17 allocated to Candidate 18
Job 18 allocated to Candidate 14
Job 19 allocated to Candidate 30

Gender distribution:
   Woman  Man
0      4   11

Episode Reward:
-362280


In [10]:
# 50 Jobs, 100 Candidates
# Runs one greedy episode on a sample of 50 jobs and 100 candidates and appends
# its result row to greedy_results_df.
# NOTE: re-running this cell appends a duplicate row; re-create greedy_results_df first.
greedy_results_df = episode_greedy_agent(50, 100, job_list, candidate_list, greedy_results_df)

Final state:
Allocations:
Job 0 allocated to Candidate 99
Job 2 allocated to Candidate 23
Job 3 allocated to Candidate 19
Job 4 allocated to Candidate 71
Job 5 allocated to Candidate 81
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 18
Job 10 allocated to Candidate 55
Job 11 allocated to Candidate 67
Job 12 allocated to Candidate 69
Job 13 allocated to Candidate 91
Job 16 allocated to Candidate 88
Job 17 allocated to Candidate 35
Job 18 allocated to Candidate 14
Job 19 allocated to Candidate 30
Job 20 allocated to Candidate 25
Job 21 allocated to Candidate 84
Job 22 allocated to Candidate 49
Job 23 allocated to Candidate 83
Job 24 allocated to Candidate 16
Job 27 allocated to Candidate 22
Job 28 allocated to Candidate 76
Job 30 allocated to Candidate 28
Job 32 allocated to Candidate 29
Job 34 allocated to Candidate 58
Job 35 allocated to Candidate 62
Job 37 allocated to Candidate 7
Job 40 allocated to Candidate 11
Job 41 allocated to Candidate 70
Job 42 allocated to Candi

In [11]:
# 100 Jobs, 100 Candidates
# Runs one greedy episode on a sample of 100 jobs and 100 candidates and appends
# its result row to greedy_results_df.
# NOTE: re-running this cell appends a duplicate row; re-create greedy_results_df first.
greedy_results_df = episode_greedy_agent(100, 100, job_list, candidate_list, greedy_results_df)

Final state:
Allocations:
Job 0 allocated to Candidate 59
Job 2 allocated to Candidate 14
Job 3 allocated to Candidate 19
Job 4 allocated to Candidate 71
Job 5 allocated to Candidate 62
Job 9 allocated to Candidate 56
Job 10 allocated to Candidate 9
Job 11 allocated to Candidate 96
Job 12 allocated to Candidate 17
Job 13 allocated to Candidate 63
Job 16 allocated to Candidate 88
Job 17 allocated to Candidate 78
Job 18 allocated to Candidate 30
Job 19 allocated to Candidate 25
Job 20 allocated to Candidate 11
Job 21 allocated to Candidate 84
Job 22 allocated to Candidate 41
Job 23 allocated to Candidate 79
Job 24 allocated to Candidate 27
Job 27 allocated to Candidate 66
Job 28 allocated to Candidate 76
Job 30 allocated to Candidate 80
Job 32 allocated to Candidate 13
Job 34 allocated to Candidate 43
Job 35 allocated to Candidate 15
Job 37 allocated to Candidate 7
Job 40 allocated to Candidate 18
Job 42 allocated to Candidate 81
Job 43 allocated to Candidate 89
Job 44 allocated to Candi

In [12]:
# Display the accumulated results: one row per episode run above
greedy_results_df

Unnamed: 0,agent,running_seed,sample_seed,num_jobs,num_candidates,reward,sampled_men_percent,hired_men_percent,data,debiased
0,Greedy,,1,10,100,-101572,0.56,1.0,parity_data,No
1,Greedy,,1,20,100,-362280,0.56,0.733333,parity_data,No
2,Greedy,,1,50,100,-1447219,0.56,0.571429,parity_data,No
3,Greedy,,1,100,100,-3560055,0.56,0.596774,parity_data,No


In [13]:
# Save the greedy results in a csv
# The file is written relative to the current working directory. With the
# default index=True the row index is written as an unnamed first column.
greedy_results_df.to_csv("greedy_results_parity.csv")