<h1 style="text-align: center;">GREEDY EPISODE (INITIAL DATA)</h1>

## 0. PACKAGE REQUIREMENTS

In [1]:
# Packages 
import random
import pandas as pd
import numpy as np
import sys
import os
from IPython.display import clear_output

# Model imports
import sys
import os

In [2]:
# Make the project-local packages importable. Each directory is located two
# levels above the current working directory and is appended to sys.path only
# if it is not already present, so re-running this cell adds nothing twice.
for package_dir in ('environment', 'agents', 'tools'):
    module_path = os.path.abspath(os.path.join('..', '..', package_dir))
    if module_path not in sys.path:
        sys.path.append(module_path)

# Environment requirements
from environment_basic import Job, Candidate, Environment

# Agent requirements
from agents_basic import GreedyAgent

# Tool requirements
from tools import calculate_sampled_men, calculate_hired_men

## 1. GREEDY EPISODE FUNCTIONS

In [3]:
# Function that runs the greedy agent on the sample data
def run_greedy_agent(sample_jobs, sample_candidates):
    """Play one complete episode with the greedy agent and report the outcome.

    An Environment is built from the sampled jobs and candidates, and a
    GreedyAgent picks every action until the environment signals that the
    episode is finished. The final state and the accumulated reward are then
    printed.

    Args:
        sample_jobs: jobs handed to the Environment constructor.
        sample_candidates: candidates handed to the Environment constructor.

    Returns:
        A tuple ``(gender_distribution, total_reward)``: the
        ``gender_distribution`` attribute of the final state and the sum of
        the rewards returned by every step.
    """
    environment = Environment(sample_jobs, sample_candidates)
    greedy = GreedyAgent()

    current_state = environment.reset()
    finished = False
    total_reward = 0

    # Keep stepping until the environment reports that the episode is over.
    while not finished:
        chosen_action = greedy.select_action(current_state)
        current_state, step_reward, finished = environment.step(chosen_action)
        total_reward += step_reward

    # The loop only exits on a finished episode, so this is the terminal state.
    print('Final state:')
    current_state.display_state()
    print('\nEpisode Reward:')
    print(total_reward)

    return current_state.gender_distribution, total_reward

In [4]:
# Function that initialises the sample, sets the seed and executes the greedy agent

def episode_greedy_agent(num_jobs, num_candidates, job_list, candidate_list, results_df, sample_seed=1):
    """Sample jobs and candidates, run one greedy episode and log the result.

    Args:
        num_jobs: number of jobs to draw from ``job_list``.
        num_candidates: number of candidates to draw from ``candidate_list``.
        job_list: population of jobs to sample from.
        candidate_list: population of candidates to sample from.
        results_df: DataFrame of previous results; it is not modified.
        sample_seed: seed used for both sampling draws and recorded in the
            ``sample_seed`` field of the new row. Defaults to 1, the value
            that was previously hard-coded.

    Returns:
        A new DataFrame: ``results_df`` with one extra row describing this
        episode, re-indexed from 0.

    Raises:
        ValueError: raised by ``random.sample`` when more items are requested
            than the corresponding list contains.

    Note:
        The module-level ``random`` generator is re-seeded here, so its state
        is changed for any code that runs afterwards.
    """
    # Initialise the data. The generator is re-seeded before each draw so the
    # job sample does not depend on how many candidates are drawn, and vice versa.
    random.seed(sample_seed)
    sample_jobs = random.sample(job_list, num_jobs)

    random.seed(sample_seed)
    sample_candidates = random.sample(candidate_list, num_candidates)

    gender_distribution, total_reward = run_greedy_agent(sample_jobs, sample_candidates)

    # Values stored in the *_men_percent fields come straight from the helpers.
    sampled_men = calculate_sampled_men(sample_candidates)
    hired_men = calculate_hired_men(gender_distribution)

    # Add the results to results_df. The seed recorded is the one actually
    # used above, so the log cannot drift from the sampling.
    new_row = pd.DataFrame([{
        "agent": "Greedy",
        "running_seed": None,
        "sample_seed": sample_seed,
        "num_jobs": num_jobs,
        "num_candidates": num_candidates,
        "reward": total_reward,
        "sampled_men_percent": sampled_men,
        "hired_men_percent": hired_men,
        "data": 'initial_data',
        "debiased": 'No',
    }])

    results_df = pd.concat([results_df, new_row], ignore_index=True)

    return results_df

## 2. INITIALISE THE DATA

In [5]:
# Read in the job data
file_path = '../../data/cleaned_data/jobs_cleaned.csv'
jobs_df = pd.read_csv(file_path)

# Read in the candidate data
file_path = '../../data/cleaned_data/candidates_cleaned.csv'
candidates_df = pd.read_csv(file_path)

# Columns present in both tables, listed in the positional order in which
# their values are passed to the Job and Candidate constructors.
shared_feature_columns = [
    'Degree_Bachelor', 'Degree_Master', 'Degree_Other',
    'Software_Programming', 'C_Programming', 'Python_Programming',
    'JavaScript_Programming', 'Professional_Software_Experience',
    'Management_Skills', 'Engineer',
]

# Initialise the class objects with data from the dataframes.
# Ids are 1-based: row 0 of each table becomes object number 1.
job_list = [
    Job(
        row + 1,
        *(jobs_df[column][row] for column in shared_feature_columns),
        int(jobs_df['Minimum_Pay'][row])
    )
    for row in range(len(jobs_df))
]

# Candidates additionally carry their gender, passed right after the id.
candidate_list = [
    Candidate(
        row + 1,
        candidates_df['Gender'][row],
        *(candidates_df[column][row] for column in shared_feature_columns),
        int(candidates_df['Previous_Pay'][row])
    )
    for row in range(len(candidates_df))
]

print("Job Data:", len(job_list), " Candidate Data:", len(candidate_list))

Job Data: 806  Candidate Data: 49005


## 3. EPISODES

In [6]:
# Start from an empty results table; each episode run appends one row
# carrying exactly these fields.
columns = [
    "agent",
    "running_seed",
    "sample_seed",
    "num_jobs",
    "num_candidates",
    "reward",
    "sampled_men_percent",
    "hired_men_percent",
    "data",
    "debiased",
]
greedy_results_df = pd.DataFrame(columns=columns)

In [7]:
# Episode 1: sample 10 jobs and 100 candidates, then append the outcome
greedy_results_df = episode_greedy_agent(
    num_jobs=10,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 12
Job 1 allocated to Candidate 16
Job 2 allocated to Candidate 86
Job 3 allocated to Candidate 95
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 13
Job 6 allocated to Candidate 24
Job 7 allocated to Candidate 81
Job 9 allocated to Candidate 25

Gender distribution:
   Woman  Man
0      1    8

Episode Reward:
-211399


In [8]:
# Episode 2: sample 20 jobs and 100 candidates, then append the outcome
greedy_results_df = episode_greedy_agent(
    num_jobs=20,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 12
Job 1 allocated to Candidate 16
Job 2 allocated to Candidate 86
Job 3 allocated to Candidate 95
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 13
Job 6 allocated to Candidate 24
Job 7 allocated to Candidate 81
Job 9 allocated to Candidate 25
Job 10 allocated to Candidate 68
Job 11 allocated to Candidate 31
Job 12 allocated to Candidate 78
Job 13 allocated to Candidate 73
Job 14 allocated to Candidate 0
Job 15 allocated to Candidate 7
Job 16 allocated to Candidate 90
Job 17 allocated to Candidate 65
Job 18 allocated to Candidate 71
Job 19 allocated to Candidate 49

Gender distribution:
   Woman  Man
0      2   17

Episode Reward:
-658349


In [9]:
# Episode 3: sample 50 jobs and 100 candidates, then append the outcome
greedy_results_df = episode_greedy_agent(
    num_jobs=50,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 89
Job 1 allocated to Candidate 36
Job 2 allocated to Candidate 86
Job 3 allocated to Candidate 95
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 13
Job 6 allocated to Candidate 81
Job 7 allocated to Candidate 0
Job 9 allocated to Candidate 31
Job 10 allocated to Candidate 68
Job 11 allocated to Candidate 65
Job 12 allocated to Candidate 50
Job 13 allocated to Candidate 64
Job 16 allocated to Candidate 90
Job 17 allocated to Candidate 72
Job 18 allocated to Candidate 71
Job 19 allocated to Candidate 49
Job 20 allocated to Candidate 78
Job 21 allocated to Candidate 20
Job 22 allocated to Candidate 75
Job 23 allocated to Candidate 97
Job 24 allocated to Candidate 2
Job 25 allocated to Candidate 24
Job 27 allocated to Candidate 44
Job 28 allocated to Candidate 29
Job 30 allocated to Candidate 80
Job 32 allocated to Candidate 16
Job 33 allocated to Candidate 74
Job 34 allocated to Candidate 84
Job 35 allocated to Candidat

In [10]:
# Episode 4: sample 100 jobs and 100 candidates, then append the outcome
greedy_results_df = episode_greedy_agent(
    num_jobs=100,
    num_candidates=100,
    job_list=job_list,
    candidate_list=candidate_list,
    results_df=greedy_results_df,
)

Final state:
Allocations:
Job 0 allocated to Candidate 89
Job 2 allocated to Candidate 71
Job 3 allocated to Candidate 95
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 88
Job 6 allocated to Candidate 16
Job 7 allocated to Candidate 0
Job 9 allocated to Candidate 83
Job 10 allocated to Candidate 68
Job 11 allocated to Candidate 85
Job 12 allocated to Candidate 21
Job 13 allocated to Candidate 99
Job 16 allocated to Candidate 90
Job 17 allocated to Candidate 67
Job 18 allocated to Candidate 49
Job 19 allocated to Candidate 78
Job 20 allocated to Candidate 73
Job 21 allocated to Candidate 69
Job 22 allocated to Candidate 33
Job 23 allocated to Candidate 70
Job 24 allocated to Candidate 92
Job 25 allocated to Candidate 24
Job 27 allocated to Candidate 20
Job 28 allocated to Candidate 87
Job 30 allocated to Candidate 6
Job 32 allocated to Candidate 36
Job 33 allocated to Candidate 7
Job 34 allocated to Candidate 75
Job 35 allocated to Candidate 46
Job 37 allocated to Candidat

In [11]:
# Display the accumulated results table (one row per episode call above)
greedy_results_df

Unnamed: 0,agent,running_seed,sample_seed,num_jobs,num_candidates,reward,sampled_men_percent,hired_men_percent,data,debiased
0,Greedy,,1,10,100,-211399,0.97,0.888889,initial_data,No
1,Greedy,,1,20,100,-658349,0.97,0.894737,initial_data,No
2,Greedy,,1,50,100,-2187552,0.97,0.926829,initial_data,No
3,Greedy,,1,100,100,-3932078,0.97,0.951613,initial_data,No


In [12]:
# Persist the results table as a CSV file in the current working directory
results_csv_path = "greedy_results_initial.csv"
greedy_results_df.to_csv(results_csv_path)