<h1 style="text-align: center;">OPTIMISATION DQN PARAMETERS (ENDORSE)</h1>

## 0. PACKAGE REQUIREMENTS

In [1]:
# Packages
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
from IPython.display import clear_output
import warnings

# DQN Requirements
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import namedtuple

# Model imports
import sys
import os

In [2]:
warnings.filterwarnings("ignore", message=".*To copy construct from a tensor.*")

In [3]:
# Environment requirements
module_path = os.path.abspath(os.path.join('..', '..', 'environment'))
if module_path not in sys.path:
    sys.path.append(module_path)
from environment_basic import Job, Candidate, Environment

module_path = os.path.abspath(os.path.join('..', '..', 'environment'))
if module_path not in sys.path:
    sys.path.append(module_path)
from environment_endorse import EnvironmentEndorse

# Agent requirements
module_path = os.path.abspath(os.path.join('..', '..', 'agents'))
if module_path not in sys.path:
    sys.path.append(module_path)
from agents_endorse import DQNAgentEndorse, DQN, ReplayBuffer

# Tool requirements
module_path = os.path.abspath(os.path.join('..', '..', 'tools'))
if module_path not in sys.path:
    sys.path.append(module_path)
from tools import calculate_sampled_men, calculate_hired_men, get_best_allocation, visualize_loss, set_seed

## 1. DQN EPISODE FUNCTIONS

In [4]:
def run_dqn_agent_optimisation_endorse(sample_jobs, sample_candidates, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate, num_episodes, show_schedule = False):
    # Initialize the environment
    env = EnvironmentEndorse(sample_jobs, sample_candidates)
    state = env.reset()
    state_tensor = torch.tensor(state.to_tensor(), dtype=torch.float32).unsqueeze(0)

    # Initialise the state and action sizes
    state_size = state.num_jobs * state.num_candidates
    action_size = len(state.available_actions)

    episode_rewards = []
    loss_list = []
    iteration = 0

    # Initialise the agent
    agent = DQNAgentEndorse(state_size, action_size, env, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate)

    for _ in range(num_episodes):
        iteration += 1
        state = env.reset()
        state_tensor = torch.tensor(state.to_tensor(), dtype=torch.float32).unsqueeze(0)
        total_reward = 0
        done = False

        while not done:
            action_tuple = agent.select_action(state_tensor, env.current_state.available_actions, env.action_indices)
            next_state, reward, done = env.step(action_tuple)
            next_state_tensor = torch.tensor(next_state.to_tensor(), dtype=torch.float32).unsqueeze(0)

            agent.store_transition(state_tensor, action_tuple, reward, next_state_tensor, done)
            state_tensor = next_state_tensor

            loss = agent.optimize_model()
            total_reward += reward

            if done:
                agent.update_target_net()
                episode_rewards.append(total_reward)
                break
        
        loss_list.append(loss)

    # Save the model
    agent.save_model('DQN_optimisation')
    agent.load_model('DQN_optimisation')

    gender_distribution, tot_reward = get_best_allocation(agent, env, show_schedule=True)
    
    #return episode_rewards, tot_reward, loss_list
    return gender_distribution, tot_reward

## 2. INITIALISE THE DATA

In [5]:
# Read in the job data
file_path = '../../data/cleaned_data/jobs_cleaned.csv'
jobs_df = pd.read_csv(file_path)

# Read in the candidate data
file_path = '../../data/cleaned_data/candidates_cleaned.csv'
candidates_df = pd.read_csv(file_path)

# Initialise the class objects with data from the dataframes
job_list = []
for i in range(len(jobs_df)):
    job = Job(i+1, jobs_df['Degree_Bachelor'][i], jobs_df['Degree_Master'][i], jobs_df['Degree_Other'][i],
              jobs_df['Software_Programming'][i], jobs_df['C_Programming'][i], jobs_df['Python_Programming'][i], 
              jobs_df['JavaScript_Programming'][i], jobs_df['Professional_Software_Experience'][i], 
              jobs_df['Management_Skills'][i], jobs_df['Engineer'][i], int(jobs_df['Minimum_Pay'][i]))
    job_list.append(job)

candidate_list = []
for i in range(len(candidates_df)):
    candidate = Candidate(i+1, candidates_df['Gender'][i], candidates_df['Degree_Bachelor'][i], candidates_df['Degree_Master'][i], candidates_df['Degree_Other'][i], 
                          candidates_df['Software_Programming'][i], candidates_df['C_Programming'][i], candidates_df['Python_Programming'][i], 
                          candidates_df['JavaScript_Programming'][i], candidates_df['Professional_Software_Experience'][i], 
                          candidates_df['Management_Skills'][i], candidates_df['Engineer'][i], int(candidates_df['Previous_Pay'][i]))
    candidate_list.append(candidate)

print("Job Data:", len(job_list), " Candidate Data:", len(candidate_list))

Job Data: 806  Candidate Data: 49005


## 3. LOOP THROUGH PARAMETERS

In [6]:
# 144 combinations
batch_size_values = [16, 32, 64]
gamma_values = [0.9, 0.95, 0.99]
epsilon_values = [1, 0.5]
epsilon_min_values = [0.1, 0.001]
epsilon_decay_values = [0.985, 0.995]
learning_rate_values = [0.0005, 0.001]

columns = ["candidates", "jobs", "data_seed", "dqn_seed", "batch_size", "gamma", "epsilon", "epsilon_min", "epsilon_decay", "learning_rate", "total_reward"]
results_parameters_dqn_endorse = pd.DataFrame(columns=columns)

In [7]:
def run_optimisation_endorse(sample_jobs, sample_candidates, input_data, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_df):
    num_combinations = len(batch_size_values) * len(gamma_values) * len(epsilon_values) * len(epsilon_min_values) * len(epsilon_decay_values) * len(learning_rate_values)
    current_iteration = 0
    
    for batch_size in batch_size_values:
        for gamma in gamma_values:
            for epsilon in epsilon_values:
                for epsilon_min in epsilon_min_values:
                    for epsilon_decay in epsilon_decay_values:
                        for learning_rate in learning_rate_values:
                            clear_output()

                            current_iteration += 1
                            print(f"Iteration {current_iteration}/{num_combinations}; Testing combination {batch_size}, {gamma}, {epsilon}, {epsilon_min}, {epsilon_decay}, {learning_rate}")

                            set_seed(1)

                            gender_distribution, total_reward = run_dqn_agent_optimisation_endorse(sample_jobs, sample_candidates, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate, num_episodes=1000)
                            
                            # Add the results to the dqn_results_df dataframe
                            new_row = pd.DataFrame([{"candidates": len(sample_candidates), "jobs": len(sample_jobs), "data": input_data, "data_seed": 1, "dqn_seed": 1, 
                                                     "batch_size": batch_size, "gamma": gamma, "epsilon": epsilon, "epsilon_min": epsilon_min,
                                                     "epsilon_decay": epsilon_decay, "learning_rate": learning_rate, "total_reward": total_reward}])
                            print(new_row)
                            results_df = pd.concat([results_df, new_row], ignore_index=True)

    return results_df

## 5. ORIGINAL DATA

In [8]:
# Sample data sets

# Data set 1: 10 jobs, 100 candidates
random.seed(1)
sample_jobs_10 = random.sample(job_list, 10)

random.seed(1)
sample_candidates_100 = random.sample(candidate_list, 100)

# Data set 2: 20 jobs, 100 candidates
random.seed(1)
sample_jobs_20 = random.sample(job_list, 20)

# Dataset 3: 50 jobs, 100 candidates
random.seed(1)
sample_jobs_50 = random.sample(job_list, 50)

# Dataset 4: 100 jobs, 100 candidates
random.seed(1)
sample_jobs_100 = random.sample(job_list, 100)

In [9]:
# Execute the optimisation function on Data Set 1
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_10, sample_candidates_100, "initial_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 12
Job 1 allocated to Candidate 81
Job 2 allocated to Candidate 86
Job 3 allocated to Candidate 95
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 13
Job 6 allocated to Candidate 24
Job 7 allocated to Candidate 52
Job 9 allocated to Candidate 31

Gender distribution:
   Woman  Man
0      2    7
None
Reward:  -280162.4
   candidates  jobs          data  data_seed  dqn_seed  batch_size  gamma  \
0         100    10  initial_data          1         1          64   0.99   

   epsilon  epsilon_min  epsilon_decay  learning_rate  total_reward  
0      0.5        0.001          0.995          0.001     -280162.4  


In [10]:
# Execute the optimisation function on Data Set 2
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_20, sample_candidates_100, "initial_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 12
Job 1 allocated to Candidate 0
Job 2 allocated to Candidate 71
Job 3 allocated to Candidate 13
Job 4 allocated to Candidate 31
Job 5 allocated to Candidate 78
Job 6 allocated to Candidate 81
Job 7 allocated to Candidate 24
Job 9 allocated to Candidate 95
Job 10 allocated to Candidate 68
Job 11 allocated to Candidate 56
Job 12 allocated to Candidate 84
Job 13 allocated to Candidate 25
Job 14 allocated to Candidate 74
Job 15 allocated to Candidate 7
Job 16 allocated to Candidate 90
Job 17 allocated to Candidate 73
Job 18 allocated to Candidate 49
Job 19 allocated to Candidate 64

Gender distribution:
   Woman  Man
0      3   16
None
Reward:  -1281559.5
   candidates  jobs          data  data_seed  dqn_seed  batch_size  gamma  \
0         100    20  initial_data          1         1          64   0.99   

   epsilon  epsilon_min  epsilon_decay  learning_rate  total_reward

In [12]:
# Execute the optimisation function on Data Set 3
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_50, sample_candidates_100, "initial_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 75
Job 1 allocated to Candidate 7
Job 2 allocated to Candidate 49
Job 3 allocated to Candidate 83
Job 4 allocated to Candidate 81
Job 5 allocated to Candidate 25
Job 6 allocated to Candidate 36
Job 7 allocated to Candidate 92
Job 9 allocated to Candidate 50
Job 10 allocated to Candidate 68
Job 11 allocated to Candidate 6
Job 12 allocated to Candidate 44
Job 13 allocated to Candidate 9
Job 16 allocated to Candidate 14
Job 17 allocated to Candidate 90
Job 18 allocated to Candidate 71
Job 19 allocated to Candidate 65
Job 20 allocated to Candidate 88
Job 21 allocated to Candidate 29
Job 22 allocated to Candidate 21
Job 23 allocated to Candidate 66
Job 24 allocated to Candidate 95
Job 25 allocated to Candidate 24
Job 27 allocated to Candidate 73
Job 28 allocated to Candidate 2
Job 30 allocated to Candidate 13
Job 32 allocated to Candidate 67
Job 33 allocated to Candidate 52
Jo

In [25]:
results_parameters_dqn_endorse

Unnamed: 0,candidates,jobs,data_seed,dqn_seed,batch_size,gamma,epsilon,epsilon_min,epsilon_decay,learning_rate,total_reward,data
0,100,10,1,1,16,0.90,1,0.100,0.985,0.0005,-291420.7,initial_data
1,100,10,1,1,16,0.90,1,0.100,0.985,0.0010,-293476.6,initial_data
2,100,10,1,1,16,0.90,1,0.100,0.995,0.0005,-300666.9,initial_data
3,100,10,1,1,16,0.90,1,0.100,0.995,0.0010,-290977.2,initial_data
4,100,10,1,1,16,0.90,1,0.001,0.985,0.0005,-289282.6,initial_data
...,...,...,...,...,...,...,...,...,...,...,...,...
859,100,50,1,1,64,0.99,0.5,0.100,0.995,0.0010,-1804861.2,parity_data
860,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0005,-1682439.3,parity_data
861,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0010,-1893003.1,parity_data
862,100,50,1,1,64,0.99,0.5,0.001,0.995,0.0005,-1791123.3,parity_data


## 6. PARITY DATA

In [14]:
# Create a parity data set
women_df = candidates_df[candidates_df['Gender'] == 1]
men_df = candidates_df[candidates_df['Gender'] == 2]

sampled_men_df = men_df.sample(n=len(women_df), random_state=1)
parity_df = pd.concat([women_df, sampled_men_df], ignore_index=True)

parity_df['Gender'].value_counts()

1    2329
2    2329
Name: Gender, dtype: int64

In [15]:
# Initialise the parity data candidate objects

candidate_list = []
for i in range(len(parity_df)):
    candidate = Candidate(i+1, parity_df['Gender'][i], parity_df['Degree_Bachelor'][i], parity_df['Degree_Master'][i], parity_df['Degree_Other'][i], 
                          parity_df['Software_Programming'][i], parity_df['C_Programming'][i], parity_df['Python_Programming'][i], 
                          parity_df['JavaScript_Programming'][i], parity_df['Professional_Software_Experience'][i], 
                          parity_df['Management_Skills'][i], parity_df['Engineer'][i], int(parity_df['Previous_Pay'][i]))
    candidate_list.append(candidate)

In [16]:
# Sample data sets

random.seed(1)
sample_candidates_100_parity = random.sample(candidate_list, 100)

In [17]:
# Execute the optimisation function on Data Set 1
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_10, sample_candidates_100_parity, "parity_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 99
Job 2 allocated to Candidate 14
Job 3 allocated to Candidate 81
Job 4 allocated to Candidate 19
Job 5 allocated to Candidate 7
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 71

Gender distribution:
   Woman  Man
0      1    6
None
Reward:  -146150.9
   candidates  jobs         data  data_seed  dqn_seed  batch_size  gamma  \
0         100    10  parity_data          1         1          64   0.99   

   epsilon  epsilon_min  epsilon_decay  learning_rate  total_reward  
0      0.5        0.001          0.995          0.001     -146150.9  


In [18]:
# Execute the optimisation function on Data Set 2
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_20, sample_candidates_100_parity, "parity_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 7
Job 2 allocated to Candidate 23
Job 3 allocated to Candidate 81
Job 4 allocated to Candidate 70
Job 5 allocated to Candidate 71
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 11
Job 10 allocated to Candidate 9
Job 11 allocated to Candidate 99
Job 12 allocated to Candidate 22
Job 13 allocated to Candidate 19
Job 16 allocated to Candidate 88
Job 17 allocated to Candidate 16
Job 18 allocated to Candidate 30
Job 19 allocated to Candidate 14

Gender distribution:
   Woman  Man
0      6    9
None
Reward:  -466931.8
   candidates  jobs         data  data_seed  dqn_seed  batch_size  gamma  \
0         100    20  parity_data          1         1          64   0.99   

   epsilon  epsilon_min  epsilon_decay  learning_rate  total_reward  
0      0.5        0.001          0.995          0.001     -466931.8  


In [23]:
# Execute the optimisation function on Data Set 3
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_50, sample_candidates_100_parity, "parity_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 7
Job 2 allocated to Candidate 23
Job 3 allocated to Candidate 19
Job 4 allocated to Candidate 25
Job 5 allocated to Candidate 29
Job 7 allocated to Candidate 54
Job 9 allocated to Candidate 28
Job 10 allocated to Candidate 9
Job 11 allocated to Candidate 71
Job 12 allocated to Candidate 18
Job 13 allocated to Candidate 96
Job 16 allocated to Candidate 92
Job 17 allocated to Candidate 27
Job 18 allocated to Candidate 30
Job 19 allocated to Candidate 35
Job 20 allocated to Candidate 69
Job 21 allocated to Candidate 84
Job 22 allocated to Candidate 87
Job 23 allocated to Candidate 83
Job 24 allocated to Candidate 58
Job 27 allocated to Candidate 22
Job 28 allocated to Candidate 99
Job 30 allocated to Candidate 66
Job 32 allocated to Candidate 16
Job 34 allocated to Candidate 70
Job 35 allocated to Candidate 62
Job 37 allocated to Candidate 81
Job 40 allocated to Candidate 1

In [26]:
results_parameters_dqn_endorse

Unnamed: 0,candidates,jobs,data_seed,dqn_seed,batch_size,gamma,epsilon,epsilon_min,epsilon_decay,learning_rate,total_reward,data
0,100,10,1,1,16,0.90,1,0.100,0.985,0.0005,-291420.7,initial_data
1,100,10,1,1,16,0.90,1,0.100,0.985,0.0010,-293476.6,initial_data
2,100,10,1,1,16,0.90,1,0.100,0.995,0.0005,-300666.9,initial_data
3,100,10,1,1,16,0.90,1,0.100,0.995,0.0010,-290977.2,initial_data
4,100,10,1,1,16,0.90,1,0.001,0.985,0.0005,-289282.6,initial_data
...,...,...,...,...,...,...,...,...,...,...,...,...
859,100,50,1,1,64,0.99,0.5,0.100,0.995,0.0010,-1804861.2,parity_data
860,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0005,-1682439.3,parity_data
861,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0010,-1893003.1,parity_data
862,100,50,1,1,64,0.99,0.5,0.001,0.995,0.0005,-1791123.3,parity_data


## 7. PROPORTIONAL DATA

In [27]:
# Create a proportional data set, where women make up 18%
total_women = len(women_df)
women_percentage = 0.18

new_dataset_size = round(total_women/women_percentage)
required_men = new_dataset_size - total_women

sampled_men_df = men_df.sample(n=required_men, random_state=1)
proportional_df = pd.concat([women_df, sampled_men_df], ignore_index=True)

proportional_df['Gender'].value_counts()

2    10610
1     2329
Name: Gender, dtype: int64

In [28]:
candidate_list = []
for i in range(len(proportional_df)):
    candidate = Candidate(i+1, proportional_df['Gender'][i], proportional_df['Degree_Bachelor'][i], proportional_df['Degree_Master'][i], proportional_df['Degree_Other'][i], 
                          proportional_df['Software_Programming'][i], proportional_df['C_Programming'][i], proportional_df['Python_Programming'][i], 
                          proportional_df['JavaScript_Programming'][i], proportional_df['Professional_Software_Experience'][i], 
                          proportional_df['Management_Skills'][i], proportional_df['Engineer'][i], int(proportional_df['Previous_Pay'][i]))
    candidate_list.append(candidate)

print("Job Data:", len(job_list), " Candidate Data:", len(candidate_list))

Job Data: 806  Candidate Data: 12939


In [29]:
# Sample data sets

random.seed(1)
sample_candidates_100_proportional = random.sample(candidate_list, 100)

In [39]:
# Execute the optimisation function on Data Set 1
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_10, sample_candidates_100_proportional, "proportional_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 48
Job 1 allocated to Candidate 64
Job 2 allocated to Candidate 29
Job 3 allocated to Candidate 31
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 68
Job 6 allocated to Candidate 70
Job 7 allocated to Candidate 28
Job 9 allocated to Candidate 20

Gender distribution:
   Woman  Man
0      3    6
None
Reward:  -180328.59999999998
   candidates  jobs             data  data_seed  dqn_seed  batch_size  gamma  \
0         100    10  normalised_data          1         1          64   0.99   

   epsilon  epsilon_min  epsilon_decay  learning_rate  total_reward  
0      0.5        0.001          0.995          0.001     -180328.6  


In [41]:
# Execute the optimisation function on Data Set 2
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_20, sample_candidates_100_proportional, "proportional_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 48
Job 1 allocated to Candidate 86
Job 2 allocated to Candidate 29
Job 3 allocated to Candidate 68
Job 4 allocated to Candidate 22
Job 5 allocated to Candidate 20
Job 6 allocated to Candidate 70
Job 7 allocated to Candidate 28
Job 9 allocated to Candidate 45
Job 10 allocated to Candidate 12
Job 11 allocated to Candidate 56
Job 12 allocated to Candidate 32
Job 13 allocated to Candidate 31
Job 14 allocated to Candidate 64
Job 16 allocated to Candidate 17
Job 17 allocated to Candidate 60
Job 18 allocated to Candidate 7
Job 19 allocated to Candidate 75

Gender distribution:
   Woman  Man
0      6   12
None
Reward:  -538482.8
   candidates  jobs             data  data_seed  dqn_seed  batch_size  gamma  \
0         100    20  normalised_data          1         1          64   0.99   

   epsilon  epsilon_min  epsilon_decay  learning_rate  total_reward  
0      0.5        0.001 

In [45]:
# Execute the optimisation function on Data Set 3
results_parameters_dqn_endorse = run_optimisation_endorse(sample_jobs_50, sample_candidates_100_proportional, "proportional_data", batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn_endorse)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 63
Job 2 allocated to Candidate 75
Job 3 allocated to Candidate 98
Job 4 allocated to Candidate 22
Job 5 allocated to Candidate 66
Job 6 allocated to Candidate 74
Job 7 allocated to Candidate 86
Job 9 allocated to Candidate 57
Job 10 allocated to Candidate 12
Job 11 allocated to Candidate 36
Job 12 allocated to Candidate 56
Job 13 allocated to Candidate 58
Job 14 allocated to Candidate 52
Job 16 allocated to Candidate 17
Job 17 allocated to Candidate 9
Job 18 allocated to Candidate 29
Job 19 allocated to Candidate 55
Job 20 allocated to Candidate 32
Job 21 allocated to Candidate 38
Job 22 allocated to Candidate 88
Job 23 allocated to Candidate 79
Job 24 allocated to Candidate 11
Job 25 allocated to Candidate 70
Job 27 allocated to Candidate 45
Job 28 allocated to Candidate 42
Job 30 allocated to Candidate 94
Job 32 allocated to Candidate 92
Job 33 allocated to Candidate 2

## 7. SAVE THE RESULTS

In [46]:
results_parameters_dqn_endorse

Unnamed: 0,candidates,jobs,data_seed,dqn_seed,batch_size,gamma,epsilon,epsilon_min,epsilon_decay,learning_rate,total_reward,data
0,100,10,1,1,16,0.90,1,0.100,0.985,0.0005,-291420.7,initial_data
1,100,10,1,1,16,0.90,1,0.100,0.985,0.0010,-293476.6,initial_data
2,100,10,1,1,16,0.90,1,0.100,0.995,0.0005,-300666.9,initial_data
3,100,10,1,1,16,0.90,1,0.100,0.995,0.0010,-290977.2,initial_data
4,100,10,1,1,16,0.90,1,0.001,0.985,0.0005,-289282.6,initial_data
...,...,...,...,...,...,...,...,...,...,...,...,...
1291,100,50,1,1,64,0.99,0.5,0.100,0.995,0.0010,-3863731.1,normalised_data
1292,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0005,-2707515.0,normalised_data
1293,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0010,-3376327.2,normalised_data
1294,100,50,1,1,64,0.99,0.5,0.001,0.995,0.0005,-3285409.4,normalised_data


In [47]:
# Save the results to a csv file
results_parameters_dqn_endorse.to_csv("results_parameters_dqn_endorse.csv")
print("Results saved to results_parameters_dqn_endorse.csv")

Results saved to results_parameters_dqn_debiased.csv
