<h1 style="text-align: center;">OPTIMISATION DQN PARAMETERS (PARITY DATA)</h1>

## 0. PACKAGE REQUIREMENTS

In [1]:
# Packages
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
from IPython.display import clear_output
import warnings

# DQN Requirements
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import namedtuple

# Model imports
import sys
import os

In [2]:
warnings.filterwarnings("ignore", message=".*To copy construct from a tensor.*")

In [3]:
# Environment requirements
module_path = os.path.abspath(os.path.join('..', '..', 'environment'))
if module_path not in sys.path:
    sys.path.append(module_path)
from environment_basic import Job, Candidate, Environment

# Agent requirements
module_path = os.path.abspath(os.path.join('..', '..', 'agents'))
if module_path not in sys.path:
    sys.path.append(module_path)
from agents_basic import DQNAgent, DQN, ReplayBuffer

# Tool requirements
module_path = os.path.abspath(os.path.join('..', '..', 'tools'))
if module_path not in sys.path:
    sys.path.append(module_path)
from tools import calculate_sampled_men, calculate_hired_men, get_best_allocation, visualize_loss, set_seed

## 1. DQN EPISODE FUNCTIONS

In [5]:
def run_dqn_agent_optimisation(sample_jobs, sample_candidates, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate, num_episodes, show_schedule = False):
    # Initialize the environment
    env = Environment(sample_jobs, sample_candidates)
    state = env.reset()
    state_tensor = torch.tensor(state.to_tensor(), dtype=torch.float32).unsqueeze(0)

    # Initialise the state and action sizes
    state_size = state.num_jobs * state.num_candidates
    action_size = len(state.available_actions)

    episode_rewards = []
    loss_list = []
    iteration = 0

    # Initialise the agent
    agent = DQNAgent(state_size, action_size, env, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate)

    for _ in range(num_episodes):
        iteration += 1
        state = env.reset()
        state_tensor = torch.tensor(state.to_tensor(), dtype=torch.float32).unsqueeze(0)
        total_reward = 0
        done = False

        while not done:
            action_tuple = agent.select_action(state_tensor, env.current_state.available_actions, env.action_indices)
            next_state, reward, done = env.step(action_tuple)
            next_state_tensor = torch.tensor(next_state.to_tensor(), dtype=torch.float32).unsqueeze(0)

            agent.store_transition(state_tensor, action_tuple, reward, next_state_tensor, done)
            state_tensor = next_state_tensor

            loss = agent.optimize_model()
            total_reward += reward

            if done:
                agent.update_target_net()
                episode_rewards.append(total_reward)
                break
        
        loss_list.append(loss)

    # Save the model
    agent.save_model('DQN_optimisation')
    agent.load_model('DQN_optimisation')

    gender_distribution, tot_reward = get_best_allocation(agent, env, show_schedule=True)
    
    #return episode_rewards, tot_reward, loss_list
    return gender_distribution, tot_reward

## 2. INITIALISE THE DATA

In [4]:
# Read in the job data
file_path = '../../data/cleaned_data/jobs_cleaned.csv'
jobs_df = pd.read_csv(file_path)

# Read in the candidate data
file_path = '../../data/cleaned_data/candidates_cleaned.csv'
candidates_df = pd.read_csv(file_path)

# Create a parity data set
women_df = candidates_df[candidates_df['Gender'] == 1]
men_df = candidates_df[candidates_df['Gender'] == 2]

sampled_men_df = men_df.sample(n=len(women_df), random_state=1)
parity_df = pd.concat([women_df, sampled_men_df], ignore_index=True)

parity_df['Gender'].value_counts()

1    2329
2    2329
Name: Gender, dtype: int64

In [6]:
# Initialise the class objects with data from the dataframes
job_list = []
for i in range(len(jobs_df)):
    job = Job(i+1, jobs_df['Degree_Bachelor'][i], jobs_df['Degree_Master'][i], jobs_df['Degree_Other'][i],
              jobs_df['Software_Programming'][i], jobs_df['C_Programming'][i], jobs_df['Python_Programming'][i], 
              jobs_df['JavaScript_Programming'][i], jobs_df['Professional_Software_Experience'][i], 
              jobs_df['Management_Skills'][i], jobs_df['Engineer'][i], int(jobs_df['Minimum_Pay'][i]))
    job_list.append(job)

candidate_list = []
for i in range(len(parity_df)):
    candidate = Candidate(i+1, parity_df['Gender'][i], parity_df['Degree_Bachelor'][i], parity_df['Degree_Master'][i], parity_df['Degree_Other'][i], 
                          parity_df['Software_Programming'][i], parity_df['C_Programming'][i], parity_df['Python_Programming'][i], 
                          parity_df['JavaScript_Programming'][i], parity_df['Professional_Software_Experience'][i], 
                          parity_df['Management_Skills'][i], parity_df['Engineer'][i], int(parity_df['Previous_Pay'][i]))
    candidate_list.append(candidate)

print("Job Data:", len(job_list), " Candidate Data:", len(candidate_list))

Job Data: 806  Candidate Data: 4658


## 3. LOOP THROUGH PARAMETERS

In [7]:
# 144 combinations
batch_size_values = [16, 32, 64]
gamma_values = [0.9, 0.95, 0.99]
epsilon_values = [1, 0.5]
epsilon_min_values = [0.1, 0.001]
epsilon_decay_values = [0.985, 0.995]
learning_rate_values = [0.0005, 0.001]

columns = ["candidates", "jobs", "data_seed", "dqn_seed", "batch_size", "gamma", "epsilon", "epsilon_min", "epsilon_decay", "learning_rate", "total_reward"]
results_parameters_dqn = pd.DataFrame(columns=columns)

In [8]:
def run_optimisation(sample_jobs, sample_candidates, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_df):
    num_combinations = len(batch_size_values) * len(gamma_values) * len(epsilon_values) * len(epsilon_min_values) * len(epsilon_decay_values) * len(learning_rate_values)
    current_iteration = 0
    
    for batch_size in batch_size_values:
        for gamma in gamma_values:
            for epsilon in epsilon_values:
                for epsilon_min in epsilon_min_values:
                    for epsilon_decay in epsilon_decay_values:
                        for learning_rate in learning_rate_values:
                            clear_output()

                            current_iteration += 1
                            print(f"Iteration {current_iteration}/{num_combinations}; Testing combination {batch_size}, {gamma}, {epsilon}, {epsilon_min}, {epsilon_decay}, {learning_rate}")

                            set_seed(1)

                            gender_distribution, total_reward = run_dqn_agent_optimisation(sample_jobs, sample_candidates, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate, num_episodes=1000)
                            
                            # Add the results to the dqn_results_df dataframe
                            new_row = pd.DataFrame([{"candidates": len(sample_candidates), "jobs": len(sample_jobs), "data_seed": 1, "dqn_seed": 1, 
                                                     "batch_size": batch_size, "gamma": gamma, "epsilon": epsilon, "epsilon_min": epsilon_min,
                                                     "epsilon_decay": epsilon_decay, "learning_rate": learning_rate, "total_reward": total_reward}])
                            print(new_row)
                            results_df = pd.concat([results_df, new_row], ignore_index=True)

    return results_df

In [9]:
# Sample Candidates
random.seed(1)
sample_candidates_100 = random.sample(candidate_list, 100)

# Data set 1: 10 jobs, 100 candidates
random.seed(1)
sample_jobs_10 = random.sample(job_list, 10)

# Data set 2: 20 jobs, 100 candidates
random.seed(1)
sample_jobs_20 = random.sample(job_list, 20)

# Dataset 3: 50 jobs, 100 candidates
random.seed(1)
sample_jobs_50 = random.sample(job_list, 50)

In [10]:
# Execute the optimisation function on Data Set 1
results_parameters_dqn = run_optimisation(sample_jobs_10, sample_candidates_100, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 7
Job 2 allocated to Candidate 23
Job 3 allocated to Candidate 99
Job 4 allocated to Candidate 71
Job 5 allocated to Candidate 19
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 70

Gender distribution:
   Woman  Man
0      0    7
None
Reward:  -101572
   candidates  jobs  data_seed  dqn_seed  batch_size  gamma  epsilon  \
0         100    10          1         1          64   0.99      0.5   

   epsilon_min  epsilon_decay  learning_rate        total_reward  
0        0.001          0.995          0.001  ([Woman], -101572)  


In [11]:
# Execute the optimisation function on Data Set 2
results_parameters_dqn = run_optimisation(sample_jobs_20, sample_candidates_100, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 91
Job 2 allocated to Candidate 14
Job 3 allocated to Candidate 69
Job 4 allocated to Candidate 71
Job 5 allocated to Candidate 70
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 81
Job 10 allocated to Candidate 55
Job 11 allocated to Candidate 7
Job 12 allocated to Candidate 19
Job 13 allocated to Candidate 99
Job 16 allocated to Candidate 88
Job 17 allocated to Candidate 18
Job 18 allocated to Candidate 30
Job 19 allocated to Candidate 23

Gender distribution:
   Woman  Man
0      4   11
None
Reward:  -393935
   candidates  jobs  data_seed  dqn_seed  batch_size  gamma  epsilon  \
0         100    20          1         1          64   0.99      0.5   

   epsilon_min  epsilon_decay  learning_rate        total_reward  
0        0.001          0.995          0.001  ([Woman], -393935)  


In [12]:
# Execute the optimisation function on Data Set 3
results_parameters_dqn = run_optimisation(sample_jobs_50, sample_candidates_100, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 99
Job 2 allocated to Candidate 23
Job 3 allocated to Candidate 7
Job 4 allocated to Candidate 59
Job 5 allocated to Candidate 16
Job 6 allocated to Candidate 54
Job 9 allocated to Candidate 78
Job 10 allocated to Candidate 55
Job 11 allocated to Candidate 22
Job 12 allocated to Candidate 27
Job 13 allocated to Candidate 35
Job 16 allocated to Candidate 92
Job 17 allocated to Candidate 18
Job 18 allocated to Candidate 30
Job 19 allocated to Candidate 69
Job 20 allocated to Candidate 67
Job 21 allocated to Candidate 95
Job 22 allocated to Candidate 91
Job 23 allocated to Candidate 83
Job 24 allocated to Candidate 32
Job 27 allocated to Candidate 28
Job 28 allocated to Candidate 73
Job 30 allocated to Candidate 62
Job 32 allocated to Candidate 88
Job 34 allocated to Candidate 71
Job 35 allocated to Candidate 19
Job 37 allocated to Candidate 81
Job 40 allocated to Candidate 

## 4. STORE RESULTS

In [30]:
# Save the results to a csv file
results_parameters_dqn.to_csv("results_parameters_dqn_parity.csv")
print("Results saved to results_parameters_dqn_parity.csv")

Results saved to results_parameters_dqn_parity.csv
