<h1 style="text-align: center;">OPTIMISATION DQN PARAMETERS (INITIAL DATA)</h1>

## 0. PACKAGE REQUIREMENTS

In [1]:
# Packages
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
from IPython.display import clear_output
import warnings

# DQN Requirements
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import namedtuple

# Model imports
import sys
import os

In [2]:
warnings.filterwarnings("ignore", message=".*To copy construct from a tensor.*")

In [3]:
# Environment requirements
module_path = os.path.abspath(os.path.join('..', '..', 'environment'))
if module_path not in sys.path:
    sys.path.append(module_path)
from environment_basic import Job, Candidate, Environment

# Agent requirements
module_path = os.path.abspath(os.path.join('..', '..', 'agents'))
if module_path not in sys.path:
    sys.path.append(module_path)
from agents_basic import DQNAgent, DQN, ReplayBuffer

# Tool requirements
module_path = os.path.abspath(os.path.join('..', '..', 'tools'))
if module_path not in sys.path:
    sys.path.append(module_path)
from tools import calculate_sampled_men, calculate_hired_men, get_best_allocation, visualize_loss, set_seed

## 1. DQN EPISODE FUNCTIONS

In [4]:
def run_dqn_agent_optimisation(sample_jobs, sample_candidates, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate, num_episodes, show_schedule = False):
    # Initialize the environment
    env = Environment(sample_jobs, sample_candidates)
    state = env.reset()
    state_tensor = torch.tensor(state.to_tensor(), dtype=torch.float32).unsqueeze(0)

    # Initialise the state and action sizes
    state_size = state.num_jobs * state.num_candidates
    action_size = len(state.available_actions)

    episode_rewards = []
    loss_list = []
    iteration = 0

    # Initialise the agent
    agent = DQNAgent(state_size, action_size, env, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate)

    for _ in range(num_episodes):
        iteration += 1
        state = env.reset()
        state_tensor = torch.tensor(state.to_tensor(), dtype=torch.float32).unsqueeze(0)
        total_reward = 0
        done = False

        while not done:
            action_tuple = agent.select_action(state_tensor, env.current_state.available_actions, env.action_indices)
            next_state, reward, done = env.step(action_tuple)
            next_state_tensor = torch.tensor(next_state.to_tensor(), dtype=torch.float32).unsqueeze(0)

            agent.store_transition(state_tensor, action_tuple, reward, next_state_tensor, done)
            state_tensor = next_state_tensor

            loss = agent.optimize_model()
            total_reward += reward

            if done:
                agent.update_target_net()
                episode_rewards.append(total_reward)
                break
        
        loss_list.append(loss)

    # Save the model
    agent.save_model('DQN_optimisation')
    agent.load_model('DQN_optimisation')

    gender_distribution, tot_reward = get_best_allocation(agent, env, show_schedule=True)
    
    #return episode_rewards, tot_reward, loss_list
    return gender_distribution, tot_reward

## 2. INITIALISE THE DATA

In [6]:
# Read in the job data
file_path = '../../data/cleaned_data/jobs_cleaned.csv'
jobs_df = pd.read_csv(file_path)

# Read in the candidate data
file_path = '../../data/cleaned_data/candidates_cleaned.csv'
candidates_df = pd.read_csv(file_path)

# Initialise the class objects with data from the dataframes
job_list = []
for i in range(len(jobs_df)):
    job = Job(i+1, jobs_df['Degree_Bachelor'][i], jobs_df['Degree_Master'][i], jobs_df['Degree_Other'][i],
              jobs_df['Software_Programming'][i], jobs_df['C_Programming'][i], jobs_df['Python_Programming'][i], 
              jobs_df['JavaScript_Programming'][i], jobs_df['Professional_Software_Experience'][i], 
              jobs_df['Management_Skills'][i], jobs_df['Engineer'][i], int(jobs_df['Minimum_Pay'][i]))
    job_list.append(job)

candidate_list = []
for i in range(len(candidates_df)):
    candidate = Candidate(i+1, candidates_df['Gender'][i], candidates_df['Degree_Bachelor'][i], candidates_df['Degree_Master'][i], candidates_df['Degree_Other'][i], 
                          candidates_df['Software_Programming'][i], candidates_df['C_Programming'][i], candidates_df['Python_Programming'][i], 
                          candidates_df['JavaScript_Programming'][i], candidates_df['Professional_Software_Experience'][i], 
                          candidates_df['Management_Skills'][i], candidates_df['Engineer'][i], int(candidates_df['Previous_Pay'][i]))
    candidate_list.append(candidate)

print("Job Data:", len(job_list), " Candidate Data:", len(candidate_list))

Job Data: 806  Candidate Data: 49005


## 3. LOOP THROUGH PARAMETERS

In [7]:
# 144 combinations
batch_size_values = [16, 32, 64]
gamma_values = [0.9, 0.95, 0.99]
epsilon_values = [1, 0.5]
epsilon_min_values = [0.1, 0.001]
epsilon_decay_values = [0.985, 0.995]
learning_rate_values = [0.0005, 0.001]

columns = ["candidates", "jobs", "data_seed", "dqn_seed", "batch_size", "gamma", "epsilon", "epsilon_min", "epsilon_decay", "learning_rate", "total_reward"]
results_parameters_dqn = pd.DataFrame(columns=columns)

In [8]:
def run_optimisation(sample_jobs, sample_candidates, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_df):
    num_combinations = len(batch_size_values) * len(gamma_values) * len(epsilon_values) * len(epsilon_min_values) * len(epsilon_decay_values) * len(learning_rate_values)
    current_iteration = 0
    
    for batch_size in batch_size_values:
        for gamma in gamma_values:
            for epsilon in epsilon_values:
                for epsilon_min in epsilon_min_values:
                    for epsilon_decay in epsilon_decay_values:
                        for learning_rate in learning_rate_values:
                            clear_output()

                            current_iteration += 1
                            print(f"Iteration {current_iteration}/{num_combinations}; Testing combination {batch_size}, {gamma}, {epsilon}, {epsilon_min}, {epsilon_decay}, {learning_rate}")

                            set_seed(1)

                            gender_distribution, total_reward = run_dqn_agent_optimisation(sample_jobs, sample_candidates, batch_size, gamma, epsilon, epsilon_min, epsilon_decay, learning_rate, num_episodes=1000)
                            
                            # Add the results to the dqn_results_df dataframe
                            new_row = pd.DataFrame([{"candidates": len(sample_candidates), "jobs": len(sample_jobs), "data_seed": 1, "dqn_seed": 1, 
                                                     "batch_size": batch_size, "gamma": gamma, "epsilon": epsilon, "epsilon_min": epsilon_min,
                                                     "epsilon_decay": epsilon_decay, "learning_rate": learning_rate, "total_reward": total_reward}])
                            print(new_row)
                            results_df = pd.concat([results_df, new_row], ignore_index=True)

    return results_df

In [9]:
# Sample Candidates
random.seed(1)
sample_candidates_100 = random.sample(candidate_list, 100)

# Data set 1: 10 jobs, 100 candidates
random.seed(1)
sample_jobs_10 = random.sample(job_list, 10)

# Data set 2: 20 jobs, 100 candidates
random.seed(1)
sample_jobs_20 = random.sample(job_list, 20)

# Dataset 3: 50 jobs, 100 candidates
random.seed(1)
sample_jobs_50 = random.sample(job_list, 50)

In [10]:
# Execute the optimisation function on Data Set 1
results_parameters_dqn = run_optimisation(sample_jobs_10, sample_candidates_100, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 12
Job 1 allocated to Candidate 16
Job 2 allocated to Candidate 86
Job 3 allocated to Candidate 95
Job 4 allocated to Candidate 56
Job 5 allocated to Candidate 25
Job 6 allocated to Candidate 24
Job 7 allocated to Candidate 81
Job 9 allocated to Candidate 13

Gender distribution:
   Woman  Man
0      1    8
None
Reward:  -211399
   candidates  jobs  data_seed  dqn_seed  batch_size  gamma  epsilon  \
0         100    10          1         1          64   0.99      0.5   

   epsilon_min  epsilon_decay  learning_rate  total_reward  
0        0.001          0.995          0.001       -211399  


In [10]:
# Execute the optimisation function on Data Set 2
results_parameters_dqn = run_optimisation(sample_jobs_20, sample_candidates_100, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 12
Job 1 allocated to Candidate 16
Job 2 allocated to Candidate 71
Job 3 allocated to Candidate 95
Job 4 allocated to Candidate 13
Job 5 allocated to Candidate 52
Job 6 allocated to Candidate 24
Job 7 allocated to Candidate 81
Job 9 allocated to Candidate 78
Job 10 allocated to Candidate 68
Job 11 allocated to Candidate 73
Job 12 allocated to Candidate 25
Job 13 allocated to Candidate 56
Job 14 allocated to Candidate 74
Job 15 allocated to Candidate 7
Job 16 allocated to Candidate 90
Job 17 allocated to Candidate 31
Job 18 allocated to Candidate 49
Job 19 allocated to Candidate 86

Gender distribution:
   Woman  Man
0      2   17
None
Reward:  -678019
   candidates  jobs  data_seed  dqn_seed  batch_size  gamma  epsilon  \
0         100    20          1         1          64   0.99      0.5   

   epsilon_min  epsilon_decay  learning_rate        total_reward  
0        0.0

In [11]:
# Execute the optimisation function on Data Set 3
results_parameters_dqn = run_optimisation(sample_jobs_50, sample_candidates_100, batch_size_values, gamma_values, epsilon_values, epsilon_min_values, epsilon_decay_values, learning_rate_values, results_parameters_dqn)

Iteration 144/144; Testing combination 64, 0.99, 0.5, 0.001, 0.995, 0.001
Allocations:
Job 0 allocated to Candidate 89
Job 2 allocated to Candidate 50
Job 3 allocated to Candidate 25
Job 4 allocated to Candidate 81
Job 5 allocated to Candidate 12
Job 6 allocated to Candidate 24
Job 7 allocated to Candidate 0
Job 9 allocated to Candidate 31
Job 10 allocated to Candidate 68
Job 11 allocated to Candidate 84
Job 12 allocated to Candidate 64
Job 13 allocated to Candidate 9
Job 14 allocated to Candidate 16
Job 15 allocated to Candidate 36
Job 16 allocated to Candidate 67
Job 17 allocated to Candidate 49
Job 18 allocated to Candidate 71
Job 19 allocated to Candidate 86
Job 20 allocated to Candidate 73
Job 21 allocated to Candidate 21
Job 22 allocated to Candidate 44
Job 23 allocated to Candidate 97
Job 24 allocated to Candidate 13
Job 25 allocated to Candidate 46
Job 27 allocated to Candidate 56
Job 28 allocated to Candidate 92
Job 30 allocated to Candidate 83
Job 32 allocated to Candidate 95

## 4. STORE RESULTS

In [14]:
results_parameters_dqn

Unnamed: 0,candidates,jobs,data_seed,dqn_seed,batch_size,gamma,epsilon,epsilon_min,epsilon_decay,learning_rate,total_reward
0,100,10,1,1,16,0.90,1,0.100,0.985,0.0005,"([Woman], -232999)"
1,100,10,1,1,16,0.90,1,0.100,0.985,0.0010,"([Woman], -211399)"
2,100,10,1,1,16,0.90,1,0.100,0.995,0.0005,"([Woman], -224459)"
3,100,10,1,1,16,0.90,1,0.100,0.995,0.0010,"([Woman], -211399)"
4,100,10,1,1,16,0.90,1,0.001,0.985,0.0005,"([Woman], -211399)"
...,...,...,...,...,...,...,...,...,...,...,...
427,100,50,1,1,64,0.99,0.5,0.100,0.995,0.0010,"([Woman], -2380926)"
428,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0005,"([Woman], -2296854)"
429,100,50,1,1,64,0.99,0.5,0.001,0.985,0.0010,"([Woman], -2399549)"
430,100,50,1,1,64,0.99,0.5,0.001,0.995,0.0005,"([Woman], -2502306)"


In [16]:
#ONLY BECAUSE I MADE A MISTAKE WITH get_best_allocation
results_parameters_dqn['allocation'] = results_parameters_dqn['total_reward'].apply(lambda x: x[0])
results_parameters_dqn['total_reward'] = results_parameters_dqn['total_reward'].apply(lambda x: x[1])

In [18]:
results_parameters_dqn['total_reward'].max()

-204513

In [19]:
# Save the results to a csv file
results_parameters_dqn.to_csv("results_parameters_dqn_initial.csv")
print("Results saved to results_parameters_dqn_initial.csv")

Results saved to results_parameters_dqn_initial.csv
