In [2]:
"""
This script finds the best hyperparameter combination using random search.

This script may take several days to complete.
If FAST_MODE is True, the fitness of a chromosome is read directly from 'results/fitness_table.csv', which is generated by grid search.

"""

import numpy as np
import pandas as pd
from casper_test import rkf_validator
from utilities import import_data
from utilities import set_seed
from sklearn.model_selection import train_test_split
from hypers_grid_search import HYPERPARAMETERS_CANDIDATES
from hypers_grid_search import SEARCH_SPACE_SIZE
from hypers_grid_search import chromosome_to_hyperparameters

# When True, chromosome scores are meant to come from the grid-search results
# instead of being recomputed (see the module docstring).
# NOTE(review): FAST_MODE is not read anywhere in the visible code — confirm it is still needed.
FAST_MODE = True

# Number of bits in a chromosome; 2 ** 12 = 4096 distinct bit strings.
DNA_SIZE = 12

# NOTE(review): presumably the device used when training; not referenced in the visible code.
DEVICE = 'cpu'

# Optional seed. While SEED is None, set_seed() is never called and the
# random generators are left unseeded.
SEED = None
if SEED is not None:
    set_seed(SEED)

# Number of training for each hyperparameter combination
# NOTE(review): presumably consumed by repeated k-fold validation (rkf_validator);
# neither constant is referenced in the visible code — confirm.
N_SPLITS = 10
N_REPEATS = 4

# The scores for all 4096 possible chromosomes; the first CSV column becomes the index.
# dtype={0: str} asks pandas to read column 0 as strings — presumably so that the
# chromosome bit strings keep their leading zeros; verify against the CSV.
FITNESS_TABLE = pd.read_csv('results/fitness_table.csv', index_col=0, dtype={0: str})


def random_search_test(fitness_table=None, dna_size=None):
    """
    Randomly sample chromosomes until the best-scoring one is drawn.

    Chromosomes are drawn with ``np.random.randint`` as bit strings of length
    ``dna_size``; the same chromosome may be drawn more than once. Nothing is
    trained here: the ranking comes from the 'MSE' column of the fitness
    table, where a lower value is better.

    Args:
        fitness_table: DataFrame indexed by chromosome bit string with an
            'MSE' column. Defaults to the module-level FITNESS_TABLE.
        dna_size: Number of bits per chromosome. Defaults to the module-level
            DNA_SIZE.

    Returns:
        A tuple ``(num_gen_to_get_the_best, num_gen_to_get_top_10)``:
        the 1-based sample count at which the lowest-MSE chromosome was
        drawn, and the 1-based sample count at which any of the ten
        lowest-MSE chromosomes was first drawn.

    Raises:
        ValueError: If the table is empty, or if its best chromosome is not a
            string of ``dna_size`` '0'/'1' characters. Such a label can never
            be produced by the sampler, so the search would never terminate.
    """
    if fitness_table is None:
        fitness_table = FITNESS_TABLE
    if dna_size is None:
        dna_size = DNA_SIZE

    top_10s = fitness_table.sort_values(by='MSE', ascending=True).head(10)
    if top_10s.empty:
        raise ValueError('fitness table is empty; there is no best chromosome to search for')

    # Loop invariants: look them up once instead of on every sample.
    best_chromosome = top_10s.index[0]
    top_10_chromosomes = set(top_10s.index)

    # Fail fast instead of looping forever when the target cannot be sampled
    # (e.g. the index was parsed as integers, or has a different bit length).
    reachable = (
        isinstance(best_chromosome, str)
        and len(best_chromosome) == dna_size
        and set(best_chromosome) <= {'0', '1'}
    )
    if not reachable:
        raise ValueError(
            f'best chromosome {best_chromosome!r} is not a bit string of length {dna_size}; '
            'random search could never sample it'
        )

    num_gen_to_get_top_10 = 0
    top_10_obtained = False

    n_sample = 1
    while True:
        chromosome = ''.join(map(str, np.random.randint(2, size=dna_size)))

        # Record only the first time any top-10 chromosome shows up.
        if not top_10_obtained and chromosome in top_10_chromosomes:
            top_10_obtained = True
            num_gen_to_get_top_10 = n_sample

        # The best chromosome is itself in the top 10, so the top-10 count
        # is always set by the time we return.
        if chromosome == best_chromosome:
            return n_sample, num_gen_to_get_top_10

        n_sample += 1

In [3]:
# Repeat the random-search experiment NUM_TEST times. Each run reports how many
# samples were drawn before the best chromosome, and before any top-10
# chromosome, was found. (The printed labels say "Gen" for each sample drawn.)
NUM_TEST = 2000
result = []
for i in range(1, NUM_TEST + 1):
    run_outcome = random_search_test()
    result.append(list(run_outcome))
    # Progress report every 200 runs, showing that run's own counts.
    if i % 200 == 0:
        best_at, top_10_at = run_outcome
        print(f'{i}/{NUM_TEST}   Number of Gen to get the best chromosome: {best_at}   Number of Gen to get one of the top 10s: {top_10_at}')
result = np.array(result)

# Column 0: samples needed for the best chromosome; column 1: for any top-10.
num_gen_to_get_the_best, num_gen_to_get_top_10 = result[:, 0], result[:, 1]

# Summary statistics over all runs, one section per measurement.
for heading, counts in (
    ("Number of Gen to get the best chromosome:", num_gen_to_get_the_best),
    ("Number of Gen to get one of the top 10s:", num_gen_to_get_top_10),
):
    print()
    print(heading)
    print(f'Mean: {np.mean(counts)}')
    print(f'Median: {np.median(counts)}')
    print(f'Standard Deviation: {np.std(counts)}')

200/2000   Number of Gen to get the best chromosome: 508   Number of Gen to get one of the top 10s: 427
400/2000   Number of Gen to get the best chromosome: 550   Number of Gen to get one of the top 10s: 327
600/2000   Number of Gen to get the best chromosome: 2604   Number of Gen to get one of the top 10s: 212
800/2000   Number of Gen to get the best chromosome: 5427   Number of Gen to get one of the top 10s: 44
1000/2000   Number of Gen to get the best chromosome: 12584   Number of Gen to get one of the top 10s: 794
1200/2000   Number of Gen to get the best chromosome: 391   Number of Gen to get one of the top 10s: 155
1400/2000   Number of Gen to get the best chromosome: 16629   Number of Gen to get one of the top 10s: 475
1600/2000   Number of Gen to get the best chromosome: 200   Number of Gen to get one of the top 10s: 200
1800/2000   Number of Gen to get the best chromosome: 3990   Number of Gen to get one of the top 10s: 234
2000/2000   Number of Gen to get the best chromosome: