In [1]:
"""
This script finds the best hyperparameter combination using random search

This script may take several days to complete
If FAST_MODE = True, it will directly read the fitness of a chromosome from 'results/fitness_table' generated by Grid Search

"""

import numpy as np
import pandas as pd
from utilities import set_seed
from hypers_grid_search import HYPERPARAMETERS_CANDIDATES
from hypers_grid_search import SEARCH_SPACE_SIZE


# Length of a chromosome: random_search_test() draws strings of DNA_SIZE binary digits.
DNA_SIZE = 15

# Optional seeding: set_seed is only called when SEED is changed from None.
# (set_seed comes from utilities; presumably it seeds the RNGs used here -- TODO confirm.)
SEED = None
if SEED is not None:
    set_seed(SEED)


# Precomputed fitness table, read from CSV with the first column as the index.
# That column is requested as str (presumably so bit strings keep their leading
# zeros -- TODO confirm). random_search_test() expects an 'MSE' column.
# NOTE(review): this comment used to say "all 4096 possible chromosomes", but
# DNA_SIZE = 15 allows 2**15 = 32768 distinct bit strings -- confirm what the CSV holds.
FITNESS_TABLE = pd.read_csv('results/feature_selection_fitness.csv', index_col=0, dtype={0: str})

def random_search_test():
    """
    Run one random-search trial against the precomputed fitness table.

    Chromosomes (strings of DNA_SIZE binary digits) are drawn at random, with
    replacement, until the chromosome with the lowest 'MSE' in FITNESS_TABLE
    is drawn.

    Returns:
        tuple: (num_gen_to_get_the_best, num_gen_to_get_top_10), the 1-based
        number of samples drawn when the best chromosome was hit, and when any
        of the 10 lowest-MSE chromosomes was first hit.

    Raises:
        ValueError: if the best index entry is not a string of exactly
            DNA_SIZE characters from '0'/'1'. The sampler could never
            produce such a value, so the search loop would never terminate.
    """

    top_10s = FITNESS_TABLE.sort_values(by='MSE', ascending=True).head(10)

    # Loop-invariant lookups, hoisted out of a loop that can run for
    # hundreds of thousands of iterations.
    best_chromosome = top_10s.index[0]
    top_10_chromosomes = set(top_10s.index)

    # Guard against an endless loop: the target must be something the sampler
    # below is actually able to generate.
    if (not isinstance(best_chromosome, str)
            or len(best_chromosome) != DNA_SIZE
            or not set(best_chromosome) <= {'0', '1'}):
        raise ValueError(
            f'Best chromosome {best_chromosome!r} is not a binary string of '
            f'length {DNA_SIZE}; random search could never sample it'
        )

    # 0 means "no top-10 chromosome seen yet" (sample counts start at 1).
    num_gen_to_get_top_10 = 0

    n_sample = 0
    while True:
        n_sample += 1
        chromosome = ''.join(map(str, np.random.randint(2, size=DNA_SIZE)))

        if not num_gen_to_get_top_10 and chromosome in top_10_chromosomes:
            num_gen_to_get_top_10 = n_sample

        # The best chromosome is itself in the top 10, so the top-10 count is
        # always set by the time this returns.
        if chromosome == best_chromosome:
            return n_sample, num_gen_to_get_top_10

In [2]:
# Repeat the random-search trial NUM_TEST times, reporting every 20th trial.
# (The printed "Gen" counts are numbers of random samples drawn.)
NUM_TEST = 2000
result = []
for i in range(1, NUM_TEST + 1):
    result.append(random_search_test())
    num_gen_to_get_the_best, num_gen_to_get_top_10 = result[-1]
    if not i % 20:
        print(f'{i}/{NUM_TEST}   Number of Gen to get the best chromosome: {num_gen_to_get_the_best}   Number of Gen to get one of the top 10s: {num_gen_to_get_top_10}')

# One row per trial: column 0 = samples until the best, column 1 = samples until a top-10 hit.
result = np.array(result)
num_gen_to_get_the_best = result[:, 0]
num_gen_to_get_top_10 = result[:, 1]

20/2000   Number of Gen to get the best chromosome: 1530   Number of Gen to get one of the top 10s: 1530
40/2000   Number of Gen to get the best chromosome: 179990   Number of Gen to get one of the top 10s: 1196
60/2000   Number of Gen to get the best chromosome: 30271   Number of Gen to get one of the top 10s: 5934
80/2000   Number of Gen to get the best chromosome: 893   Number of Gen to get one of the top 10s: 893
100/2000   Number of Gen to get the best chromosome: 30930   Number of Gen to get one of the top 10s: 13363
120/2000   Number of Gen to get the best chromosome: 13314   Number of Gen to get one of the top 10s: 5507
140/2000   Number of Gen to get the best chromosome: 9637   Number of Gen to get one of the top 10s: 985
160/2000   Number of Gen to get the best chromosome: 38350   Number of Gen to get one of the top 10s: 326
180/2000   Number of Gen to get the best chromosome: 1845   Number of Gen to get one of the top 10s: 1283
200/2000   Number of Gen to get the best chromo

In [3]:
# Report the same three statistics for both per-trial sample-count arrays.
for heading, counts in (
    ("Number of Gen to get the best chromosome:", num_gen_to_get_the_best),
    ("Number of Gen to get one of the top 10s:", num_gen_to_get_top_10),
):
    print()
    print(heading)
    print(f'Mean: {np.mean(counts)}')
    print(f'Median: {np.median(counts)}')
    print(f'Standard Deviation: {np.std(counts)}')


Number of Gen to get the best chromosome:
Mean: 32065.2475
Median: 21581.5
Standard Deviation: 31896.728752165855

Number of Gen to get one of the top 10s:
Mean: 3233.266
Median: 2297.0
Standard Deviation: 3193.9749445235166
