In [1]:
import pandas as pd
import numpy as np
from numpy.random import default_rng
import random

import create_test_data
import create_gene
import calculate_fitness
import select_mating_pool
import crossover

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same data and results.
# Both generators are seeded because this notebook draws from numpy's `rng`
# (rng.integers) and from the stdlib `random` module (random.choice / random.sample).
# NOTE(review): the imported helper modules may create their own generators; if so
# they need to be seeded separately for full reproducibility.
seed = 42
rng = default_rng(seed)
random.seed(seed)

In [4]:
# Problem setup: sizes of the synthetic data set, size of the gene population,
# and the preference data every gene is scored against.

n_options, n_people = 15, 150  # number of different options / number of people

n_genes = 1000  # number of genes (population)

n_iterations = 100  # how many times the evolution loop runs

options = [f'option {idx}' for idx in range(n_options)]

# Both lists are handed unchanged to the fitness and crossover helpers.
# NOTE(review): presumably one weight / one tally per choice rank (1st, 2nd, 3rd)
# -- confirm in calculate_fitness.
multipliers = [10, 2, 1]
counter = [0] * len(multipliers)

choices_fd = create_test_data.get_choices(options, n_people)

# Build the initial population. Each entry is stored as
#     genes[gene_id] = [gene_fd, fitness]
# where gene_fd comes from create_gene.get_gene and fitness from
# calculate_fitness.calc_fitness scored against choices_fd.
genes = {}

# Progress is reported roughly ten times; max(1, ...) keeps the modulo below
# from dividing by zero when n_genes < 10.
report_every = max(1, n_genes // 10)

for i in range(n_genes):
    gene_id = f'GID{i:05}'  # not named `id`, so the builtin id() stays usable
    gene_fd = create_gene.get_gene(options, n_people)
    fitness = calculate_fitness.calc_fitness(choices_fd, gene_fd, options, n_people, multipliers, counter)

    genes[gene_id] = [gene_fd, fitness]

    if i % report_every == 0:
        print(f'gene {i} of {n_genes}')

# Mean fitness of the initial population, as a baseline for the evolution loop.
print(np.mean([genes[g][1] for g in genes]))

# Evolution loop: every iteration selects a mating pool from the current
# population and replaces the population with the offspring bred from it.

# Selection parameters passed to get_mating_pool.
# NOTE(review): presumably the fraction of top-fitness genes kept and the fraction
# of additional randomly chosen genes -- confirm in select_mating_pool.
frac_elite = 0.2
frac_lucky = 0.02

# Mean fitness is reported roughly ten times; max(1, ...) keeps the modulo below
# from dividing by zero when n_iterations < 10.
report_every = max(1, n_iterations // 10)

for i in range(n_iterations):
    pool = select_mating_pool.get_mating_pool(genes, frac_elite, frac_lucky)

    offsprings = crossover.get_offspring(pool, choices_fd, options, n_genes, multipliers, counter)

    genes = offsprings

    if i % report_every == 0:
        print(np.mean([genes[g][1] for g in genes]))

# Mean fitness of the final population.
print(np.mean([genes[g][1] for g in genes]))

# Inspect the best gene of the final population: count how many people were
# assigned their first choice, one of their second choices, or one of their
# third choices, then show how the assignments are spread over the options.
best = genes[select_mating_pool.get_best(genes)][0]

# int(...) keeps the f-string below printing a plain number: `{first = }` uses
# repr(), which for a numpy integer is `np.int64(...)` on NumPy 2.
first = int((choices_fd['first_choice'] == best['module']).sum())

# second_choices / third_choices hold a collection of options per person, so the
# check is a per-row membership test rather than an equality comparison.
second = sum(
    module in wanted
    for wanted, module in zip(choices_fd['second_choices'], best['module'])
)
third = sum(
    module in wanted
    for wanted, module in zip(choices_fd['third_choices'], best['module'])
)

print(f'{first = }, {second = }, {third = }')
# Show up to one row per option instead of a hard-coded row count.
print(best.value_counts().head(n_options))

gene 0 of 1000
gene 100 of 1000
gene 200 of 1000
gene 300 of 1000
gene 400 of 1000
gene 500 of 1000
gene 600 of 1000
gene 700 of 1000
gene 800 of 1000
gene 900 of 1000
6050.972
6841.79
13142.144
17001.184
19666.343
21135.339
21346.0
21346.0
21346.0
21346.0
21346.0
21346.0
first = 84, second = 34, third = 61
module   
option 1     16
option 7     16
option 4     15
option 9     13
option 3     12
option 8     12
option 11    10
option 14    10
option 6     10
option 13     9
option 2      8
option 10     6
option 12     6
option 0      4
option 5      3
dtype: int64


In [None]:
# For every gene in the current population, print its three most frequent modules.
for gene_key in genes:
    module_counts = genes[gene_key][0]['module'].value_counts()
    print(module_counts.head(3))

In [None]:
%%timeit
# Micro-benchmark: time building the option -> slots dictionary for one gene.
# `*` and `//` share precedence and associate left to right, so the expression
# below is (1.5 * n_people) // n_options and evaluates to a float.
value = 1.5 * n_people//n_options
# One entry per option that occurs at least once in gene_fd['module'];
# each option check is a full scan of that column.
opt_slots = {option: value for option in options if gene_fd['module'].eq(option).any()}


In [None]:
%%timeit

# count 1st, 2nd, 3rd choices (method 1)
# Approach: put each person's choices next to the gene's assigned module, then
# explode the list-valued columns so membership becomes a plain equality test.

test_fd = pd.DataFrame(choices_fd[['first_choice', 'second_choices', 'third_choices']])
# NOTE(review): column assignment aligns on the index, so this assumes gene_fd
# and choices_fd share the same index -- confirm.
test_fd['module'] = gene_fd['module']

# Element-wise comparison of the first choice with the assigned module.
count_first = (test_fd['first_choice'] == test_fd['module']).sum()
# explode() emits one row per list element and repeats the other columns
# (including 'module'), so the equality counts list elements equal to the module.
test_array = test_fd.explode('second_choices')
count_second = (test_array['second_choices'] == test_array['module']).sum()
# Same procedure for the third choices.
test_array = test_fd.explode('third_choices')
count_third = (test_array['third_choices'] == test_array['module']).sum()

In [None]:
# Boolean masks, one value per person.
# fil1: the assigned module equals the person's first choice.
fil1 = gene_fd['module'] == choices_fd['first_choice']

# fil2: the assigned module is one of the person's second choices.
# Series.isin() tests every value against the whole set of values of the other
# Series; it is not a row-by-row "element in list" check, and comparing a module
# name against stringified lists never matches. Build the mask row by row instead.
fil2 = pd.Series(
    [module in seconds for module, seconds in zip(gene_fd['module'], choices_fd['second_choices'])],
    index=gene_fd.index,
    dtype=bool,
)

fil2

In [None]:
# Print, person by person, whether the assigned module is among the second choices.
module_and_seconds = zip(gene_fd['module'], choices_fd['second_choices'])
for module, second_choices in module_and_seconds:
    is_second_choice = module in second_choices
    print(is_second_choice)

In [None]:

# Prototype generator for synthetic preference data: every person gets one first
# choice plus a list of second choices and a list of third choices, with no
# option appearing in more than one of the three.
# NOTE: this cell rebinds the notebook-level n_options, options and n_people.
n_options = 10
options = [f'option {i}' for i in range(n_options)]

n_people = 150

id_list = []
first_list = []
second_list = []
third_list = []

for i in range(n_people):
    person_id = f'ID{i:04}'  # not named `id`, so the builtin id() stays usable

    # rng.integers(n) draws from [0, n), so either list may start out empty and
    # never holds every option. int() hands random.sample a plain Python int.
    n2 = int(rng.integers(n_options))
    n3 = int(rng.integers(n_options))

    first_choice = random.choice(options)
    seconds = random.sample(options, k=n2)
    thirds = random.sample(options, k=n3)

    # The first choice must not reappear among the second choices.
    seconds = [option for option in seconds if option != first_choice]

    # Third choices exclude the first choice and all second choices.
    # {first_choice} is a one-element set: set.union(first_choice) would iterate
    # the string and add its individual characters instead of the option name.
    already_chosen = set(seconds) | {first_choice}
    thirds = [option for option in thirds if option not in already_chosen]

    id_list.append(person_id)
    first_list.append(first_choice)
    second_list.append(seconds)
    third_list.append(thirds)

# Named `columns` rather than `dict` so the builtin dict type is not shadowed.
columns = {'ID': id_list, 'first_choice': first_list, 'second_choices': second_list, 'third_choices': third_list}

df = pd.DataFrame(columns)