In [1]:
import pandas as pd
import numpy as np
from numpy.random import default_rng
import random
from itertools import chain
import pickle

import create_test_data
import create_gene
import calculate_fitness
import select_mating_pool
import crossover

from tqdm import tqdm

# Notebook-level NumPy random generator. It is created without a seed, so any
# draws taken from it differ from run to run.
# NOTE(review): `rng` is not referenced by the cells visible here — confirm it
# is still needed, and pass an explicit seed if reproducible runs are wanted.
rng = default_rng()

In [5]:
# Configuration for the test subjects and the candidate population.

n_people = 150        # number of people
n_genes = 5_000       # number of genes (population)
n_iterations = 1_000  # passes of the optimisation loop

# Every block maps an option name to an integer.
# NOTE(review): presumably the number of places available for that option —
# confirm against create_gene / create_test_data.
companies = dict.fromkeys(
    [
        'Zeix', 'SCS', 'Kyburz', 'Siemens', 'Varian', 'MAN', 'USZ',
        'Acht Grad Ost', 'Weidmann', 'True Wealth', 'PartnerRe', 'Sensirion', 'Hocoma',
    ],
    20,
)
uni1 = {
    'Architektur': 30,
    'Biologie (ETH)': 60,
    'Raum (ETH)': 16,
    'Umwelt (ETH)': 16,
    'Drohnen (UZH)': 40,
    'Biodiversität (UZH)': 24,
    'Physik (UZH)': 20,
}
uni2 = {
    'Chemie (ETH)': 25,
    'Material (ETH)': 20,
    'Medizin (ETH)': 50,
    'Pharma (ETH)': 24,
    'Physik (ETH)': 20,
    'Chemie (UZH)': 30,
    'Informatik (UZH)': 20,
}
uni3 = {
    'CERN (UZH)': 30,
    'Hyperwürfel (UZH)': 20,
    'Krebsforschung (UZH)': 20,
    'Informatik (ETH)': 16,
    'Elektrotechnik (ETH)': 30,
    'Maschinen (ETH)': 40,
    'Mathematik (ETH)': 30,
}
workshops = {
    'Oberflächen und Farben': 20,
    'Fourier': 20,
    'Datenexploration': 20,
    'Astrophysik': 20,
    'Molekulare Schalter': 12,
    'Arduino': 12,
    'Erneuerbar': 20,
    'SYPT': 20,
    'Big Data': 16,
    'DNA': 20,
    'Astronomie': 20,
    'Recycling': 16,
    'Molekulargenetik': 14,
    'Energiespeicherung': 20,
}

# All blocks keyed by name; the key order is the order in which blocks are processed.
blocks = {
    'companies': companies,
    'uni1': uni1,
    'uni2': uni2,
    'uni3': uni3,
    'workshops': workshops,
}
block_names = list(blocks)

# Passed to the fitness calculation.
# NOTE(review): presumably the weights of a first / second / third choice hit — confirm in calculate_fitness.
multipliers = [50, 2, 1]
counter = [0, 0, 0]

# Test preference data for all participants; the same table is passed to every
# fitness evaluation in the loop below.
choices_df = create_test_data.get_choices(blocks, n_people)

print('Generating initial population:')
# Population: zero-padded id ('GID00000', 'GID00001', ...) -> [candidate assignment, fitness].
genes = {}
for i in tqdm(range(n_genes)):
    # Named `gene_id` rather than `id`: binding `id` at notebook level would
    # shadow the builtin `id()` for every cell executed afterwards.
    gene_id = f'GID{i:05}'
    gene_df = create_gene.get_gene(blocks, n_people)

    fitness = calculate_fitness.calc_fitness(choices_df, gene_df, block_names, multipliers)

    genes[gene_id] = [gene_df, fitness]


# Selection parameters handed to the mating-pool selection on every pass.
# NOTE(review): presumably the share of top-ranked and of randomly picked
# genes that enter the pool — confirm in select_mating_pool.
frac_elite = 0.1
frac_lucky = 0.01

print('Optimising population:')
# The recorded run of this loop took roughly 4 h 47 min for 1000 passes (~17 s each).
for generation in tqdm(range(n_iterations)):
    pool = select_mating_pool.get_mating_pool(genes, frac_elite, frac_lucky)

    # The offspring produced from the pool replace the whole population.
    genes = crossover.get_offspring(pool, choices_df, blocks, n_genes, multipliers)


# print(np.mean([genes[g][1] for g in genes]))

def _count_hits(choice_lists, assigned):
    """Count the rows whose assigned option is contained in that row's choices.

    Both arguments are iterated in parallel; iteration stops at the shorter one.
    """
    return sum(option in choices for choices, option in zip(choice_lists, assigned))


# Assignment table (element 0 of the [assignment, fitness] pair) of the gene
# that select_mating_pool.get_best picks from the final population.
best = genes[select_mating_pool.get_best(genes)][0]

# Per block, report how many people received their first choice, one of their
# second choices, or one of their third choices.
for block in blocks:
    assigned = best[block]

    # Cast to a plain int: the `{first = }` format below prints repr(), which
    # for a NumPy integer scalar is `np.int64(...)` on NumPy >= 2.
    first = int((choices_df[f'{block}_first_choice'] == assigned).sum())
    second = _count_hits(choices_df[f'{block}_second_choices'], assigned)
    third = _count_hits(choices_df[f'{block}_third_choices'], assigned)

    print(block)
    print(f'{first = }, {second = }, {third = }, total = {first+second+third}')

Generating initial population:


100%|██████████| 5000/5000 [00:10<00:00, 492.20it/s]


Optimising population:


100%|██████████| 1000/1000 [4:47:23<00:00, 17.24s/it]   

companies
first = 102, second = 21, third = 17, total = 140
uni1
first = 121, second = 15, third = 7, total = 143
uni2
first = 121, second = 12, third = 6, total = 139
uni3
first = 121, second = 11, third = 6, total = 138
workshops
first = 119, second = 14, third = 7, total = 140





In [13]:
# Persist the test data, the final population and the best assignment.
# Each file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes, rather than being left open.
# The `pickles/` directory must already exist: opening a file for writing does
# not create missing directories.
# Only load these files back from a trusted location: unpickling can execute
# arbitrary code.
with open('pickles/choices.pkl', 'wb') as choices_file:
    pickle.dump(choices_df, choices_file)
with open('pickles/genes.pkl', 'wb') as genes_file:
    pickle.dump(genes, genes_file)
with open('pickles/best.pkl', 'wb') as best_file:
    pickle.dump(best, best_file)

In [3]:
# For each block, build one sub-list per option that repeats the option name
# as many times as the number stored for it.
# `l` is rebound on every pass, so after the loop it holds the nested list of
# the last block only; the timing cells below read that value.
for block in blocks:
    l = [[option for _ in range(count)] for option, count in blocks[block].items()]


In [4]:
%%timeit
# Benchmark: flatten the nested list `l` by one level with a nested list
# comprehension. (The cell magic has to stay on the first line of the cell.)
[item for sublist in l for item in sublist]

6.32 µs ± 28.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [6]:
%%timeit
# Benchmark: flatten the nested list `l` by one level with itertools.chain,
# unpacking each sub-list as a separate argument.
# (The cell magic has to stay on the first line of the cell.)
list(chain(*l))

3.48 µs ± 84.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [13]:
# Display the nested list held in `l` (one sub-list per option).
# The bare name `list` refers to the builtin type on a fresh kernel and would
# only show `list` itself, not the nested data in the recorded output.
l

[['Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben',
  'Oberflächen und Farben'],
 ['Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier',
  'Fourier'],
 ['Datenexploration',
  'Datenexploration',
  'Datenexploration',
  'Datenexploration',
  'Datenexploration',
  'Datenexploration',
  'Datenexploration',
  'Datenexploration',
  