In [1]:
import pandas as pd
import numpy as np
from numpy.random import default_rng
import random
from itertools import chain
import pickle

import choices
import create_gene
import calculate_fitness
import select_mating_pool
import crossover

from tqdm import tqdm

# Fixed seed so that a Restart & Run All reproduces the same stochastic run.
# NOTE(review): helper modules that build their own default_rng() are not
# affected by this seeding — confirm how create_gene / crossover draw numbers.
SEED = 42
random.seed(SEED)      # Python's global generator
np.random.seed(SEED)   # NumPy's legacy global generator
rng = default_rng(SEED)

In [2]:
# Run configuration: population size, iteration count and the option tables.

n_genes = 1000  # number of genes (population)

n_iterations = 100

# Each table maps an option name to an integer; a later cell repeats every
# name that many times (presumably the number of places on offer — TODO confirm).
companies = {
    'Zeix': 20,
    'SCS Supercomputing Systems': 20,
    'Kyburz': 20,
    'Siemens Mobility AG': 20,
    'Varian': 20,
    'MAN Energy Solutions': 20,
    'Universitätsspital Zürich': 20,
    'Acht Grad Ost': 20,
    'Weidmann': 20,
    'True Wealth': 20,
    'PartnerRe': 20,
    'Sensirion': 20,
    'Hocoma': 20,
}
uni1 = {
    'Architektur (ETH)': 30,
    'Biologie (ETH)': 60,
    'Raumbezogene Ingenieurwissenschaften (ETH)': 16,
    'Umweltingenieurwissenschaften (ETH)': 16,
    'Autonome Agile Drohnen (UZH)': 40,
    'Biodiversität in Zeit und Raum (UZH)': 24,
    'Einblick in aktuelle Forschung am Physik-Institut (UZH)': 20,
}
uni2 = {
    'Chemie (ETH)': 25,
    'Materialwissenschaft (ETH)': 20,
    'Medizin studieren an der ETH': 50,
    'Pharmazeutische Wissenschaften (ETH)': 24,
    'Physik (ETH)': 20,
    'Illuminating the Chemistry of Life (UZH)': 30,
    'Was ist ein Programm? (UZH)': 20,
}
uni3 = {
    'Aktuelle Forschung am CERN (UZH)': 30,
    'Hyperwürfel (UZH)': 20,
    'Krebsforschung (UZH)': 20,
    'Informatik (ETH)': 16,
    'Informationstechnologie und Elektrotechnik (ETH)': 30,
    'Maschineningenieurwissenschaften (ETH)': 40,
    'Mathematik (ETH)': 30,
}
workshops = {
    'Oberflächen und Farben': 20,
    'Fourier-Reihen': 20,
    'Datenexploration': 20,
    'Astrophysik, Planetologie und Raumfahrt': 20,
    'Molekulare Schalter – eine on/off-Beziehung': 12,
    "Let's Arduino": 12,
    'Die Hoffnung ist erneuerbar': 20,
    "Swiss Young Physicists' Tournament": 20,
    'Von Big Data und Künstlicher Intelligenz zur Umweltinformatik': 16,
    'Woher kommst du wirklich? Frag deine DNA!': 20,
    'Astronomie': 20,
    'Entsorgung, Recycling und Abwasserreinigung in Zürich': 16,
    'Praktische Molekulargenetik: Wolbachia – auf der Suche nach Bakterien-DNA in selbstgefangenen Insekt': 14,
    'Energiespeicherung – Vom Akku bis zum Pump-Speicherstausee': 20,
}

# Block name -> option table, plus the block names in insertion order.
blocks = {
    'companies': companies,
    'uni1': uni1,
    'uni2': uni2,
    'uni3': uni3,
    'workshops': workshops,
}
block_names = list(blocks)

# Passed to the fitness calculation; presumably the weights for a
# first / second / third choice — TODO confirm against calculate_fitness.
multipliers = [50, 2, 1]
counter = [0, 0, 0]

# Read the priorities table from disk; its row count is used as the number of people.
priorities_df = pd.read_json('priorities.json')
n_people = priorities_df.shape[0]

# Build the choices table from the option blocks and the priorities
# (exact layout is defined by choices.get_choices).
choices_df = choices.get_choices(blocks, priorities_df)

# Build the initial population: `genes` maps a zero-padded id ('GID00000', ...)
# to a two-item list [gene, fitness].
print('Generating initial population:')
genes = {}
for i in tqdm(range(n_genes)):
    # Named `gene_id` rather than `id` so the builtin id() is not shadowed
    # for the rest of the kernel session.
    gene_id = f'GID{i:05}'
    gene_df = create_gene.get_gene(blocks, n_people)

    fitness = calculate_fitness.calc_fitness(choices_df, gene_df, block_names, multipliers)

    genes[gene_id] = [gene_df, fitness]

# NOTE: `gene_df` stays bound to the last generated gene after the loop;
# later scratch cells read it.


# Fractions handed to select_mating_pool.get_mating_pool on every generation
# (presumably the elite share and the randomly kept share — TODO confirm).
frac_elite = 0.1
frac_lucky = 0.01

print('Optimising population:')
for generation in tqdm(range(n_iterations)):
    # Select the parents, then replace the whole population with their offspring.
    parents = select_mating_pool.get_mating_pool(genes, frac_elite, frac_lucky)
    genes = crossover.get_offspring(parents, choices_df, blocks, n_genes, multipliers)


# Disabled debug aid: mean of the second item stored for every gene.
# print(np.mean([genes[g][1] for g in genes]))

# Look up the key reported by get_best and keep the first item stored under it.
best_key = select_mating_pool.get_best(genes)
best = genes[best_key][0]


# For every block, print how many rows of `choices_df` have a first / second /
# third choice that equals the assignment in the best gene.
#
# The comparison uses `best` (the result of the optimisation). The previous
# version compared against `gene_df`, which is merely the last random gene
# created while building the initial population.
# NOTE(review): assumes `best` has one column per block like `gene_df` — confirm
# against crossover.get_offspring.
for block in blocks:
    print(f'{block}:')

    for level in ['first', 'second', 'third']:
        # All choice columns of this block and priority level.
        filter_col = [col for col in choices_df if col.startswith(f'{block}_{level}')]
        # DataFrame.isin with a Series matches on the index, so each row is
        # compared with the assignment stored under the same index label.
        number = choices_df[filter_col].isin(best[block]).any(axis=1).sum()
        print(f'{level} = {number}')


Generating initial population:


100%|██████████| 1000/1000 [00:11<00:00, 89.70it/s]


Optimising population:


 34%|███▍      | 34/100 [06:16<12:11, 11.08s/it]

In [None]:
# Persist the choices table, the final population and the best gene.
# Each file is opened in a `with` block so the handle is flushed and closed
# even if pickling fails (the bare open() calls used before never closed them).
for stem, payload in (('choices', choices_df), ('genes', genes), ('best', best)):
    with open(f'pickles/{stem}.pkl', 'wb') as handle:
        pickle.dump(payload, handle)

In [4]:
# Build `l`: one sub-list per option, holding the option name repeated by its
# integer value. `l` is overwritten on every pass, so only the lists of the
# last block remain once the loop ends.
for block, options in blocks.items():
    l = [[name] * count for name, count in options.items()]


In [5]:
%%timeit
# Benchmark: flatten the nested list `l` with a nested list comprehension.
[item for sublist in l for item in sublist]

5.8 µs ± 46.3 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [6]:
%%timeit
# Benchmark: flatten the nested list `l` with itertools.chain and argument unpacking.
list(chain(*l))

2.75 µs ± 24.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [7]:
counter = [0, 0, 0]
fitness = 0

# Columns of choices_df whose name starts with 'uni1_first'.
filter_col = [col for col in choices_df if col.startswith('uni1_first')]

# Rows where any of those columns equals the 'uni1' entry of gene_df
# (DataFrame.isin with a Series matches on the index).
uni1_first_hit = choices_df[filter_col].isin(gene_df['uni1']).any(axis=1)

# Sum of 'total_score' over the matching rows.
# NOTE(review): a later cell's recorded output is KeyError: 'total_score' —
# confirm the column exists on a fresh run.
(uni1_first_hit * choices_df['total_score']).sum()




1331

In [57]:
# Pull the 'uni1_first_0' choices column and the 'uni1' gene column,
# then keep plain-list copies of both for element-wise comparison.
first_choice, gene_expression = choices_df['uni1_first_0'], gene_df['uni1']
first_list, gene_list = first_choice.tolist(), gene_expression.tolist()

In [58]:
# Print every position where the first choice equals the gene's value,
# followed by the total number of such positions.
counter = 0
for i, item in enumerate(first_list):
    if item != gene_list[i]:
        continue
    print(i, item)
    counter += 1

print(counter)

0 Biologie (ETH)
1 Biologie (ETH)
2 Biologie (ETH)
5 Autonome Agile Drohnen (UZH)
9 Umweltingenieurwissenschaften (ETH)
18 Autonome Agile Drohnen (UZH)
26 Biologie (ETH)
37 Raumbezogene Ingenieurwissenschaften (ETH)
52 Raumbezogene Ingenieurwissenschaften (ETH)
60 Autonome Agile Drohnen (UZH)
65 Biologie (ETH)
67 Biologie (ETH)
77 Architektur (ETH)
78 Biologie (ETH)
81 Einblick in aktuelle Forschung am Physik-Institut (UZH)
86 Autonome Agile Drohnen (UZH)
92 Umweltingenieurwissenschaften (ETH)
98 Architektur (ETH)
100 Biologie (ETH)
119 Biologie (ETH)
122 Biologie (ETH)
123 Autonome Agile Drohnen (UZH)
125 Autonome Agile Drohnen (UZH)
129 Biologie (ETH)
137 Architektur (ETH)
150 Autonome Agile Drohnen (UZH)
152 Architektur (ETH)
160 Autonome Agile Drohnen (UZH)
163 Einblick in aktuelle Forschung am Physik-Institut (UZH)
169 Autonome Agile Drohnen (UZH)
171 Architektur (ETH)
174 Raumbezogene Ingenieurwissenschaften (ETH)
32


In [54]:
# Frequency of each distinct value in `first_choice`.
# NOTE(review): this cell's execution count (In [54]) is lower than that of the
# cell assigning `first_choice` (In [57]), and the recorded output names a
# different column — the stored output may be stale; re-run in order to confirm.
first_choice.value_counts()

Hocoma    1
Name: companies_second_12, dtype: int64

In [23]:
# Value at positional index 16 of `gene_expression`.
# NOTE(review): executed as In [23], before the In [57] cell that assigns
# `gene_expression` — the stored output may come from an earlier definition.
gene_expression.iloc[16]

'Universitätsspital Zürich'

In [61]:
# Look up the 'total_score' column of choices_df.
# NOTE(review): the recorded output of this cell is KeyError: 'total_score',
# so the column was missing when it last ran — confirm whether
# choices.get_choices is expected to provide it.
choices_df['total_score']

KeyError: 'total_score'