<a href="https://colab.research.google.com/github/ViRu-ThE-ViRuS/ML_Projects/blob/master/Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import gym

np.random.seed(0)

In [0]:
env_name = 'CartPole-v1'
env = gym.make(env_name)

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import clone_model, model_from_json

model = Sequential()
model.add(Dense(input_shape=env.observation_space.shape, units=env.action_space.n, activation='linear', use_bias=False))
model.compile(loss='mse')
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 2)                 8         
Total params: 8
Trainable params: 8
Non-trainable params: 0
_________________________________________________________________


In [0]:
import multiprocessing
from multiprocessing.pool import ThreadPool


class EvolutionaryStrategy():
    evolution = 0

    # 0.50, 0.20, 0.995, 0.95, 0.10
    def __init__(self, model, env_name, population_size=50, population=None, mutation=3.0, mutation_rate=0.50, mutation_decay=False, mutation_decay_rate=0.99, variable_crossed_progeny=False, selection_cutoff_decay=False, selection_cutoff_decay_rate=0.95, selection_cutoff=0.20):
        # setup the strategy
        self.model = model
        self.population_size = population_size

        self.mutation_rate = mutation_rate
        self.mutation_decay_rate = mutation_decay_rate if mutation_decay else 1
        self.mutation = mutation
        
        self.selection_cutoff_decay_rate = selection_cutoff_decay_rate if selection_cutoff_decay else 1
        self.selection_cutoff = selection_cutoff
        self.variable_crossed_progeny = variable_crossed_progeny
        self.env_name = env_name

        if population is None:
            self.candidate_shape = model.get_weights()[0].shape
            self.population = np.random.rand(self.population_size, *self.candidate_shape)
        else:
            print(f'evolution starting with population with average_performance: {np.average(self._evaluate(population)[:, 1])}')
            self.population = population
            self.candidate_shape = self.population.shape[1:]

    def evolve_step(self, return_population=False):
        # complete one evolution generation cycle
        self.evolution += 1
        print(f'EVOLUTION {self.evolution}')

        print(f'\tselecting from population...')
        if self.evolution-1:
            self.selection_cutoff *= self.selection_cutoff_decay_rate

        n_selected = int(self.population_size * self.selection_cutoff)
        scores = self._evaluate(self.population)

        # apply cutoff and get lucky outliers
        lucky_factor = 0.20 # change/remove ?
        top_selection, bottom_selection = int(n_selected*(1-lucky_factor)), int(n_selected*lucky_factor)
        scores, scrap = scores[:n_selected], scores[n_selected:]
        scores = scores[np.random.choice(np.arange(n_selected), top_selection)]
        scrap = scrap[np.random.choice(np.arange(self.population_size-n_selected), bottom_selection)]
        selected_candidates = np.vstack((scores, scrap))
        selected_population = self.population[np.array(selected_candidates[:, 0], dtype=np.int32)]

        print(f'\tbreeding from selected population...')
        n_bred = self.population_size # decay?
        progeny = self._breed_parallel(selected_population, n_bred)

        # we have a new population (the progeny from the best of the last generation)
        print(f'\tevaluating progeny...')
        self.population = progeny
        generation_evaluation = self._evaluate(self.population)
        best_performance = generation_evaluation[0, 1]
        average_performance = np.average(generation_evaluation[:, 1])
        print(f'evolution {self.evolution}: top_generation_performance = {best_performance}, average_generation_performance = {average_performance}')

        if not return_population:
            return self.population[int(generation_evaluation[0, 0])], average_performance
        else:
            return self.population, average_performance

    def _evaluate(self, population, test_episodes=5, from_thread=False):
        # evalute the given population
        scores = []
        
        env = gym.make(self.env_name)
        if from_thread:
            model = clone_model(self.model)    
        else:
            model = self.model

        for index, candidate in enumerate(population):
            # also incorporate the bias?
            model.set_weights([candidate])
            rewards = []
            for episode in range(test_episodes):
                state = env.reset()
                total_reward = 0
                done = False

                while not done:
                    action = np.argmax(model.predict(np.expand_dims(state, axis=0)))
                    state, reward, done, _ = env.step(action)
                    total_reward += reward

                rewards.append(total_reward)
            scores.append([index, np.average(rewards)])
        
        scores = np.array(scores)
        scores = scores[scores[:, 1].argsort(axis=0)][::-1]
        return scores

    def _breed(self, population, progeny_to_generate):
        # breed (crossover and mutate) within population
        print(f'\tbreeding from selected population...')

        bred = []
        for _ in range(progeny_to_generate):
            parents = population[np.random.choice(population.shape[0], 2)]
            progeny = self._mutate(self._crossover(parents))
            bred.append(progeny)

        return np.array(bred)

    def _breed_parallel(self, population, progeny_to_generate):
        # breed (crossover and mutate) within population
        def breeder_function(parents):
            return self._mutate(self._crossover(parents, from_thread=True), from_thread=True)
        
        args = []
        for _ in range(progeny_to_generate):
            args.append(population[np.random.choice(population.shape[0], 2)])

        bred = []
        pool = ThreadPool(processes=multiprocessing.cpu_count()//2)
        for i in pool.imap_unordered(breeder_function, args, chunksize=multiprocessing.cpu_count()):
            bred.append(i)

        return np.array(bred)

    def _mutate(self, progeny, from_thread=False):
        # mutate the given progeny
        if self.evolution-1:
            self.mutation_rate *= self.mutation_decay_rate
            self.mutation *= self.mutation_decay_rate

        mutating = int(self.mutation_rate * np.multiply(*self.population.shape[1:]))
        original_shape = progeny.shape
        progeny = progeny.reshape(-1)

        # try to mutate non-complete random chromosomes?

        while mutating != 0:            
            chromosome = np.random.randint(len(progeny))
            low = progeny[chromosome] - self.mutation * progeny[chromosome]
            high = progeny[chromosome] + self.mutation * progeny[chromosome]
            # progeny[chromosome] = ((high-low) * np.random.random_sample() + low)
            mutating -= 1

        return progeny.reshape(original_shape)

    def _crossover(self, parents, from_thread=False):
        # create a pogeny from the selected parents
        original_shape = parents[0].shape
        left, right = parents[0].reshape(-1), parents[1].reshape(-1)

        # modify crossover rate?
        if not self.variable_crossed_progeny:
            crossover_rate = 0.5 # dependence on left parent
            crossover_p = [crossover_rate, 1-crossover_rate]
        else:
            scores = self._evaluate([left.reshape(original_shape), right.reshape(original_shape)], from_thread=from_thread)
            if scores[0, 0] == 0:
                p_left, p_right = scores[0, 1], scores[1, 1]
            else:
                p_left, p_right = scores[1, 1], scores[0, 1]
            
            crossover_p = [p_left/(p_left + p_right), p_right/(p_left + p_right)]

        # simulate error in copying?
        
        progeny = []
        for index in range(len(left)):
            chance = np.random.choice(2, p=crossover_p)
            if chance:
                progeny.append(right[index])
            else:
                progeny.append(left[index])
        
        return np.array(progeny).reshape(original_shape)

In [0]:
def evaluate_model(model, test_episodes=50):
    rewards = []

    for _ in range(test_episodes):
        state = env.reset()
        total_reward = 0
        done = False

        while not done:
            action = np.argmax(model.predict(np.expand_dims(state, axis=0)))
            state, reward, done, _ = env.step(action)
            total_reward += reward
        
        rewards.append(total_reward)

    print(f'\nmodel_average_performance over {test_episodes} episodes = {np.average(rewards)}')
    print(f'model_peak_performance = {np.max(rewards)}, model_min_performance = {np.min(rewards)}')

In [0]:
e = EvolutionaryStrategy(model, env_name, mutation_decay=True, variable_crossed_progeny=True, selection_cutoff_decay=True)
evolution_track = []
for _ in range(15):
    candidate, evolution_average = e.evolve_step()
    evolution_track.append(evolution_average)

model.set_weights([candidate])
evaluate_model(model)

EVOLUTION 1
	selecting from population...
	breeding from selected population...
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
	evaluating progeny...
evolution 1: top_generation_performance = 500.0, average_generation_performance = 245.49200000000002
EVOLUTION 2
	selecting from population...
	breeding from selected population...
	evaluating progeny...
evolution 2: top_generation_performance = 500.0, average_generation_performance = 410.944
EVOLUTION 3
	selecting from population...
	breeding from selected population...
	evaluating progeny...
evolution 3: top_generation_performance = 500.0, average_generation_performance = 457.092
EVOLUTION 4
	selecting from population...
	breeding from selected population...
	evaluating progeny...
evolution 4: top_generation_performance = 500.0, average_generation_per

In [0]:
import matplotlib.pyplot as plt
% matplotlib inline
plt.plot(np.arange(len(evolution_track)), evolution_track)

In [0]:
def save_model(model, fname):
    model_json = model.to_json()
    
    with open(f'{fname}.json', 'w') as file:
        file.write(model_json)

    model.save_weights(f'{fname}.h5')
    print(f'saved model {fname} to disk')

save_model(model, f'model_stage_{e.evolution}_{e.mutation}_{e.selection_cutoff}')

### Todo
- [x] decay mutation_rate?
    - [x] no negative mutations? (test further)
- [ ] incorporate multiple layers?
- [ ] incorporate bias?
- [x] crossover based on parental performance?
    - [x] increase convergence? (test further)
- [x] parallelise code?
- [x] decay selection cutoff?
- [ ] crossover error?
- [x] selection outliers?
- [ ] mutate non-random chromosomes?