In [0]:
import numpy as np
class GeneticAgent():
  
  def __init__(self,population_size,chromosome_size,gene_variant,elite_pop_size,mutation_chance,crossover_type,gym_env=True):
    self.population_size = population_size
    self.chromosome_size = chromosome_size
    self.gene_variant = gene_variant
    self.elite_pop_size = elite_pop_size
    self.mutation_chance = mutation_chance
    self.crossover_type = crossover_type
    self.gym_env = gym_env
    self.population = [np.random.randint(self.gene_variant,size=self.chromosome_size) for _ in range(population_size)]
    
  def select_elites(self,fitness_scores):
    population_ranks = list(reversed(np.argsort(fitness_scores)))
    return [self.population[x] for x in population_ranks[:self.elite_pop_size]]
  
  def select_parents(self,select_prob):
      return self.population[np.random.choice(range(self.population_size),p = select_prob)],self.population[np.random.choice(range(self.population_size),p = select_prob)]
  
  def generate_new_population(self,fitness_scores):
    select_prob = np.array(fitness_scores)/np.sum(fitness_scores)
    child_set = []
    for generate in range(int((self.population_size - self.elite_pop_size)/2)):
      if(self.crossover_type == "Digit"):
        children = self.crossover_with_digit(self.select_parents(select_prob))
      else:
        children = self.crossover_with_pieces(self.select_parents(select_prob))
      child_set.append(children[0])
      child_set.append(children[0])
    self.population = child_set + self.select_elites(fitness_scores = fitness_scores)

  def crossover_with_digit(self,parents):
    children = {0: np.random.randint(self.gene_variant, size = self.chromosome_size),
                1: np.random.randint(self.gene_variant, size = self.chromosome_size)}
    for child in range(2):
      for gene in range(self.chromosome_size):
        if(np.random.uniform() < self.mutation_chance):
          children[child][gene] = np.random.randint(self.gene_variant)
        else:
          if(np.random.uniform() < 0.5):
            children[child][gene] = parents[0][gene]
          else:
            children[child][gene] = parents[1][gene]
    return children[0],children[1]

  def crossover_with_pieces(self,parents):
    pos = np.random.randint(self.chromosome_size)
    child1 = np.concatenate((parents[0][:pos],parents[1][pos:]),axis=0)
    child2 = np.concatenate((parents[1][:pos],parents[0][pos:]),axis=0)
    for i in np.arange(self.chromosome_size):
      if(np.random.uniform() < self.mutation_chance):
        child_1[i] = random.randint(self.gene_variant)
    for i in np.arange(self.chromosome_size):
      if(np.random.uniform() < self.mutation_chance):
        child_2[i] = random.randint(self.gene_variant)
    return child_1,child_2
  
  def gym_play(self,env,total_game_number):
    fitness_scores = []
    if(self.gym_env):
      for individual in self.population:
        total_reward = 0
        for ep in range(total_game_number):
          state = env.reset()
          while(True):
            action = individual[state]
            next_state,reward,done, info = env.step(action)
            if(done):
              total_reward += reward
              break
            state = next_state
        fitness_scores.append(total_reward/total_game_number)


In [9]:
import gym
import matplotlib.pyplot as plt
from matplotlib import ticker
env = gym.make("FrozenLake-v0")
genetic_agent = GeneticAgent(population_size=60,chromosome_size=env.observation_space.n,gene_variant=env.action_space.n,elite_pop_size=4,mutation_chance=0.05,crossover_type="Digit",gym_env=True)
scores_with_digit = []
scores_with_peaces_co = []
total_game_number = 10
total_generation_number = 20
for generation in range(total_generation_number):
  fitness_scores = genetic_agent.gym_play(env,total_game_number=total_game_number)
  genetic_agent.generate_new_population(fitness_scores)
  generation_score = np.average(fitness_scores)
  scores_with_digit_co.append(generation_score)
genetic_agent = GeneticAgent(population_size=60,chromosome_size=env.observation_space.n,gene_variant=env.action_space.n,elite_pop_size=4,mutation_chance=0.05,crossover_type="Piece",gym_env=True)
for generation in range(total_generation_number):
  fitness_scores = genetic_agent.gym_play(env,total_game_number=total_game_number)
  genetic_agent.generate_new_population(fitness_scores)
  generation_score = np.average(fitness_scores)
  scores_with_peaces_co.append(generation_score)

y1 = scores_with_digit_co
y2 = scores_with_peaces_co

x = np.arange(total_generation_number)

plt.subplot(2,1,1)
plt.plot(x,y1,'o-')
plt.title("Genetic Algorithms for R1 Environments")
plt.ylabel("Scores of Digit co")
locator = ticker.MultipleLocator(2)
plt.gca().xaxis.set_major_locator(locator)
plt.subplot(2,1,2)
plt.plot(x,y2,'*-')

TypeError: ignored