In [1]:
from collections import deque
import numpy as np
import pandas as pd
import seaborn as sns
import random
import pickle
import glob

from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

import wandb

from tetris.Environment import TetrisEnv
from util.decaying import DecayingLinear

In [2]:
# Number of genomes in the population; the training loop refills it to this size every generation.
POPULATION_SIZE = 50
ELITE_SIZE = 3  # elitism: the n fittest genomes are copied into the next generation without recombination/mutation

# Number of distinct individuals drawn for each tournament.
TOURNAMENT_SIZE = 3
# Index array 0..POPULATION_SIZE-1; in the visible cells it is only used by the commented-out NumPy tournament variant.
TOURNAMENT_RANGE = np.arange(POPULATION_SIZE)

# Number of weights in one genome.
# NOTE(review): presumably one weight per heuristic value returned by TetrisEnv — confirm.
GENOM_SIZE = 25

# A fitness evaluation ends as soon as env.moves reaches this cap.
MOVE_AMOUNT_CAP = 400

In [3]:
# Single shared environment; determine_fitness() resets it and plays one game per genome.
env = TetrisEnv()
# Spread of the mutation noise: mutate() uses mutation_rate.get() as the standard deviation,
# and the training loop calls mutation_rate.step() once per generation.
# NOTE(review): presumably decays linearly from 2 to 0.5 over 10 steps — confirm against util.decaying.
mutation_rate = DecayingLinear(2, 0.5, 10)

In [4]:
# sadly slower:
# def tournament_selection_1(fitnesses):
#     tournament = np.random.choice(TOURNAMENT_RANGE, size=TOURNAMENT_SIZE, replace=False)
    
#     return tournament[np.argmax(np.take(fitnesses, tournament))]

def tournament_selection(fitnesses, tournament_size=None):
    """Pick one individual by tournament selection.

    Draws ``tournament_size`` distinct indices uniformly at random and returns
    the index of the drawn individual with the highest fitness. When several
    participants share the highest fitness, the one drawn first wins.

    Args:
        fitnesses: Sequence of fitness values, one per individual.
        tournament_size: Number of participants per tournament. Defaults to
            the module-level ``TOURNAMENT_SIZE``.

    Returns:
        Index into ``fitnesses`` of the tournament winner.

    Raises:
        ValueError: If ``tournament_size`` is not between 1 and
            ``len(fitnesses)``.
    """
    if tournament_size is None:
        tournament_size = TOURNAMENT_SIZE

    # random.sample draws without replacement, replacing the manual
    # "re-draw until unseen" loop (which never terminated when the tournament
    # was larger than the population).
    participants = random.sample(range(len(fitnesses)), tournament_size)

    # Compare the participants with each other instead of against a fixed
    # sentinel, so arbitrarily low fitness values still yield a valid winner.
    return max(participants, key=lambda index: fitnesses[index])

def tournament_selection_parents(fitnesses):
    """Select two different parent indices, each through its own tournament.

    The first parent is fixed by one tournament; further tournaments are run
    until one produces an index different from the first parent.

    Returns:
        Tuple ``(first_parent_index, second_parent_index)``.
    """
    first = tournament_selection(fitnesses)

    while True:
        second = tournament_selection(fitnesses)
        if second != first:
            return first, second

In [5]:
def get_best_state(heuristics, genome):
    """Return the row index of ``heuristics`` with the largest weighted sum.

    Each row of ``heuristics`` (presumably one row per candidate state) is
    multiplied element-wise by ``genome`` and summed; the scoring is purely
    linear for now.
    """
    # todo: test non-linear scoring functions
    row_scores = np.multiply(heuristics, genome).sum(axis=1)
    return row_scores.argmax()

In [6]:
def crossover(parent1, parent2):
    """Single-point crossover of two genomes.

    A cut point is drawn uniformly from ``1 .. len(parent1) - 1`` so that each
    child always receives at least one gene from each parent. (A cut at 0
    would merely hand back the two parents in swapped order.)

    Args:
        parent1: 1-D array-like genome.
        parent2: 1-D array-like genome of the same length as ``parent1``.

    Returns:
        Tuple ``(child1, child2)`` of new arrays: ``child1`` is the head of
        ``parent1`` joined to the tail of ``parent2``; ``child2`` is the
        complementary combination.
    """
    genome_length = len(parent1)

    if genome_length < 2:
        # Too short to cut anywhere: return independent copies of the parents.
        return np.array(parent1), np.array(parent2)

    # Derive the range from the genome itself rather than a global constant so
    # the function works for genomes of any length.
    cut_point = random.randrange(1, genome_length)

    child1 = np.concatenate([parent1[:cut_point], parent2[cut_point:]])
    child2 = np.concatenate([parent2[:cut_point], parent1[cut_point:]])

    return child1, child2

def two_point_crossover(parent1, parent2):
    """Recombine two genomes by applying ``crossover`` twice in a row.

    The two children of the first pass are fed back into ``crossover`` as the
    parents of the second pass, which draws its own cut point.

    Returns:
        Tuple ``(child1, child2)`` produced by the second pass.
    """
    first_pass_a, first_pass_b = crossover(parent1, parent2)
    return crossover(first_pass_a, first_pass_b)

In [7]:
def mutate(genome):
    """Return a mutated copy of ``genome``.

    Every gene is multiplied by its own factor drawn from a normal
    distribution with mean 1 and standard deviation ``mutation_rate.get()``.
    """
    scale_factors = np.random.normal(loc=1, scale=mutation_rate.get(), size=GENOM_SIZE)
    return np.multiply(scale_factors, genome)

In [8]:
def determine_fitness(genome, print_debug=False):
    """Play one game on the shared ``env`` with ``genome`` and return its fitness.

    On every turn the candidate returned by ``env.get_next_states()`` with the
    best ``get_best_state`` score is chosen. The game ends when that candidate
    is flagged done or ``env.moves`` has reached ``MOVE_AMOUNT_CAP``; the final
    statistics are then logged to wandb.

    Args:
        genome: Weight vector passed to ``get_best_state``.
        print_debug: When true, print the final game statistics.

    Returns:
        ``env.score + env.moves``.
    """
    env.reset()

    while True:
        states, scores, clears, heuristics, dones = env.get_next_states()
        pick = get_best_state(heuristics, genome)

        game_over = dones[pick] or env.moves >= MOVE_AMOUNT_CAP
        if not game_over:
            env.step(states[pick], clears[pick], scores[pick])
            continue

        if print_debug:
            print(f'Score: {env.score}')
            print(f'Moves: {env.moves}, Clears: {env.clears}, t-spins: {env.tspins}, all_clears: {env.all_clears}')

        game_stats = {
            'game/score': env.score,
            'game/moves': env.moves,
            'game/tspins': env.tspins,
            'game/all_clears': env.all_clears,
            'game/singles': env.clears[0],
            'game/doubles': env.clears[1],
            'game/triples': env.clears[2],
            'game/quads': env.clears[3],
        }
        wandb.log(game_stats)

        # Moves are added so that genomes which never score still differ in fitness.
        return env.score + env.moves

In [9]:
def get_elite(genomes, fitnesses, elite_size=None):
    """Return the fittest genomes together with their fitness values.

    Args:
        genomes: Array of genomes, one per row.
        fitnesses: Fitness value of each genome, in the same order.
        elite_size: Number of genomes to return. Defaults to the module-level
            ``ELITE_SIZE``. Values larger than the population return everyone;
            0 returns empty arrays.

    Returns:
        Tuple ``(elite_genomes, elite_fitnesses)`` ordered by ascending
        fitness, i.e. the best individual is the last entry.
    """
    if elite_size is None:
        elite_size = ELITE_SIZE

    ranked = np.argsort(fitnesses)

    # Slice from an explicit, clamped start index: a plain ``[-elite_size:]``
    # would return the whole population when elite_size is 0.
    start = max(len(ranked) - elite_size, 0)
    indices = ranked[start:]

    return np.take(genomes, indices, axis=0), np.take(fitnesses, indices, axis=0)

In [10]:
def generate_random_population():
    """Create the initial population and evaluate every genome once.

    Genes are absolute values of samples from a normal distribution centred
    at 2.5, so all initial weights are non-negative.

    Returns:
        Tuple ``(genomes, fitnesses)``: a ``(POPULATION_SIZE, GENOM_SIZE)``
        array and the matching array of ``determine_fitness`` results.
    """
    population_shape = (POPULATION_SIZE, GENOM_SIZE)
    genomes = np.abs(np.random.normal(2.5, size=population_shape))

    evaluated = [determine_fitness(candidate) for candidate in genomes]

    return genomes, np.array(evaluated)

### Training loop

In [11]:
# Start the W&B run and record the hyper-parameters of this experiment.
wandb.init(
    project='tetris-genetic',
    config={
        'POPULATION_SIZE': POPULATION_SIZE,
        'ELITE_SIZE': ELITE_SIZE,
        'TOURNAMENT_SIZE': TOURNAMENT_SIZE,
        'GENOM_SIZE': GENOM_SIZE,
        'MOVE_AMOUNT_CAP': MOVE_AMOUNT_CAP,
    },
)

[34m[1mwandb[0m: Currently logged in as: [33mfischly[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
# Generation zero: random genomes, each evaluated by playing one game.
population, fitnesses = generate_random_population()

In [13]:


# Evolve the population. Every generation breeds POPULATION_SIZE - ELITE_SIZE
# children and carries the ELITE_SIZE fittest genomes over unchanged.
for generation in range(21):
    children_genomes = []
    children_fitnesses = []

    with tqdm(total=POPULATION_SIZE - ELITE_SIZE) as pbar:
        while len(children_genomes) < POPULATION_SIZE - ELITE_SIZE:
            # parent selection
            parent1, parent2 = tournament_selection_parents(fitnesses)
            parent1, parent2 = population[parent1], population[parent2]

            # recombination
            child1, child2 = two_point_crossover(parent1, parent2)

            # mutation
            child1, child2 = mutate(child1), mutate(child2)

            # fitness evaluation
            child1_fitness, child2_fitness = determine_fitness(child1), determine_fitness(child2)

            # store children
            children_fitnesses.append(child1_fitness)
            children_fitnesses.append(child2_fitness)

            # Log each child's own fitness (the second entry used to repeat child1's).
            wandb.log({ 'genetic/generation': generation, 'genetic/fitness': child1_fitness})
            wandb.log({ 'genetic/generation': generation, 'genetic/fitness': child2_fitness})

            children_genomes.append(child1)
            children_genomes.append(child2)

            pbar.update(2)


    # update population
    elite_genomes, elite_fitnesses = get_elite(population, fitnesses)

    # get_elite returns ascending fitness, so iterate in reverse to log the
    # best elite first; this works for any ELITE_SIZE, not just 3.
    for elite_fitness in elite_fitnesses[::-1]:
        wandb.log({ 'elite/generation': generation, 'elite/fitness': elite_fitness})


    population[:ELITE_SIZE] = elite_genomes
    # Elites are re-evaluated instead of reusing elite_fitnesses, so the best
    # stored fitness can drop between generations.
    # NOTE(review): presumably intentional because games are randomised — confirm.
    fitnesses[:ELITE_SIZE] = [determine_fitness(elite_genome) for elite_genome in elite_genomes]

    # Children are produced in pairs, so drop any surplus child.
    population[ELITE_SIZE:] = children_genomes[:POPULATION_SIZE - ELITE_SIZE]
    fitnesses[ELITE_SIZE:] = children_fitnesses[:POPULATION_SIZE - ELITE_SIZE]

    print(f'GENERATION {generation} DONE! Max fitness: {np.max(fitnesses)}')

    mutation_rate.step()
    
# env.get_next_states()

  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 0 DONE! Max fitness: 834


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 1 DONE! Max fitness: 474


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 2 DONE! Max fitness: 1634


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 3 DONE! Max fitness: 3667


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 4 DONE! Max fitness: 13437


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 5 DONE! Max fitness: 6762


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 6 DONE! Max fitness: 7325


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 7 DONE! Max fitness: 7538


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 8 DONE! Max fitness: 14862


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 9 DONE! Max fitness: 6050


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 10 DONE! Max fitness: 6488


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 11 DONE! Max fitness: 11490


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 12 DONE! Max fitness: 9925


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 13 DONE! Max fitness: 5975


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 14 DONE! Max fitness: 10375


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 15 DONE! Max fitness: 11625


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 16 DONE! Max fitness: 11200


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 17 DONE! Max fitness: 12175


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 18 DONE! Max fitness: 10300


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 19 DONE! Max fitness: 10150


  0%|          | 0/47 [00:00<?, ?it/s]

GENERATION 20 DONE! Max fitness: 14707


In [14]:
# Extract the ELITE_SIZE fittest genomes of the final population (ascending fitness, best last).
elite_genomes, elite_fitnesses = get_elite(population, fitnesses)

In [15]:
# for n in elite_genomes[2]:
    # print("{:02f}".format(n))

In [20]:
# Replay one game with elite entry 2 and print its statistics; get_elite sorts ascending,
# so index 2 is the fittest genome while ELITE_SIZE is 3.
determine_fitness(elite_genomes[2], True)

Score: 2875.0
Moves: 181, Clears: [22, 7, 5, 2], t-spins: 0, all_clears: 0


3056.0

wandb: Waiting for W&B process to finish... (success).
wandb: - 0.002 MB of 0.002 MB uploaded (0.000 MB deduped)

In [17]:
# Save the elite genomes of this run as plain text, one genome per row.
# NOTE(review): the file name says "300limit" while MOVE_AMOUNT_CAP is 400 — confirm which is intended.
np.savetxt('models/genetic-300limit-5.txt', elite_genomes, fmt='%f')

In [18]:
# Also save the complete final population, one genome per row.
np.savetxt('models/genetic-300limit-5-FULL.txt', population, fmt='%f')