Skip to content

Commit

Permalink
Refactors. Changes Genomes to persistent vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
erp12 committed Jun 2, 2020
1 parent af16650 commit f4d419f
Show file tree
Hide file tree
Showing 25 changed files with 224 additions and 253 deletions.
15 changes: 12 additions & 3 deletions pyshgp/gp/estimators.py
Expand Up @@ -8,7 +8,9 @@
import pyshgp.gp.variation as vr
from pyshgp.gp.evaluation import DatasetEvaluator
from pyshgp.gp.genome import GeneSpawner
from pyshgp.push.interpreter import PushInterpreter, DEFAULT_INTERPRETER, PushConfig, ProgramSignature
from pyshgp.push.interpreter import PushInterpreter, DEFAULT_INTERPRETER
from pyshgp.push.config import PushConfig
from pyshgp.push.program import ProgramSignature
from pyshgp.utils import list_rindex
from pyshgp.validation import check_is_fitted, check_X_y
from pyshgp.monitoring import DEFAULT_VERBOSITY_LEVELS
Expand Down Expand Up @@ -89,6 +91,12 @@ def __init__(self,
self.verbose = verbose
self.ext = kwargs

# Initialize attributes that will be set later.
self.evaluator = None
self.signature = None
self.search = None
self.solution = None

self.verbosity_config = DEFAULT_VERBOSITY_LEVELS[self.verbose]
self.verbosity_config.update_log_level()

Expand All @@ -108,8 +116,6 @@ def _build_search_algo(self):
var_strat = vr.VariationStrategy()
for op_name, prob in self.variation_strategy.items():
var_op = vr.get_variation_operator(op_name)
if not isinstance(var_op, vr.VariationOperator):
var_op = self._build_component(var_op)
var_strat.add(var_op, prob)
self.variation_strategy = var_strat

Expand All @@ -129,6 +135,9 @@ def _build_search_algo(self):
)
self.search = sr.get_search_algo(self._search_name, config=search_config, **self.ext)

def is_initialized(self) -> bool:
return self.search is not None

def fit(self, X, y):
"""Run the search algorithm to synthesize a push program.
Expand Down
4 changes: 2 additions & 2 deletions pyshgp/gp/evaluation.py
Expand Up @@ -66,7 +66,7 @@ class Evaluator(ABC):
When a program's output cannot be evaluated on a particular case, the
penalty error is assigned. Default is 5e5.
verbosity_config : Optional[VerbosityConfig] (default = None)
A VerbosityConfig controling what is logged during evaluation.
A VerbosityConfig controlling what is logged during evaluation.
Default is no verbosity.
"""
Expand All @@ -83,7 +83,7 @@ def __init__(self,
def default_error_function(self, actuals, expecteds) -> np.array:
"""Produce errors of actual program output given expected program output.
The default error function is intented to be a universal error function
The default error function is intended to be a universal error function
for Push programs which only output a subset of the standard data types.
Parameters
Expand Down
207 changes: 82 additions & 125 deletions pyshgp/gp/genome.py
@@ -1,124 +1,84 @@
"""The :mod:`genome` module defines classes related to Genomesself.
from __future__ import annotations

The ``Genome`` class defines Genomes as flat, linear representations of Push
programs. The ``GenomeSpawner`` class is a factory of random genes (``Atoms``)
and random ``Genomes``.
"""
from collections import MutableSequence
from typing import Callable, Sequence, Union, Tuple, Optional, Any
from enum import Enum
from typing import Sequence, Union, Any, Callable, Optional, Tuple

import numpy as np
from pyrsistent import PRecord, field, CheckedPVector

from pyshgp.push.interpreter import ProgramSignature, Program
from pyshgp.push.type_library import infer_literal
from pyshgp.push.atoms import Atom, Closer, Literal, Instruction, CodeBlock
from pyshgp.push.instruction_set import InstructionSet
from pyshgp.gp.evaluation import Evaluator
from pyshgp.utils import DiscreteProbDistrib, Saveable, Copyable
from pyshgp.monitoring import VerbosityConfig, DEFAULT_VERBOSITY_LEVELS, log
from pyshgp.push import InstructionSet, ProgramSignature, Program
from pyshgp.push.atoms import Atom, CodeBlock, Instruction, Closer, Literal
from pyshgp.push.type_library import infer_literal
from pyshgp.utils import DiscreteProbDistrib


class Opener:
class Opener(PRecord):
"""Marks the start of one or more CodeBlock."""
count = field(type=int, mandatory=True)

__slots__ = ["count"]

def __init__(self, count: int):
self.count = count

def dec(self):
"""Decrements the count by 1."""
self.count -= 1


def _has_opener(l: Sequence) -> bool:
return sum([isinstance(_, Opener) for _ in l]) > 0


class Genome(MutableSequence, Saveable, Copyable):
"""A flat sequence of Atoms where each Atom is a "gene" in the genome."""

def __init__(self, atoms: Sequence[Atom] = None):
self.list = []
if atoms is not None:
for el in atoms:
self.append(el)

def __getitem__(self, i: int) -> Any:
return self.list.__getitem__(i)
def dec(self) -> Opener:
return Opener(count=self.count - 1)

def __setitem__(self, i: int, o: Any) -> None:
self.list.__setitem__(i, Genome._conform_element(o))

def __delitem__(self, i: int) -> None:
self.list.__delitem__(i)
def _has_opener(seq: Sequence) -> bool:
for el in seq:
if isinstance(el, Opener):
return True
return False

def __len__(self) -> int:
return self.list.__len__()

def __eq__(self, other):
return isinstance(other, Genome) and self.list == other.list
class Genome(CheckedPVector):
__type__ = Atom
__invariant__ = lambda a: (not isinstance(a, CodeBlock), 'CodeBlock')

def __repr__(self):
return "Genome" + self.list.__repr__()

def append(self, atom: Atom) -> None:
"""Append a non-CodeBlock Atom to the end of the Genome."""
self.list.append(Genome._conform_element(atom))
def genome_to_code(genome: Genome) -> CodeBlock:
"""Translate into nested CodeBlocks.
def insert(self, index: int, atom: Atom) -> None:
"""Insert Atom before index."""
self.list.insert(Genome._conform_element(atom))
These CodeBlocks can be considered the Push program representation of
the Genome which can be executed by a PushInterpreter and evaluated
by an Evaluator.
@staticmethod
def _conform_element(el: Any) -> Atom:
if isinstance(el, CodeBlock):
raise ValueError("Cannot add CodeBlock to genomes. Genomes must be kept flat.")
return el

def to_code_block(self) -> CodeBlock:
"""Translate into nested CodeBlocks.
These CodeBlocks can be considered the Push program representation of
the Genome which can be executed by a PushInterpreter and evaluated
by an Evaluator.
"""
plushy_buffer = []
for atom in self:
plushy_buffer.append(atom)
if isinstance(atom, Instruction) and atom.code_blocks > 0:
plushy_buffer.append(Opener(atom.code_blocks))

push_buffer = []
while True:
# If done with plush but unclosed opens, recur with one more close.
if len(plushy_buffer) == 0 and _has_opener(push_buffer):
plushy_buffer.append(Closer())
# If done with plush and all opens closed, return push.
elif len(plushy_buffer) == 0:
return CodeBlock(*push_buffer)
else:
atom = plushy_buffer[0]
# If next instruction is a close, and there is an open.
if isinstance(atom, Closer) and _has_opener(push_buffer):
ndx, opener = [(ndx, el) for ndx, el in enumerate(push_buffer) if isinstance(el, Opener)][-1]
post_open = push_buffer[ndx + 1:]
pre_open = push_buffer[:ndx]
if opener.count == 1:
push_buffer = pre_open + [post_open]
else:
opener.dec()
push_buffer = pre_open + [post_open, opener]
# If next instruction is a close, and there is no open.
elif not isinstance(atom, Closer):
push_buffer.append(atom)
del plushy_buffer[0]

def make_str(self) -> str:
"""Create one simple str representation of the Genome."""
return " ".join([str(gene) for gene in self])
"""
plushy_buffer = []
for atom in genome:
plushy_buffer.append(atom)
if isinstance(atom, Instruction) and atom.code_blocks > 0:
plushy_buffer.append(Opener(count=atom.code_blocks))

push_buffer = []
while True:
# If done with plush but unclosed opens, recur with one more close.
if len(plushy_buffer) == 0 and _has_opener(push_buffer):
plushy_buffer.append(Closer())
# If done with plush and all opens closed, return push.
elif len(plushy_buffer) == 0:
return CodeBlock(*push_buffer)
else:
atom = plushy_buffer[0]
# If next instruction is a close, and there is an open.
if isinstance(atom, Closer) and _has_opener(push_buffer):
ndx, opener = [(ndx, el) for ndx, el in enumerate(push_buffer) if isinstance(el, Opener)][-1]
post_open = push_buffer[ndx + 1:]
pre_open = push_buffer[:ndx]
if opener.count == 1:
push_buffer = pre_open + [post_open]
else:
opener = opener.dec()
push_buffer = pre_open + [post_open, opener]
# If next instruction is a close, and there is no open.
elif not isinstance(atom, Closer):
push_buffer.append(atom)
del plushy_buffer[0]


class GeneTypes(Enum):
INSTRUCTION = 1
CLOSE = 2
LITERAL = 3
ERC = 4


class GeneSpawner:
Expand Down Expand Up @@ -173,7 +133,7 @@ class GeneSpawner:

def __init__(self,
instruction_set: InstructionSet,
literals: Sequence[Union[Literal, Any]],
literals: Sequence[Any],
erc_generators: Sequence[Callable],
distribution: DiscreteProbDistrib = "proportional"):
self.instruction_set = instruction_set
Expand All @@ -184,10 +144,10 @@ def __init__(self,
if distribution == "proportional":
self.distribution = (
DiscreteProbDistrib()
.add("instruction", len(instruction_set))
.add("close", sum([i.code_blocks for i in instruction_set.values()]))
.add("literal", len(literals))
.add("erc", len(erc_generators))
.add(GeneTypes.INSTRUCTION, len(instruction_set))
.add(GeneTypes.CLOSE, sum([i.code_blocks for i in instruction_set.values()]))
.add(GeneTypes.LITERAL, len(literals))
.add(GeneTypes.ERC, len(erc_generators))
)
else:
self.distribution = distribution
Expand Down Expand Up @@ -231,7 +191,7 @@ def random_erc(self) -> Literal:
erc_value = infer_literal(erc_value, self.type_library)
return erc_value

def spawn_atom(self) -> Atom:
def random_gene(self) -> Atom:
"""Return a random Atom based on the GenomeSpawner's distribution.
Returns
Expand All @@ -241,13 +201,13 @@ def spawn_atom(self) -> Atom:
"""
atom_type = self.distribution.sample()
if atom_type == "instruction":
if atom_type is GeneTypes.INSTRUCTION:
return self.random_instruction()
elif atom_type == "close":
elif atom_type is GeneTypes.CLOSE:
return Closer()
elif atom_type == "literal":
elif atom_type is GeneTypes.LITERAL:
return self.random_literal()
elif atom_type == "erc":
elif atom_type is GeneTypes.ERC:
return self.random_erc()
else:
raise ValueError("GenomeSpawner distribution bad atom type {t}".format(t=str(atom_type)))
Expand Down Expand Up @@ -275,11 +235,8 @@ def spawn_genome(self, size: Union[int, Sequence[int]]) -> Genome:
if isinstance(size, Sequence):
size = np.random.randint(size[0], size[1]) + 1

gn = Genome()
for ndx in range(size):
gn.append(self.spawn_atom())

return gn
genes = [self.random_gene() for _ in range(size)]
return Genome.create(genes)


class GenomeSimplifier:
Expand All @@ -290,9 +247,9 @@ class GenomeSimplifier:
introduce subtle errors or behaviors that is not covered by the training
cases. Removing the superfluous code makes genomes (and thus programs)
smaller and easier to understand. More importantly, simplification can
imporve the generalization of the given genome/program.
improve the generalization of the given genome/program.
The process of geneome simplification is iterative and closely resembles
The process of genome simplification is iterative and closely resembles
simple hill climbing. For each iteration, the simplifier will randomly
select a small number of random genes to remove. The Genome is re-evaluated
and if its error gets worse, the change is reverted. After repeating this
Expand Down Expand Up @@ -320,16 +277,16 @@ def __init__(self,
self.verbosity_config = verbosity_config

def _remove_rand_genes(self, genome: Genome) -> Genome:
gn = genome.copy(deep=True)
gn = genome
n_genes_to_remove = min(np.random.randint(1, 4), len(genome) - 1)
ndx_of_genes_to_remove = np.random.choice(np.arange(len(gn)), n_genes_to_remove, replace=False)
ndx_of_genes_to_remove[::-1].sort()
for ndx in ndx_of_genes_to_remove:
del gn[ndx]
gn = gn.delete(ndx)
return gn

def _errors_of_genome(self, genome: Genome) -> np.ndarray:
cb = genome.to_code_block()
cb = genome_to_code(genome)
program = Program(cb, self.program_signature)
return self.evaluator.evaluate(program)

Expand All @@ -354,7 +311,7 @@ def simplify(self,
Parameters
----------
genome
The Genome to simplifiy.
The Genome to simplify.
original_errors
Error vector of the genome to simplify.
steps
Expand All @@ -363,7 +320,7 @@ def simplify(self,
Returns
-------
pushgp.gp.genome.Genome
A Genome with random contents of a given size.
The shorter Genome that expresses the same computation.
"""
if self.verbosity_config is None:
Expand Down
8 changes: 4 additions & 4 deletions pyshgp/gp/individual.py
Expand Up @@ -8,8 +8,8 @@

import numpy as np

from pyshgp.gp.genome import Genome
from pyshgp.push.interpreter import Program, ProgramSignature
from pyshgp.gp.genome import Genome, genome_to_code
from pyshgp.push.program import Program, ProgramSignature
from pyshgp.utils import Saveable, Copyable


Expand All @@ -30,7 +30,7 @@ class Individual(Saveable, Copyable):
"""

__slots__ = [
"genome", "signature", "push_config",
"genome", "signature",
"_program", "_error_vector", "_total_error", "_error_vector_bytes"
]

Expand All @@ -45,7 +45,7 @@ def __init__(self, genome: Genome, signature: ProgramSignature):
def get_program(self) -> Program:
"""Push program of individual. Taken from Plush genome."""
if self._program is None:
cb = self.genome.to_code_block()
cb = genome_to_code(self.genome)
self._program = Program(cb, self.signature)
return self._program

Expand Down

0 comments on commit f4d419f

Please sign in to comment.