In [1]:
import data_analysis_functions_redux as das
import numpy as np
import pandas as pd
import genetic
import time
%load_ext line_profiler

def sse(adf):
    return - np.sum((bdf['Obs'] - adf) ** 2)

def fitness(alpha, initial_level = None):
    f = das.exponential_smoothing(bdf['Obs'], alpha, 0, initial_level=initial_level)
    adf = pd.Series(f[:-1], index=bdf.index).rename("Obs")
    return sse(adf)

def fitness2(alpha, initial_level = None):
    return das.fast_exponential_smoothing(bdf['Obs'],alpha, 0, initial_level=initial_level)

def crossover(params):
    mom, dad = params
    avg = (mom + dad) / 2
    std = max((avg + mom) / 2,.1)
    return np.random.normal(avg, std)

In [2]:
import numpy as np
import random

class Genetic_Vector:
    def __init__(self, vector, generation, parameter_bounds, fitness_function, mutation_rate = 0.015, simple_mutation = True, simple_crossover = True, crossover_function = None, type_check = True, creep_mutate = True):
        self.vector = vector
        self.generation = generation
        self.parameter_bounds = parameter_bounds
        self.fitness = fitness_function
        self.simple_mutation = simple_mutation
        self.mutation_rate = mutation_rate
        self.simple_crossover = simple_crossover
        self.crossover_function = crossover_function
        self.creep_mutate = creep_mutate
        self.fitness_val = self.fitness(*list(self.vector))
        self.vector_length = len(self.vector)

        if type_check:
            if not isinstance(self.vector, np.ndarray):
                self.vector = np.squeeze(np.asarray(self.vector))
                if len(self.vector.shape) > 1:
                    raise TypeError("Parameter vector must be a 1-dimensional array-like")
            
            if isinstance(parameter_bounds, tuple) and len(parameter_bounds) == 2:
                self.parameter_bounds = [parameter_bounds] * self.vector_length
            
            if not all(isinstance(item, tuple) and len(item) == 2 for item in self.parameter_bounds):
                raise TypeError("Parameter parameter_bounds must be a list of 2-tuples")
            
            if not callable(self.fitness):
                raise TypeError("Parameter fitness_function must be a function")

            if not simple_crossover and crossover_function is None:
                raise TypeError("Declare a crossover function, otherwise flag simple_crossover as True")
    
    def __lt__(self, other):
        return self.fitness_val < other.fitness_val
    
    def __le__(self, other):
        return self.fitness_val <= other.fitness_val
    
    def __eq__(self, other):
        return self.fitness_val == other.fitness_val
    
    def __gt__(self, other):
        return self.fitness_val > other.fitness_val
    
    def __ge__(self, other):
        return self.fitness_val >= other.fitness_val
    
    def __ne__(self, other):
        return self.fitness_val != other.fitness_val
        
    def mate(self, other):
        """Mating Function
        
        Parameters
        -----------
        other: Genetic_Vector
            The other parent
        
        Returns
        -------
        child: Genetic_Vector
            Child vector
        """
        if not isinstance(other, Genetic_Vector):
            raise ValueError("Other parent is invalid")
        
        if self.vector_length != other.vector_length:
            raise ValueError("Vector lengths do not match")
        
        child_vector = np.empty(self.vector_length)
        
        #If we are doing simple crossover, we're just going to take one of the two parent parameters
        if self.simple_crossover:
            for i in range(self.vector_length):
                parents = [self, other]
                rand_parent = random.randint(0,1)
                child_vector[i] = parents[rand_parent].vector[i]
        #If we are not doing simple crossover, we're going to evaluate the complex crossover
        else:
            for i in range(self.vector_length):
                parent_param_tuple = (self.vector[i], other.vector[i])
                param_bounds = self.parameter_bounds[i]
                child_vector[i] = self._complex_crossover(parent_param_tuple, param_bounds)
        
        child_mutation_rate = self.mutation_rate + (random.random() * (0.4) - 0.2) * self.mutation_rate

        child_vector = self.mutate(child_vector, child_mutation_rate)
        child_generation = max(self.generation, other.generation) + 1
        
        child = Genetic_Vector(child_vector, child_generation, self.parameter_bounds, self.fitness, mutation_rate = child_mutation_rate, simple_mutation = self.simple_mutation, simple_crossover = self.simple_crossover, crossover_function = self.crossover_function, type_check = False)

        return child

    def mutate(self, vector, mutation_rate):
        """Mutation Function

        Parameters
        ----------
        vector: array
            The vector that will undergo mutation
        
        mutation_rate: float
            The mutation rate
        
        Returns
        -------
        vector: array
            vector that has underwent mutation (if at all)
        """
        if not self.creep_mutate:
            for i in range(len(vector)):
                r = random.random()
                if r < mutation_rate:
                    vector[i] = random.randint(*self.parameter_bounds[i])
            
            return vector
        else:
            for i in range(len(vector)):
                lower, upper = self.parameter_bounds[i]
                r = random.random()
                s = random.random() * (upper - lower) * 0.25 - (upper - lower) * 0.5
                if r < mutation_rate:
                    if vector[i] + s < upper and vector[i] + s > lower:
                        vector[i] += s
            
            return vector
    
    def _complex_crossover(self, params, specific_bounds):
        """Custom crossover function

        Parameters
        ----------
        param: 2-tuple
            The appropriate parameter for both parents
        
        specific_bounds: 2-tuple
            Upper and lower bounds for the parameter
        
        Returns
        -------
        child_value: value
            Result of crossover function
        """
        candidate_within_bounds = False
        #Loop so long as the candidate is not within the bounds 
        while not candidate_within_bounds:
            candidate_value = self.crossover_function(params)
            lower_bound, upper_bound = specific_bounds
            candidate_within_bounds = (candidate_value >= lower_bound) and (candidate_value <= upper_bound)
        
        return candidate_value

class Genetic_Pool:
    def __init__(self, template, size = None, elitism = False, selection_method = "tournament"):
        if not isinstance(template, Genetic_Vector):
            raise TypeError("Parameter template must be a Genetic_Vector object")

        self.template = template
        self.elitism = elitism
        self.members = (self.template.vector_length * 10) if not size else size
        self.selection_method = selection_method
        self.pool = []
    
    def _parent_selection(self):
        fxn_dict = {
            "tournament": self._tournament
        }
        try:
            optimization_chosen = fxn_dict[self.selection_method.lower()]
        except KeyError:
            raise ValueError(f"{self.selection_method} is not a valid optimization method")

        return optimization_chosen()
        
    def _tournament(self, tournament_size = 6):
        """Runs a tournament knockout style for parent selection
        
        Keyword Arguments:
            tournament_size {int} -- Number of entrants to a tournament. The farther from a power of 2, the more likely it is to have diversity (default: {6})
        
        Returns:
            Genetic_Vector[] -- List of Genetic_Vectors that are the selected parents
        """
        
        try:
            candidates = random.sample(self.pool, tournament_size)
        except ValueError:
            return random.sample(self.pool, 2)
        
        rounds = (tournament_size - 1).bit_length() - 1

        for i in range(rounds):
            next_round = []
            while len(candidates) > 1:
                random.shuffle(candidates)
                fighters = candidates[:2]
                candidates = candidates[2:]
                next_round.append(max(fighters))

            candidates += next_round
        
        return candidates
    
    def new_generation(self):
        """Creates a new generation
        
        Raises:
            RuntimeError: self.initialize() must be ran at least once before using this function
        
        Returns:
            Genetic_Vector[] -- List of Genetic Vectors
        """
        gen_size = self.members
        next_gen = []
        if len(self.pool) == 0:
            raise RuntimeError("New Generation cannot be generated without first initializing using the initialize() method")
        
        if self.elitism:
            next_gen.append(self.pool[-1])
            self.pool = self.pool[:-1]
        
        while len(next_gen) < gen_size:
            parents = self._parent_selection()
            try:
                next_gen.append(parents[0].mate(parents[1]))
            except TypeError as e:
                print(vars(parents[0]))
                print(vars(parents[1]))
                raise TypeError(e)
        
        self.pool = next_gen
        self.pool.sort()
    
    def initialization(self, generation_function = None):
        """Initializes the Genetic_Pool
        
        Keyword Arguments:
            generation_function {function} -- If provided, uses provided function to generate the initial population (default: {None})
        
        Returns:
            None -- No return
        """
        if generation_function is None:
            def generation_function():
                vector_list = []
                for lower, upper in self.template.parameter_bounds:
                    distance = upper - lower
                    
                    if isinstance(lower, float):
                        value = random.random() * distance + lower
                    else:
                        value = random.randint(lower, upper)
                    
                    vector_list.append(value)
                new_genetic_vector = Genetic_Vector(vector_list, 1,self.template.parameter_bounds, self.template.fitness, mutation_rate = self.template.mutation_rate, simple_mutation = self.template.simple_mutation, simple_crossover=self.template.simple_crossover, crossover_function = self.template.crossover_function, type_check = False)
                return new_genetic_vector
        
        for i in range(self.members):
            self.pool.append(generation_function())
        
        self.pool.sort()
        self.generation_max = 0
        self.score_max = self.pool[-1].fitness_val
    
    def initialize(self, generation_function):
        self.initialization(generation_function)
    
    def multiple_generations(self, generations, early_stopping = True, es_percent_generations = 0.3):
        """Runs multiple generations
        
        Arguments:
            generations {int} -- Number of generations you want to run
        
        Keyword Arguments:
            early_stopping {bool} -- Whether or not to use early stopping (default: {True})
            es_percent_generations {float} -- Early Stopping Percent of Generations. If early_stopping is true, how much of the generation without changes should indicate stopping (default: {0.3})
        
        Returns:
            [type] -- [description]
        """
        for i in range(generations):
            self.new_generation()
            if self.pool[-1].fitness_val > self.score_max:
                self.score_max = self.pool[-1].fitness_val
                self.generation_max = i + 1
            elif early_stopping:
                if (i - self.generation_max) > (es_percent_generations * generations):
                    print(f"Stopped at generation {i}")
                    break
        
        return self.pool[-1]


In [3]:
obs = [445.36, 453.20, 454.41,422.38,456.04,440.39,425.19,486.21,500.43,521.28,508.95,488.89,509.87,456.72,473.82,525.95,549.83,542.34]
yr = list(range(1996,2014))

In [4]:
adf = pd.DataFrame(list(zip(yr, np.ones(18), np.ones(18))), columns=['Year', 'Month', 'Day'])

bdf = pd.DataFrame(obs, columns=['Obs'])
bdf['Date'] = pd.to_datetime(adf)
bdf = bdf.set_index('Date')

In [5]:
f = das.exponential_smoothing(bdf['Obs'], .833924,0)
cdf = pd.Series(f[:-1],index = bdf.index).rename("Obs")

In [6]:
template = Genetic_Vector([0.5,50],1,[(0.0,1.0),(np.min(bdf['Obs']) * 0.86 ,np.max(bdf['Obs']) * 1.2)],fitness, simple_crossover = False, crossover_function = crossover)
template2 = genetic.Genetic_Vector([0.5],1,[(0.0,1.0)],fitness, simple_crossover = False, crossover_function = crossover)
template3 = Genetic_Vector([0.5,50],1,[(0.0,1.0),(np.min(bdf['Obs']) * 0.86 ,np.max(bdf['Obs']) * 1.2)],fitness2, simple_crossover = False, crossover_function = crossover)

In [11]:
p = Genetic_Pool(template, elitism=True)
p.initialization()

q = genetic.Genetic_Pool(template2, elitism=True)
q.initialization()

r = Genetic_Pool(template3, elitism=True)
r.initialization()

In [12]:
start_time = time.time()
k = vars(p.multiple_generations(1000, es_percent_generations = 0.1))
print(time.time() - start_time)
k

Stopped at generation 160
2.4222054481506348


{'vector': array([  0.83207288, 446.18799005]),
 'generation': 60,
 'parameter_bounds': [(0.0, 1.0), (363.2468, 659.796)],
 'fitness': <function __main__.fitness(alpha, initial_level=None)>,
 'simple_mutation': True,
 'mutation_rate': 0.018213451100363414,
 'simple_crossover': False,
 'crossover_function': <function __main__.crossover(params)>,
 'creep_mutate': True,
 'fitness_val': -14236.963046274564,
 'vector_length': 2}

In [13]:
start_time = time.time()
k = vars(q.multiple_generations(1000, es_percent_generations = 0.1))
print(time.time() - start_time)
k

Stopped at generation 106
1.1956074237823486


{'vector': array([0.82891849]),
 'generation': 6,
 'parameter_bounds': [(0.0, 1.0)],
 'fitness': <function __main__.fitness(alpha, initial_level=None)>,
 'simple_mutation': True,
 'mutation_rate': 0.016721335554412665,
 'simple_crossover': False,
 'crossover_function': <function __main__.crossover(params)>,
 'creep_mutate': True,
 'fitness_val': -14779.120153531763,
 'vector_length': 1}

In [14]:
start_time = time.time()
k = vars(r.multiple_generations(1000, es_percent_generations = 0.1))
print(time.time() - start_time)
k

Stopped at generation 227
0.6887760162353516


{'vector': array([  0.83470669, 446.86660681]),
 'generation': 127,
 'parameter_bounds': [(0.0, 1.0), (363.2468, 659.796)],
 'fitness': <function __main__.fitness2(alpha, initial_level=None)>,
 'simple_mutation': True,
 'mutation_rate': 0.007567383194684389,
 'simple_crossover': False,
 'crossover_function': <function __main__.crossover(params)>,
 'creep_mutate': True,
 'fitness_val': -14236.872940701558,
 'vector_length': 2}

In [15]:
%lprun -f p.new_generation p.new_generation()

Timer unit: 1e-07 s

Total time: 0.0414807 s
File: <ipython-input-2-7513c7d833ce>
Function: new_generation at line 207

Line #      Hits         Time  Per Hit   % Time  Line Contents
   207                                               def new_generation(self):
   208                                                   """Creates a new generation
   209                                                   
   210                                                   Raises:
   211                                                       RuntimeError: self.initialize() must be ran at least once before using this function
   212                                                   
   213                                                   Returns:
   214                                                       Genetic_Vector[] -- List of Genetic Vectors
   215                                                   """
   216         1         69.0     69.0      0.0          gen_size = self.members
   217       

In [16]:
%lprun -f r.new_generation r.new_generation()

Timer unit: 1e-07 s

Total time: 0.0066141 s
File: <ipython-input-2-7513c7d833ce>
Function: new_generation at line 207

Line #      Hits         Time  Per Hit   % Time  Line Contents
   207                                               def new_generation(self):
   208                                                   """Creates a new generation
   209                                                   
   210                                                   Raises:
   211                                                       RuntimeError: self.initialize() must be ran at least once before using this function
   212                                                   
   213                                                   Returns:
   214                                                       Genetic_Vector[] -- List of Genetic Vectors
   215                                                   """
   216         1         20.0     20.0      0.0          gen_size = self.members
   217       

In [18]:
%lprun -f mom.mate mom.mate(dad)

Timer unit: 1e-07 s

Total time: 0.0021988 s
File: <ipython-input-2-7513c7d833ce>
Function: mate at line 54

Line #      Hits         Time  Per Hit   % Time  Line Contents
    54                                               def mate(self, other):
    55                                                   """Mating Function
    56                                                   
    57                                                   Parameters
    58                                                   -----------
    59                                                   other: Genetic_Vector
    60                                                       The other parent
    61                                                   
    62                                                   Returns
    63                                                   -------
    64                                                   child: Genetic_Vector
    65                                                   

In [19]:
%lprun -f rmom.mate rmom.mate(rdad)

Timer unit: 1e-07 s

Total time: 0.0004085 s
File: <ipython-input-2-7513c7d833ce>
Function: mate at line 54

Line #      Hits         Time  Per Hit   % Time  Line Contents
    54                                               def mate(self, other):
    55                                                   """Mating Function
    56                                                   
    57                                                   Parameters
    58                                                   -----------
    59                                                   other: Genetic_Vector
    60                                                       The other parent
    61                                                   
    62                                                   Returns
    63                                                   -------
    64                                                   child: Genetic_Vector
    65                                                   

In [35]:
%lprun -f rmom.mate rmom.mate(rdad)

Timer unit: 1e-07 s

Total time: 0.0004089 s
File: <ipython-input-2-7513c7d833ce>
Function: mate at line 54

Line #      Hits         Time  Per Hit   % Time  Line Contents
    54                                               def mate(self, other):
    55                                                   """Mating Function
    56                                                   
    57                                                   Parameters
    58                                                   -----------
    59                                                   other: Genetic_Vector
    60                                                       The other parent
    61                                                   
    62                                                   Returns
    63                                                   -------
    64                                                   child: Genetic_Vector
    65                                                   

In [35]:
%lprun -f fitness fitness(0.5,50)

Timer unit: 1e-07 s

Total time: 0.0018003 s
File: <ipython-input-7-35825789bbce>
Function: fitness at line 11

Line #      Hits         Time  Per Hit   % Time  Line Contents
    11                                           def fitness(alpha, initial_level = None):
    12         1       3501.0   3501.0     19.4      f = das.exponential_smoothing(bdf['Obs'], alpha, 0, initial_level=initial_level)
    13         1       4459.0   4459.0     24.8      adf = pd.Series(f[:-1], index=bdf.index).rename("Obs")
    14         1      10043.0  10043.0     55.8      return sse(adf)

In [36]:
%lprun -f fitness2 fitness2(0.5, 50)

Timer unit: 1e-07 s

Total time: 0.0002848 s
File: <ipython-input-7-35825789bbce>
Function: fitness2 at line 16

Line #      Hits         Time  Per Hit   % Time  Line Contents
    16                                           def fitness2(alpha, initial_level = None):
    17         1       2848.0   2848.0    100.0      return das.fast_exponential_smoothing(bdf['Obs'],alpha, 0, initial_level=initial_level)

In [26]:
%lprun -f crossover crossover((0.1,0.9))

Timer unit: 1e-07 s

Total time: 8.82e-05 s
File: <ipython-input-1-35825789bbce>
Function: crossover at line 19

Line #      Hits         Time  Per Hit   % Time  Line Contents
    19                                           def crossover(params):
    20         1         20.0     20.0      2.3      mom, dad = params
    21         1        678.0    678.0     76.9      avg = np.mean([mom, dad])
    22         1         39.0     39.0      4.4      std = max((avg + mom) / 2,.1)
    23         1        145.0    145.0     16.4      return np.random.normal(avg, std)

In [20]:
%lprun -f crossover crossover((0.1,0.9))

Timer unit: 1e-07 s

Total time: 3.77e-05 s
File: <ipython-input-1-dd2356bf5ca7>
Function: crossover at line 19

Line #      Hits         Time  Per Hit   % Time  Line Contents
    19                                           def crossover(params):
    20         1         20.0     20.0      5.3      mom, dad = params
    21         1         23.0     23.0      6.1      avg = (mom + dad) / 2
    22         1         25.0     25.0      6.6      std = max((avg + mom) / 2,.1)
    23         1        309.0    309.0     82.0      return np.random.normal(avg, std)

In [17]:
mom, dad = p.pool[-2:]
rmom, rdad = r.pool[-2:]

{'vector': array([  0.74227703, 426.29766868]),
 'generation': 653,
 'parameter_bounds': [(0.0, 1.0), (363.2468, 659.796)],
 'fitness': <function __main__.fitness(alpha, initial_level=None)>,
 'simple_mutation': True,
 'mutation_rate': 0.0003587751202157722,
 'simple_crossover': False,
 'crossover_function': <function __main__.crossover(params)>,
 'creep_mutate': True,
 'fitness_val': -14783.51436726081,
 'vector_length': 2}

In [32]:
np.mean([1,2,3,4,5,6,7])

4.0