In [7]:
import neat
import pandas as pd
import numpy as np
import time

# Load the pre-processed dataset once; load_data() samples rows from it.
cardio_dataset = pd.read_csv('3-cleaned-normalized.csv')

# Total number of rows available, and the cap on rows sampled for training.
dataset_size = len(cardio_dataset)
max_items = 100

# NEAT configuration: the library's default genome / reproduction / species /
# stagnation classes, with settings read from the 'neat-config-cardio' file.
config = neat.Config(
    neat.DefaultGenome,
    neat.DefaultReproduction,
    neat.DefaultSpeciesSet,
    neat.DefaultStagnation,
    'neat-config-cardio',
)

# The population is the top-level object for a NEAT run.
p = neat.Population(config)

# Print per-generation progress to stdout. The positional False is the
# reporter's only constructor argument (per-species detail flag in the
# neat-python docs).
p.add_reporter(neat.StdOutReporter(False))

def load_data(num_items, num_features="all", random_state=1):
    """Sample rows from ``cardio_dataset`` and split them into inputs and labels.

    Args:
        num_items: Number of rows to draw (passed to ``DataFrame.sample`` as ``n``).
        num_features: Accepted for interface compatibility; not used by this
            function, which always takes the first 11 columns.
        random_state: Seed forwarded to ``DataFrame.sample`` so the same
            arguments select the same rows.

    Returns:
        A ``(data, target)`` tuple of arrays: the values of the first 11
        columns of the sampled rows, and the values of their ``'cardio'``
        column.
    """
    sampled_rows = cardio_dataset.sample(n=num_items, random_state=random_state)
    features = sampled_rows.iloc[:, :11].values
    labels = sampled_rows['cardio'].values
    return features, labels

class DynamicRunner:
    """Evolve the module-level NEAT population on growing training samples.

    The runner keeps the training sample currently in use on the instance
    (``self.data`` / ``self.target``) so that ``eval_genomes`` -- which NEAT
    calls with a fixed ``(genomes, config)`` signature -- can reach it.

    Relies on names defined at module level: the population ``p``, the NEAT
    ``config`` and the ``load_data`` function.
    """

    def __init__(self):
        # Training sample currently in use; filled in by run_dynamic().
        self.data = None
        self.target = None

    @staticmethod
    def _accuracy(net, data, target):
        """Score ``net`` on ``data`` against ``target``.

        Each row is fed to ``net.activate``; the first output is rounded and
        compared with the row's label.

        Returns:
            ``1 - (sum of |round(output[0]) - label|) / len(target)``, which is
            the classification accuracy when outputs and labels are 0/1.
        """
        errors = sum(abs(round(net.activate(xi)[0]) - xo)
                     for xi, xo in zip(data, target))
        return 1 - errors / len(target)

    def eval_genomes(self, genomes, config):
        """Assign a fitness to every genome (callback passed to ``p.run``).

        Fitness starts at 1.0 and loses the mean absolute difference between
        the network's first output and the label over the current training
        sample, so higher is better.

        Args:
            genomes: Iterable of ``(genome_id, genome)`` pairs.
            config: NEAT configuration used to build each network.
        """
        for _genome_id, genome in genomes:
            genome.fitness = 1.0
            net = neat.nn.FeedForwardNetwork.create(genome, config)
            num_rows = len(self.target)
            for xi, xo in zip(self.data, self.target):
                output = net.activate(xi)
                genome.fitness -= abs(output[0] - xo) / num_rows

    def run_dynamic(self, max_items, start_items=100, iters_step=5):
        """Run the population on samples that double in size up to ``max_items``.

        Each round samples ``items`` training rows, calls ``p.run`` with
        ``iters_single`` as its second argument, and validates the returned
        genome. While the validation score keeps improving, the same sample
        size is reused; otherwise (or after one run at exactly ``max_items``)
        the sample size doubles and ``iters_single`` grows by ``iters_step``.

        Args:
            max_items: Largest training-sample size allowed.
            start_items: Initial training-sample size (default 100).
            iters_step: Initial value of, and increment for, the value passed
                as the second argument of ``p.run`` (default 5).

        Returns:
            The genome returned by the last ``p.run`` call.

        Raises:
            ValueError: If ``start_items`` or ``iters_step`` is not positive,
                or if ``max_items`` is smaller than ``start_items``. Without
                this check the loop would never execute and there would be
                no winner to return.
        """
        if start_items <= 0:
            raise ValueError("start_items must be positive")
        if iters_step <= 0:
            raise ValueError("iters_step must be positive")
        if max_items < start_items:
            raise ValueError(
                "max_items ({}) must be at least start_items ({})".format(
                    max_items, start_items))

        items = start_items
        iters_single = iters_step
        score = 0
        winner = None

        while items <= max_items:
            self.data, self.target = load_data(num_items=items)

            print("\n\n", len(self.data), len(self.target), "\n\n")

            winner = p.run(self.eval_genomes, iters_single)
            print('\nBest genome:\n{!s}'.format(winner))

            val = self.validate(winner, items)

            # No improvement, or this was the single run at max_items:
            # move on to a larger sample.
            if val <= score or items == max_items:
                items *= 2
                iters_single += iters_step
                # Reset so a validation drop at the new size is not counted
                # as "no improvement" against the old size's score.
                score = 0
            else:
                score = val

        return winner

    def validate(self, winner, items):
        """Print and return ``winner``'s accuracy on a validation sample.

        The sample has ``items`` rows and is drawn with ``random_state=100``
        (the training sample uses the ``load_data`` default seed).

        Args:
            winner: Genome to evaluate.
            items: Number of validation rows to sample.

        Returns:
            The accuracy as computed by ``_accuracy``.
        """
        # Uses the module-level ``config``.
        winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
        data, target = load_data(num_items=items, random_state=100)

        acc = self._accuracy(winner_net, data, target)
        print('\nValidation Accuracy:', acc)
        return acc

    def results(self, winner, max_items):
        """Print ``winner``'s accuracy on the training sample and a test sample.

        "Training" is the sample currently stored on the instance; "Testing"
        is a sample of ``max_items`` rows drawn with the ``load_data``
        default seed.

        Args:
            winner: Genome to evaluate.
            max_items: Number of rows to sample for the testing accuracy.
        """
        # Show output of the most fit genome against training data.
        print('\nOutput:')
        winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

        acc = self._accuracy(winner_net, self.data, self.target)
        print('\nTraining Accuracy:', acc)

        data, target = load_data(num_items=max_items)
        acc = self._accuracy(winner_net, data, target)
        print('\nTesting Accuracy:', acc)

# Run the incremental training, print training/testing accuracy (the testing
# sample spans dataset_size rows), and report the elapsed wall-clock time.
start = time.time()

d = DynamicRunner()
winner = d.run_dynamic(max_items)
d.results(winner, dataset_size)

end = time.time()
elapsed_seconds = end - start
print("\nTotal time:", elapsed_seconds, "seconds")



 100 100 



 ****** Running generation 0 ****** 

Population's average fitness: 0.50374 stdev: 0.04925
Best fitness: 0.63749 - size: (1, 11) - species 1 - id 122
Average adjusted fitness: 0.193
Mean genetic distance 1.201, standard deviation 0.409
Population of 150 members in 1 species
Total extinctions: 0
Generation time: 0.173 sec

 ****** Running generation 1 ****** 

Population's average fitness: 0.53486 stdev: 0.04517
Best fitness: 0.65326 - size: (1, 10) - species 1 - id 233
Average adjusted fitness: 0.155
Mean genetic distance 1.397, standard deviation 0.443
Population of 150 members in 1 species
Total extinctions: 0
Generation time: 0.194 sec (0.184 average)

 ****** Running generation 2 ****** 

Population's average fitness: 0.54747 stdev: 0.04981
Best fitness: 0.68669 - size: (2, 10) - species 1 - id 432
Average adjusted fitness: 0.105
Mean genetic distance 1.575, standard deviation 0.422
Population of 150 members in 1 species
Total extinctions: 0
Generation time: 0.171 se