In [145]:
import random
from tqdm import tqdm
from math import exp
from math import log1p
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score

One instance

In [314]:
class Being():
    """A single candidate model in the evolutionary search.

    A being scores one row ``data`` as ``sigmoid(S)`` where ``S`` is the sum of

    * ``weight * prod(data[i] for i in term)`` for every term in ``variables``,
    * ``eW * exp(prod(data[i] for i in term))`` for every term in ``e``,
    * ``lnW * log1p(abs(prod(data[i] for i in term)))`` for every term in ``ln``.

    Every term is a list of feature indices. ``weights``, ``eW`` and ``lnW``
    run parallel to ``variables``, ``e`` and ``ln`` (one weight per term).
    """

    # Class-level defaults. __init__ always rebinds every one of these on the
    # instance, so the mutable lists below are never shared between beings
    # that were built through the constructor.
    weights = []
    variables = []
    e = []
    eW = []
    ln = []
    lnW = []
    features = 0
    pruning = 1
    weight_sigma = 10
    variable_power = 3
    metric = 'acc'
    lr = 3

    def __init__(self, features=None, variable_power=3, weight_sigma=5, lr = 1, pruning=1, parents=None, metric='acc', random = True):
        """Create a being from scratch or from one or more parents.

        Args:
            features: Number of input features; term indices are drawn from
                ``0 .. features-1``. Required when no parents are given.
            variable_power: Scales how many terms are generated and how many
                extra indices each term may carry.
            weight_sigma: Standard deviation of the Gaussian used for freshly
                drawn weights.
            lr: Standard deviation of the Gaussian noise added to inherited
                weights in ``mutate``.
            pruning: Numerator of the per-term keep probability
                (``pruning / len(parents)``) and of the new-term count drawn
                in ``mutate``.
            parents: Optional list of beings. When given, ``features``,
                ``variable_power``, ``weight_sigma``, ``pruning`` and
                ``metric`` are copied from ``parents[0]`` and the matching
                arguments are ignored. ``None`` or an empty list means
                "no parents".
            metric: Default metric for ``evaluate`` (``'acc'`` or ``'roc'``).
            random: Only consulted when parents are given. ``True`` builds a
                fresh random being via ``setup``; ``False`` inherits and
                perturbs the parents' terms via ``mutate``.

        Raises:
            ValueError: If neither ``parents`` nor ``features`` is supplied.
        """
        # NOTE: the ``random`` parameter shadows the ``random`` module inside
        # this method only; it is used here purely as a flag.
        self.variables = []
        self.weights = []
        self.e = []
        self.ln = []
        self.eW = []
        self.lnW = []
        self.lr = lr
        if not parents:
            if features is None:
                raise ValueError('features cannot be initialized as None')
            self.features = features
            self.variable_power = variable_power
            self.weight_sigma = weight_sigma
            self.pruning = pruning
            self.metric = metric
            self.setup()
            return

        progenitor = parents[0]
        self.features = progenitor.features
        self.variable_power = progenitor.variable_power
        self.weight_sigma = progenitor.weight_sigma
        self.pruning = progenitor.pruning
        self.metric = progenitor.metric
        if random:
            self.setup()
        else:
            self.mutate(parents)

    def _random_term(self):
        """Return a new term: one random feature index plus a random number of extras."""
        term = [random.randint(0, self.features-1)]
        for _ in range(int(self.variable_power*random.random())):
            term.append(random.randint(0, self.features-1))
        return term

    def setup(self):
        """Populate the being with random terms and one Gaussian weight per term."""
        term_count = int(self.features*self.variable_power*random.random()*2+1)
        for _ in range(term_count):
            self.variables.append(self._random_term())
        # Roughly 5% of the terms are additionally registered as exp or
        # log1p terms (they stay in ``variables`` as well).
        for term in self.variables:
            if random.random() > .95:
                if random.random() > .5:
                    self.e.append(term)
                else:
                    self.ln.append(term)
        # One weight per term. The previous ``range(len(...)-1)`` loops left
        # every weight list one short, so zip() in predict() silently dropped
        # the last term of each kind.
        self.weights = [random.gauss(0, self.weight_sigma) for _ in self.variables]
        self.eW = [random.gauss(0, self.weight_sigma) for _ in self.e]
        self.lnW = [random.gauss(0, self.weight_sigma) for _ in self.ln]

    def mutate(self, parents):
        """Inherit terms from ``parents`` with noisy weights, then add new random terms.

        Each parent term is kept with probability ``pruning / len(parents)``;
        a kept weight is perturbed by Gaussian noise with standard deviation
        ``lr``.
        """
        keep_probability = self.pruning/len(parents)
        for parent in parents:
            for var, weight in zip(parent.variables, parent.weights):
                if random.random() < keep_probability:
                    self.variables.append(list(var))
                    self.weights.append(weight+random.gauss(0, self.lr))
            for e, eW in zip(parent.e, parent.eW):
                if random.random() < keep_probability:
                    self.e.append(list(e))
                    self.eW.append(eW+random.gauss(0, self.lr))
            for ln, lnW in zip(parent.ln, parent.lnW):
                if random.random() < keep_probability:
                    self.ln.append(list(ln))
                    self.lnW.append(lnW+random.gauss(0, self.lr))
        # random.random() is in [0.0, 1.0); dividing by it directly can raise
        # ZeroDivisionError. 1 - random() is in (0.0, 1.0] with the same
        # distribution, so the count of new terms is drawn as before.
        new_terms = int(self.pruning/(1.0 - random.random()))
        for _ in range(new_terms):
            term = self._random_term()
            if random.random() > .99:
                if random.random() > .5:
                    self.e.append(term)
                    self.eW.append(random.gauss(0, self.weight_sigma))
                else:
                    self.ln.append(term)
                    self.lnW.append(random.gauss(0, self.weight_sigma))
            self.variables.append(term)
            self.weights.append(random.gauss(0, self.weight_sigma))

    def predict(self, x):
        """Return a list with one sigmoid activation per row of ``x``.

        ``x`` is any iterable of rows that can be indexed by feature index.
        """
        pred = []
        for data in x:
            result = 0.0
            for var, weight in zip(self.variables, self.weights):
                mult = weight
                for elem in var:
                    mult *= data[elem]
                result += mult
            for e, eW in zip(self.e, self.eW):
                mult = 1
                for elem in e:
                    mult *= data[elem]
                try:
                    result += eW*exp(mult)
                except OverflowError:
                    # exp() overflowed on a large product; fall back to the
                    # exponential of its reciprocal.
                    result += eW*exp(1/mult)
            for ln, lnW in zip(self.ln, self.lnW):
                mult = 1
                for elem in ln:
                    mult *= data[elem]
                result += lnW*log1p(abs(mult))
            try:
                act = 1 / (1 + exp(-result))
            except OverflowError:
                # Numbers are too big: emit the uninformative 0.5 so that the
                # being scores poorly and gets culled.
                act = .5
            pred.append(act)
        return pred

    def evaluate(self, x, y, metric = ''):
        """Score the being's predictions for ``x`` against the labels ``y``.

        Args:
            x: Rows to predict on.
            y: True labels; values ``>= .5`` count as the positive class.
            metric: ``'acc'`` (accuracy at a 0.5 threshold) or ``'roc'``
                (ROC AUC via ``roc_auc_score``). An empty string selects
                ``self.metric``.

        Raises:
            ValueError: If the metric is not ``'acc'`` or ``'roc'``.
            ZeroDivisionError: For ``'acc'`` when there is nothing to score.
        """
        if metric == '':
            metric = self.metric
        if metric not in ('acc', 'roc'):
            # Previously an unknown metric silently returned None.
            raise ValueError("metric must be 'acc' or 'roc', got %r" % (metric,))
        pred = self.predict(x)
        if metric == 'acc':
            correct = 0
            incorrect = 0
            for y1, yp1 in zip(y, pred):
                if (yp1 >= .5 and y1 >= .5) or (yp1 < .5 and y1 < .5):
                    correct += 1
                else:
                    incorrect += 1
            return (correct+0.0)/(correct+incorrect+0.0)
        pred = np.nan_to_num(pred)
        return roc_auc_score(y, pred)


In [315]:
# Smoke test: build a random three-feature being and score one all-ones row.
a = Being(features=3)
a.predict(np.ones((1, 3), dtype=int))

[6.925778071661406e-10]

Creating and Killing Instances

In [338]:
class Slaughterhouse():
    """Runs the generational loop: create beings, score them, keep the best, breed.

    ``beings`` is a list of ``[score, being]`` pairs; after scoring it is
    sorted best-first. ``parents`` holds the beings selected from the current
    generation.
    """

    # Class-level defaults; __init__ rebinds all of them on the instance.
    population = 0
    random = .3
    max_diversity = 20
    template = None
    decay = 1
    lr = 1
    beings = []
    parents = []
    outlast = 0.01
    min_diversity = 4
    being_type = None

    def __init__(self, template, population, random=0.4, max_diversity=20, decay=1, outlast=0.01, min_diversity = 4):
        """Configure the search.

        Args:
            template: Being whose type and hyperparameters every new being
                copies; its ``lr`` is the starting mutation step.
            population: Number of beings per generation.
            random: Probability that a child is NOT bred from the full
                parent set (see ``next_generation``).
            max_diversity: Upper bound on how many top beings are considered
                as parents.
            decay: Factor ``lr`` is multiplied by after every generation.
            outlast: A being within this margin of the best score is kept as
                a parent.
            min_diversity: The top ``min_diversity`` beings are always kept
                as parents regardless of score.
        """
        # NOTE: the ``random`` parameter shadows the module only inside
        # __init__; the other methods use the ``random`` module.
        self.population = population
        self.random = random
        self.max_diversity = max_diversity
        self.min_diversity = min_diversity
        self.template = template
        self.decay = decay
        self.lr = template.lr
        self.beings = []
        self.parents = []
        self.outlast = outlast
        self.being_type = type(template)

    def create_generation(self):
        """Append ``population`` fresh random beings built from the template."""
        for _ in range(self.population):
            self.beings.append([0, self.being_type(parents=[self.template], lr=self.lr)])

    def score_generation(self, x, y):
        """Evaluate every being on ``(x, y)`` and sort ``beings`` best-first."""
        for i in tqdm(range(len(self.beings))):
            self.beings[i][0] = self.beings[i][1].evaluate(x, y)
        self.beings = sorted(self.beings, key=lambda x: x[0], reverse=True)

    def next_generation(self):
        """Replace ``beings`` with the selected parents plus newly bred children.

        Each child is, with probability ``1 - self.random``, bred from all
        parents; otherwise it is with equal chance bred from one randomly
        chosen parent or created fresh from the template.
        """
        self.get_parents()
        self.beings = [[0, parent] for parent in self.parents]
        for _ in range(self.population - len(self.parents)):
            # Children bred from parents must pass random=False. The
            # being's constructor defaults to random=True, which discards
            # the parents' terms and makes lr/decay meaningless.
            if self.parents and random.random() > self.random:
                child = self.being_type(parents=self.parents, lr=self.lr, random=False)
            elif self.parents and random.random() > .5:
                child = self.being_type(parents=[random.choice(self.parents)], lr=self.lr, random=False)
            else:
                child = self.being_type(parents=[self.template], lr=self.lr)
            self.beings.append([0, child])

    def get_parents(self):
        """Select parents from the (already sorted) ``beings`` and print their scores.

        Only the first ``max_diversity`` beings are considered. A being is
        kept when it is among the first ``min_diversity`` or scores within
        ``outlast`` of the best. With no beings, ``parents`` is left empty.
        """
        self.parents = []
        if not self.beings:
            return
        high = self.beings[0][0]
        # Clamp to the real population size so a population smaller than
        # max_diversity no longer raises IndexError.
        for i in range(min(self.max_diversity, len(self.beings))):
            if high - self.outlast < self.beings[i][0] or i < self.min_diversity:
                self.parents.append(self.beings[i][1])
                print('Parent Acc:', self.beings[i][0])

    def go(self, x, y, generations=1):
        """Run ``generations`` rounds of breed-and-score and return the final parents.

        The first call also creates and scores the initial generation.
        ``lr`` is multiplied by ``decay`` after every generation.
        """
        if self.beings == []:
            print('Start Generation')
            self.create_generation()
            self.score_generation(x, y)
        for i in range(generations):
            print('Generation ', i+1)
            self.next_generation()
            self.score_generation(x, y)
            self.lr *= self.decay
        self.get_parents()
        return self.parents
        

Using Titanic Data

In [339]:
# Load the prepared Titanic table; 'Survived' is the label, every other column is a feature.
titanic = pd.read_csv("titanic_dropped.csv")
x = np.array(titanic.drop(columns=['Survived']))
y = np.array(titanic.loc[:, 'Survived'])

In [340]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for the final check. The split is seeded so the
# held-out score is computed on the same rows on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2, random_state=0)

In [341]:
# Hyperparameter template for the search: 7 input features, scored by ROC AUC.
template = Being(
    features=7,
    variable_power=6,
    weight_sigma=5,
    lr=1,
    metric='roc',
)

In [342]:
# 1000 beings per generation; the mutation step shrinks by 1% each generation.
house = Slaughterhouse(template=template, population=1000, decay=0.99)

In [None]:
# Evolve for 10 generations on the training split; `final` is the last parent set.
final = house.go(x=x_train, y=y_train, generations=10)

Start Generation


  2%|▏         | 17/1000 [00:00<00:20, 48.91it/s]

In [331]:
# Accuracy of the top-ranked surviving being on the held-out split.
final[0].evaluate(x=x_test, y=y_test, metric='acc')

0.7832167832167832