In [1]:
import numpy as np
from numpy import random as rnd
from matplotlib import pyplot as plt
import warnings,datetime,csv,os

import tensorflow as tf
import pandas as pd
import seaborn as sns

from keras.models import Sequential,Model
from keras.layers import Dense,Conv2D,Conv2DTranspose,BatchNormalization,Dropout,Input,concatenate,add,advanced_activations,Activation,AvgPool2D,Bidirectional,Reshape,LSTM,GRU

from sklearn.cluster import AffinityPropagation,AgglomerativeClustering,Birch,DBSCAN,KMeans,MeanShift,MiniBatchKMeans,SpectralClustering
from sklearn.datasets import load_boston
from sklearn.decomposition import DictionaryLearning,FactorAnalysis,KernelPCA,PCA,TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.dummy import DummyClassifier,DummyRegressor
from sklearn.ensemble import AdaBoostClassifier,AdaBoostRegressor,BaggingClassifier,BaggingRegressor,ExtraTreesClassifier,ExtraTreesRegressor,GradientBoostingClassifier,GradientBoostingRegressor,RandomForestRegressor,RandomForestClassifier
from sklearn.feature_extraction import DictVectorizer,FeatureHasher
from sklearn.feature_selection import SelectPercentile,VarianceThreshold
from sklearn.impute import MissingIndicator,SimpleImputer
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import ARDRegression,BayesianRidge,Hinge,Huber,Lasso,LinearRegression,LogisticRegression,Ridge,RidgeClassifier
from sklearn.manifold import Isomap,LocallyLinearEmbedding,TSNE
from sklearn.mixture import BayesianGaussianMixture,GaussianMixture
from sklearn.model_selection import BaseCrossValidator,GridSearchCV,KFold,train_test_split
from sklearn.neighbors import BallTree,DistanceMetric,KNeighborsClassifier,KNeighborsRegressor,NearestCentroid,NearestNeighbors
from sklearn.neural_network import BernoulliRBM,MLPClassifier,MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Binarizer,CategoricalEncoder,FunctionTransformer,Imputer,KBinsDiscretizer,MinMaxScaler,OneHotEncoder,StandardScaler
from sklearn.random_projection import GaussianRandomProjection,SparseRandomProjection

warnings.filterwarnings('ignore')
sns.set()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [247]:
def test_ind(dna,Xtrain,ytrain,Xtest,ytest):
    clf = LogisticRegression()
    clf.fit(np.delete(Xtrain,np.where(dna==0)[0],axis=1),ytrain.astype('int'))
    cscore = clf.score(np.delete(Xtest,np.where(dna==0)[0],axis=1),ytest.astype('int'))
    return cscore

def compete_P(DNA,X,y,sel_pressure=1.9):
    """Evaluate every individual on one shared random split and rank them.

    ``X``/``y`` are split once (30% held out, shuffled) and each row of
    ``DNA`` is scored with ``test_ind`` on that same split.

    Returns
    -------
    tuple
        ``(DNA_sorted, fitness, Pscores_sorted)`` where rows/scores are
        ordered by descending score and ``fitness`` is
        ``[1, 2, ..., n] * sel_pressure`` (smallest value for the best row).
    """
    X_fit,X_eval,y_fit,y_eval = train_test_split(X,y,test_size=0.3,shuffle=True)
    n_individuals = DNA.shape[0]
    scores = np.stack([
        test_ind(DNA[row,:],X_fit,y_fit,X_eval,y_eval)
        for row in range(n_individuals)
    ])
    # Negate so that argsort yields best-first order.
    best_first = np.argsort(-scores)
    rank_fitness = np.arange(1,n_individuals+1)*sel_pressure
    return DNA[best_first],rank_fitness,scores[best_first]

def cross_parents(p0,p1,p_p0=0.5):
    """Uniform crossover: build a child by picking each gene from one parent.

    For every position along the first axis a parent is drawn independently:
    ``p0`` with probability ``p_p0``, otherwise ``p1``.

    Parameters
    ----------
    p0, p1 : arrays of identical shape (the two parents).
    p_p0 : probability that a gene is inherited from ``p0``.

    Returns
    -------
    numpy.ndarray
        A new array (never a view of a parent) holding the child.

    Raises
    ------
    ValueError
        If the parents do not have the same shape.
    """
    p1 = np.asarray(p1)
    if p0.shape!=p1.shape:
        raise ValueError('cross_parents: parents must have the same shape, got %s and %s'%(p0.shape,p1.shape))
    detP = rnd.choice([0,1],p0.shape[0],p=[p_p0,1-p_p0])
    # Reshape the selector so it applies along the first axis for any rank.
    take_p0 = (detP==0).reshape((-1,)+(1,)*(p0.ndim-1))
    return np.where(take_p0,p0,p1)

def mutate(child,p_mutate=0.1):
    """Flip genes of ``child`` in place, each with probability ``p_mutate``.

    One uniform number is drawn per position along the first axis; where it
    falls below ``p_mutate`` the gene ``g`` is replaced by ``(g + 1) % 2``.
    The input array is modified and also returned.
    """
    flip_mask = rnd.rand(child.shape[0])<p_mutate
    child[flip_mask] = (child[flip_mask]+1)%2
    return child

def breed(DNA,fitness,P,k_best=2,k_worst=2,p_fac=1.7):
    """Create the next generation from a best-first sorted population.

    The first ``k_best`` rows of ``DNA`` are carried over unchanged. The
    first ``k_best`` and the last ``k_worst`` rows are then excluded, and
    children are produced from the remaining rows until the new population
    holds ``P`` individuals. Parents are drawn with probability proportional
    to ``1 / fitness**p_fac`` (a smaller fitness value is more likely to be
    picked); a pair consisting of the same row twice is rejected and redrawn.
    Each child is produced by ``cross_parents`` followed by ``mutate``.

    Parameters
    ----------
    DNA : 2-D array, one individual per row, assumed sorted best first.
    fitness : 1-D array aligned with the rows of ``DNA``.
    P : target size of the returned population.
    k_best : number of leading rows copied unchanged and kept out of breeding.
    k_worst : number of trailing rows kept out of breeding.
    p_fac : exponent applied to ``fitness`` when building selection weights.

    Returns
    -------
    numpy.ndarray
        The stacked new population (elite rows first, then children).

    Raises
    ------
    ValueError
        If children are required but fewer than two individuals remain to
        breed from (two distinct parents could never be drawn).
    """
    NEWDNA = [DNA[k,:] for k in range(k_best)]

    # Slice off elites and worst rows; clamp so an oversized k_worst yields
    # an empty pool instead of wrapping around to negative indices.
    stop = max(k_best,DNA.shape[0]-k_worst)
    pool = DNA[k_best:stop]
    pool_fitness = np.asarray(fitness)[k_best:stop]

    if len(NEWDNA)<P and pool.shape[0]<2:
        # Without this check the rejection loop below would spin forever
        # (one breeder) or fail inside rnd.choice (no breeder).
        raise ValueError('breed: need at least 2 individuals to breed from, got %d'%pool.shape[0])

    p_dist = (1/(pool_fitness**p_fac))
    p_dist = p_dist/p_dist.sum()
    candidates = np.arange(0,pool.shape[0])
    while len(NEWDNA)<P:
        parents = rnd.choice(candidates,2,p=p_dist)
        # Reject self-pairing and draw again.
        if parents[0]==parents[1]: continue
        child = cross_parents(pool[parents[0],:],pool[parents[1],:])
        child = mutate(child)
        NEWDNA.append(child)
    NEWDNA = np.stack(NEWDNA,axis=0)
    return NEWDNA

In [248]:
# Feature matrix and targets used for feature selection.
# NOTE(review): test_ind casts the targets to int and fits a classifier on
# them; if these targets are continuous, a regressor may be more suitable. Confirm.
X,y = load_boston(return_X_y=True)

P = 500            # population size (number of individuals)
generations = 25   # number of evolution steps

# Seed NumPy's global RNG so the initial population and every later draw made
# through `rnd` are reproducible on Restart & Run All.
SEED = 0
rnd.seed(SEED)

# One binary gene per feature column: 1 keeps the feature, 0 drops it.
num_genes = X.shape[1]
DNA = rnd.randint(0,2,(P,num_genes))

In [249]:
# Per-generation history: ranked population, its scores and its fitness values.
POP_STORE = []
SCORE_STORE = []
FIT_STORE = []

for g in range(generations):
    # Score the current population on a fresh random split and sort it best-first.
    DNA,fitness,Pscores = compete_P(DNA,X,y)
    POP_STORE.append(DNA)
    SCORE_STORE.append(Pscores)
    FIT_STORE.append(fitness)
    # Replace the population with the next generation (DNA is rebound here).
    DNA = breed(DNA,fitness,P)
    # g is zero-based, so the last line printed reads generations-1 / generations.
    print('Step %d/%d --> Average score = %.2f'%(g,generations,Pscores.mean()))

Step 0/25 --> Average score = 0.10
Step 1/25 --> Average score = 0.10
Step 2/25 --> Average score = 0.10
Step 3/25 --> Average score = 0.12
Step 4/25 --> Average score = 0.12
Step 5/25 --> Average score = 0.11
Step 6/25 --> Average score = 0.08
Step 7/25 --> Average score = 0.11
Step 8/25 --> Average score = 0.12
Step 9/25 --> Average score = 0.10
Step 10/25 --> Average score = 0.12
Step 11/25 --> Average score = 0.10
Step 12/25 --> Average score = 0.12
Step 13/25 --> Average score = 0.12
Step 14/25 --> Average score = 0.08
Step 15/25 --> Average score = 0.12
Step 16/25 --> Average score = 0.09
Step 17/25 --> Average score = 0.11
Step 18/25 --> Average score = 0.12
Step 19/25 --> Average score = 0.11
Step 20/25 --> Average score = 0.12
Step 21/25 --> Average score = 0.13
Step 22/25 --> Average score = 0.12
Step 23/25 --> Average score = 0.07
Step 24/25 --> Average score = 0.12
