In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import ipdb
import time
import sys

import chart_studio.plotly as ply
import chart_studio.tools as plytool
import plotly.figure_factory as ff
import plotly.graph_objs as go
import plotly.offline as plyoff
import plotly.subplots as plysub

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# my imports
sys.path.append('../src/')
from GA.GA import *
from GA.Objective import *

In [3]:
''' generate some data '''
np.random.seed(42)
p = 10
n = 100

# generate the features & target
X = np.random.normal(loc=0, scale=1, size=(n,p))
y = X[:, 0]*5 + X[:, 1]*5 + np.random.normal(loc=0, scale=1, size=n)
simName = '5X0+2X1'

# create the dataframe
feats = ['X%d'%i for i in range(p)]
data = pd.DataFrame(data=y, columns=['target'])
data[feats] = X

# talk
display(data.head())

Unnamed: 0,target,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9
0,3.191605,0.496714,-0.138264,0.647689,1.52303,-0.234153,-0.234137,1.579213,0.767435,-0.469474,0.54256
1,-3.721104,-0.463418,-0.46573,0.241962,-1.91328,-1.724918,-0.562288,-1.012831,0.314247,-0.908024,-1.412304
2,6.258993,1.465649,-0.225776,0.067528,-1.424748,-0.544383,0.110923,-1.150994,0.375698,-0.600639,-0.291694
3,5.605921,-0.601707,1.852278,-0.013497,-1.057711,0.822545,-1.220844,0.208864,-1.95967,-1.328186,0.196861
4,5.247398,0.738467,0.171368,-0.115648,-0.301104,-1.478522,-0.719844,-0.460639,1.057122,0.343618,-1.76304


In [4]:
''' prepare GA input parameters '''
# GA parameters
parmsGA = {'initPerc':0.5, 'forceVars':None, 'showTopSubs':10, 'populSize':20, 'numGens':10,
           'noChangeTerm':180, 'convgCrit':0.00001, 'elitism':True, 'mateType':1, 'probXover':0.8,
           'probMutate':0.3, 'probEngineer':0.2, 'optimGoal':-1, 'plotFlag':True, 'printFreq':10,
           'xoverType':1}
# data parameters
parmsData = {'data':data, 'name':simName}
# objective parameters
parmsObj = {'function':'RegressionMetric',
            'arguments':{'data':None, 'subset':None, 'metric':'RMSE', 'optimGoal':parmsGA['optimGoal']}}

In [5]:
# run the GA - hold on to your butts
randSeed = 42
verb = False
MSims = 5

# init
bestSubss = [None]*MSims
bestScores = [None]*MSims
genBestss = [None]*MSims
genScoress = [None]*MSims
randSeeds = [None]*MSims
timeStamps = [None]*MSims
figGPProgresss = [None]*MSims
seedSubs = []

for sim in range(MSims):
    print('Executing GA %d of %d'%(sim+1, MSims))
    bestSubss[sim], bestScores[sim], genBestss[sim], genScoress[sim],\
    randSeeds[sim], timeStamps[sim], figGPProgresss[sim] = RunGASubset(parmsGA, parmsData, parmsObj, seedSubs, verb, randSeed)
    # add the best subset to seed the next GP run, if new
    try:
        seedSubs.index(bestSubss[sim])
    except ValueError:
        # this best is new, so add
        seedSubs.append(bestSubss[sim])

# get the overall best
bestIndx = np.argmax(parmsGA['optimGoal']*np.array(bestScores))
bestScore = bestScores[bestIndx]
bestSubs = bestSubss[bestIndx]
timeStamp = timeStamps[bestIndx]

Executing GA 1 of 5
##########################################
GA Started on 2022-01-08T18:16:04.678326
##########################################
Data: 5X0+2X1(n=100, p=10)
Random Seed: 42
Maximum # Generations: 10
Mininum # of Generations: 180
Convergence Criteria: 0.00001000
Population Size: 20
Initial Fill Percentage: 0.50
Features Forced in all Models: None
Initial Population Seeded with 0 Subsets
Mutation Rate: 0.30
Crossover Rate: 0.80
Crossover Method: SINGLE
Mating Method: SORTED
Elitism is: ON
!!With Elitism ON, the probability of GA engineering has been set to 0.00!!
##########################################
Objective: MINIMIZE
Objective Function: RegressionMetric(metric='RMSE', optimGoal=-1)
##########################################
Generation 1 of 10: Best Score = 0.9292, Early Termination = 1
	1110101001
Generation 10 of 10: Best Score = 0.9258, Early Termination = 8
	1111111001
##########################################
GA Complete
	Unique Subsets Evaluated - 233
Top 1

Unnamed: 0,Score,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,Frequency
1111111001,0.925756,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.8
1100111001,0.92802,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.1
1110101001,0.929241,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.1


GA: Started on 2022-01-08T18:16:04.678326
	Finished on 2022-01-08T18:16:06.753041
	Elapsed Time = 0.035(m)
Executing GA 2 of 5
##########################################
GA Started on 2022-01-08T18:16:06.753041
##########################################
Data: 5X0+2X1(n=100, p=10)
Random Seed: 42
Maximum # Generations: 10
Mininum # of Generations: 180
Convergence Criteria: 0.00001000
Population Size: 20
Initial Fill Percentage: 0.50
Features Forced in all Models: None
Initial Population Seeded with 1 Subsets
Mutation Rate: 0.30
Crossover Rate: 0.80
Crossover Method: SINGLE
Mating Method: SORTED
Elitism is: ON
!!With Elitism ON, the probability of GA engineering has been set to 0.00!!
##########################################
Objective: MINIMIZE
Objective Function: RegressionMetric(metric='RMSE', optimGoal=-1)
##########################################
Generation 1 of 10: Best Score = 0.9258, Early Termination = 1
	1111111001
Generation 10 of 10: Best Score = 0.9249, Early Termination =

Unnamed: 0,Score,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,Frequency
1111111111,0.924859,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.3
1111111101,0.92574,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.1
1111111001,0.925756,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.6


GA: Started on 2022-01-08T18:16:06.753041
	Finished on 2022-01-08T18:16:08.699141
	Elapsed Time = 0.032(m)
Executing GA 3 of 5
##########################################
GA Started on 2022-01-08T18:16:08.700139
##########################################
Data: 5X0+2X1(n=100, p=10)
Random Seed: 42
Maximum # Generations: 10
Mininum # of Generations: 180
Convergence Criteria: 0.00001000
Population Size: 20
Initial Fill Percentage: 0.50
Features Forced in all Models: None
Initial Population Seeded with 2 Subsets
Mutation Rate: 0.30
Crossover Rate: 0.80
Crossover Method: SINGLE
Mating Method: SORTED
Elitism is: ON
!!With Elitism ON, the probability of GA engineering has been set to 0.00!!
##########################################
Objective: MINIMIZE
Objective Function: RegressionMetric(metric='RMSE', optimGoal=-1)
##########################################
Generation 1 of 10: Best Score = 0.9249, Early Termination = 1
	1111111111
Generation 10 of 10: Best Score = 0.9249, Early Termination =

Unnamed: 0,Score,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,Frequency
1111111111,0.924859,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


GA: Started on 2022-01-08T18:16:08.700139
	Finished on 2022-01-08T18:16:10.263712
	Elapsed Time = 0.026(m)
Executing GA 4 of 5
##########################################
GA Started on 2022-01-08T18:16:10.264709
##########################################
Data: 5X0+2X1(n=100, p=10)
Random Seed: 42
Maximum # Generations: 10
Mininum # of Generations: 180
Convergence Criteria: 0.00001000
Population Size: 20
Initial Fill Percentage: 0.50
Features Forced in all Models: None
Initial Population Seeded with 3 Subsets
Mutation Rate: 0.30
Crossover Rate: 0.80
Crossover Method: SINGLE
Mating Method: SORTED
Elitism is: ON
!!With Elitism ON, the probability of GA engineering has been set to 0.00!!
##########################################
Objective: MINIMIZE
Objective Function: RegressionMetric(metric='RMSE', optimGoal=-1)
##########################################
Generation 1 of 10: Best Score = 0.9249, Early Termination = 1
	1111111111
Generation 10 of 10: Best Score = 0.9249, Early Termination =

Unnamed: 0,Score,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,Frequency
1111111111,0.924859,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


GA: Started on 2022-01-08T18:16:10.264709
	Finished on 2022-01-08T18:16:11.885770
	Elapsed Time = 0.027(m)
Executing GA 5 of 5
##########################################
GA Started on 2022-01-08T18:16:11.885770
##########################################
Data: 5X0+2X1(n=100, p=10)
Random Seed: 42
Maximum # Generations: 10
Mininum # of Generations: 180
Convergence Criteria: 0.00001000
Population Size: 20
Initial Fill Percentage: 0.50
Features Forced in all Models: None
Initial Population Seeded with 4 Subsets
Mutation Rate: 0.30
Crossover Rate: 0.80
Crossover Method: SINGLE
Mating Method: SORTED
Elitism is: ON
!!With Elitism ON, the probability of GA engineering has been set to 0.00!!
##########################################
Objective: MINIMIZE
Objective Function: RegressionMetric(metric='RMSE', optimGoal=-1)
##########################################
Generation 1 of 10: Best Score = 0.9249, Early Termination = 1
	1111111111
Generation 10 of 10: Best Score = 0.9249, Early Termination =

Unnamed: 0,Score,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,Frequency
1111111111,0.924859,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


GA: Started on 2022-01-08T18:16:11.885770
	Finished on 2022-01-08T18:16:13.393819
	Elapsed Time = 0.025(m)
