In [1]:
import numpy as np
import matplotlib.pyplot as plt
import cma
import random 
from es import SimpleGA, CMAES, PEPG, OpenES, PEPGVariant, SlideWindow,Adam

from testbed import * 
import pickle 
import multiprocessing as mp
import copy 


In [2]:
def calProduct(x):
    """Return the product of the entries of ``x``, computed in log space.

    The logarithm of every entry is summed and the sum is exponentiated, so
    the entries are expected to be strictly positive.
    """
    log_total = np.sum(np.log(x))
    return np.exp(log_total)

def calEntropy(x):
    """Return the sum of the natural logarithms of the entries of ``x``.

    The notebook applies this to a solver's per-parameter sigma vector and
    logs the value as its "entropy" metric.
    """
    return np.sum(np.log(x))

In [3]:
def squareSum(x):
    """Fitness function: the negated sum of squares of ``x`` (maximum 0 at the origin)."""
    total = np.square(x).sum()
    return -1 * total

In [4]:
def config():
    """Set the notebook-wide experiment globals and seed both RNGs.

    Globals written:
      NPARAMS       -- dimensionality of the search problem.
      MAX_ITERATION -- number of generations each solver is run for.
      fit_func      -- objective function evaluated on each candidate.
      NPOPULATION   -- population size derived from NPARAMS (always odd).

    Both ``np.random`` and ``random`` are seeded with 0 so a fresh
    Restart-and-Run-All reproduces the same sample stream.
    """
    global NPARAMS
    global NPOPULATION
    global MAX_ITERATION
    global fit_func

    NPARAMS = 32             # dimensionality of the problem
    MAX_ITERATION = 80000    # generations per solver run

    # Objective under test. Alternatives that were tried here:
    # rastrigin, dejong, hyperEllipsoid, schwefel, griewangk, squareSum.
    fit_func = rosebrock

    # Population size: 4 + 3*ceil(ln(NPARAMS)), then rounded to an odd number.
    # The solvers below are built with average_baseline=False, for which
    # PEPGAdaptive asserts an odd population (one slot is the baseline at mu).
    NPOPULATION = int(4 + 3 * np.ceil(np.log(NPARAMS)))
    NPOPULATION = int(NPOPULATION / 2) * 2 + 1

    np.random.seed(0)
    random.seed(0)

In [5]:
# Apply the experiment configuration, then echo it together with a sanity
# evaluation of the objective at the 2-D origin (one value per line).
config()
print(NPARAMS, NPOPULATION, fit_func, fit_func([0.0, 0.0]), sep="\n")

32
17
<function rosebrock at 0x000002CCF3973268>
-1.0


In [16]:
class PEPGAdaptive:
  '''PEPG variant whose sigma update carries an extra additive "diversity" term.

  Each call to tell() applies the usual clipped PEPG sigma change and, on top
  of it, adds ``sigma**option * learning_rate * diversity_base`` to sigma.
  The diversity coefficient is currently the constant ``diversity_base``; the
  sliding windows over reward and entropy are recorded every generation, but
  the adaptive bound that was prototyped from them is disabled.

  Usage follows the ask/tell protocol: ``ask()`` returns candidate solutions,
  the caller evaluates them, ``tell(rewards)`` updates mu and sigma, and
  ``result()`` reports the best solution and diagnostics.
  '''

  def __init__(self, num_params,             # number of model parameters
               sigma_init=0.10,              # initial standard deviation
               sigma_alpha=0.20,             # learning rate for standard deviation (0 disables the sigma update)
               sigma_decay=0.999,            # anneal standard deviation
               sigma_limit=0.01,             # stop annealing if less than this
               sigma_max_change=0.2,         # clips each sigma change to this fraction of sigma
               learning_rate=0.01,           # step size handed to the optimizer for mu; also scales the diversity term
               learning_rate_decay = 0.9999, # annealing the learning rate
               learning_rate_limit = 0.01,   # stop annealing learning rate
               elite_ratio = 0,              # if > 0, mu moves to the elite average instead of using the optimizer
               popsize=256,                  # population size
               average_baseline=True,        # set baseline to average of batch
               weight_decay=0.01,            # weight decay coefficient
               rank_fitness=True,            # use rank rather than fitness numbers
               diversity_base=0.1,           # coefficient of the additive diversity term on sigma
               option = 0,                   # exponent p: the diversity term is scaled by sigma**p
               forget_best=True):            # don't keep the historical best solution

    self.num_params = num_params
    self.sigma_init = sigma_init
    self.sigma_alpha = sigma_alpha
    self.sigma_decay = sigma_decay
    self.sigma_limit = sigma_limit
    self.sigma_max_change = sigma_max_change
    self.learning_rate = learning_rate
    self.learning_rate_decay = learning_rate_decay
    self.learning_rate_limit = learning_rate_limit
    self.popsize = popsize
    self.average_baseline = average_baseline
    self.diversity_base = diversity_base
    self.option = option
    # Initialised here so result() works even when tell() skips the sigma
    # branch (sigma_alpha == 0); tell() refreshes it on every sigma update.
    self.diversity_best = diversity_base

    if self.average_baseline:
      assert (self.popsize % 2 == 0), "Population size must be even"
      self.batch_size = int(self.popsize / 2)
    else:
      # one extra member (the unperturbed mu) serves as the baseline
      assert (self.popsize & 1), "Population size must be odd"
      self.batch_size = int((self.popsize - 1) / 2)

    # option to use greedy es method to select next mu, rather than using drift param
    self.elite_ratio = elite_ratio
    self.elite_popsize = int(self.popsize * self.elite_ratio)
    self.use_elite = self.elite_popsize > 0

    self.forget_best = forget_best
    self.batch_reward = np.zeros(self.batch_size * 2)
    self.mu = np.zeros(self.num_params)
    self.sigma = np.ones(self.num_params) * self.sigma_init
    self.curr_best_mu = np.zeros(self.num_params)
    self.best_mu = np.zeros(self.num_params)
    self.best_reward = 0
    self.curr_best_reward = 0
    self.first_interation = True
    self.weight_decay = weight_decay
    self.rank_fitness = rank_fitness
    if self.rank_fitness:
      self.forget_best = True # always forget the best one if we rank
    # choose optimizer
    self.optimizer = Adam(self, learning_rate)
    # sliding windows fed every generation by tell()
    self.rewardWindow = SlideWindow(num_params)   # mean reward per generation
    self.entropyWindow = SlideWindow(num_params)  # calEntropy(sigma) per generation
    self.diversityWindow = SlideWindow(4)         # diversity coefficient actually applied

  @staticmethod
  def _es_helper(name):
    '''Return the helper called `name` from the notebook globals, else from the es module.

    The notebook's import list does not bring in compute_centered_ranks or
    compute_weight_decay, so a name that is not already global is looked up
    on the `es` module at call time (assumed to define them, as it defines
    the PEPG solver this class extends -- confirm).
    '''
    if name in globals():
      return globals()[name]
    import es
    return getattr(es, name)

  def rms_stdev(self):
    '''Return the mean absolute value of sigma.'''
    sigma = self.sigma
    return np.mean(np.sqrt(sigma*sigma))

  def ask(self):
    '''Sample and return the next population, one row per candidate.

    Perturbations are antithetic: batch_size draws scaled by sigma plus their
    negations. Without average_baseline the first row is mu itself.
    '''
    # antithetic sampling
    self.epsilon = np.random.randn(self.batch_size, self.num_params) * self.sigma.reshape(1, self.num_params)
    self.epsilon_full = np.concatenate([self.epsilon, - self.epsilon])
    if self.average_baseline:
      epsilon = self.epsilon_full
    else:
      # first population is mu, then positive epsilon, then negative epsilon
      epsilon = np.concatenate([np.zeros((1, self.num_params)), self.epsilon_full])
    solutions = self.mu.reshape(1, self.num_params) + epsilon
    self.solutions = solutions
    return solutions

  def tell(self, reward_table_result):
    '''Update mu, sigma and the best-solution bookkeeping from one generation's rewards.

    reward_table_result must hold popsize rewards, ordered like the rows
    returned by the preceding ask(); a numpy float array is expected.
    '''
    assert(len(reward_table_result) == self.popsize), "Inconsistent reward_table size reported."

    reward_table = np.array(reward_table_result)

    if self.rank_fitness:
      reward_table = self._es_helper('compute_centered_ranks')(reward_table)

    if self.weight_decay > 0:
      l2_decay = self._es_helper('compute_weight_decay')(self.weight_decay, self.solutions)
      reward_table += l2_decay

    if self.average_baseline:
      b = np.mean(reward_table)
      reward_offset = 0
    else:
      b = reward_table[0] # baseline: reward of the unperturbed mu
      reward_offset = 1

    reward = reward_table[reward_offset:]

    self.rewardWindow.update(np.array(reward).mean())
    self.entropyWindow.update(calEntropy(self.sigma))

    if self.use_elite:
      idx = np.argsort(reward)[::-1][0:self.elite_popsize]
    else:
      idx = np.argsort(reward)[::-1]

    best_reward = reward[idx[0]]
    if (best_reward > b or self.average_baseline):
      best_mu = self.mu + self.epsilon_full[idx[0]]
      best_reward = reward[idx[0]]
    else:
      # no sample beat the unperturbed mean
      best_mu = self.mu
      best_reward = b

    self.curr_best_reward = best_reward
    self.curr_best_mu = best_mu

    if self.first_interation:
      self.sigma = np.ones(self.num_params) * self.sigma_init
      self.first_interation = False
      self.best_reward = self.curr_best_reward
      self.best_mu = best_mu
    else:
      if self.forget_best or (self.curr_best_reward > self.best_reward):
        self.best_mu = best_mu
        self.best_reward = self.curr_best_reward

    # short hand
    epsilon = self.epsilon
    sigma = self.sigma

    # update the mean
    if self.use_elite:
      # move mean to the average of the best idx means
      self.mu += self.epsilon_full[idx].mean(axis=0)
    else:
      rT = (reward[:self.batch_size] - reward[self.batch_size:])
      change_mu = np.dot(rT, epsilon)
      self.optimizer.stepsize = self.learning_rate
      self.optimizer.update(-change_mu) # adam, rmsprop, momentum, etc.
      #self.mu += (change_mu * self.learning_rate) # normal SGD method

    # adaptive sigma
    if (self.sigma_alpha > 0):
      # normalization
      stdev_reward = 1.0
      if not self.rank_fitness:
        stdev_reward = reward.std()

      S = ((epsilon * epsilon - (sigma * sigma).reshape(1, self.num_params)) / sigma.reshape(1, self.num_params))
      reward_avg = (reward[:self.batch_size] + reward[self.batch_size:]) / 2.0
      rS = reward_avg - b

      delta_sigma = (np.dot(rS, S)) / (2 * self.batch_size * stdev_reward)

      # adjust sigma according to the adaptive sigma calculation; for
      # stability the change is clipped to +/- sigma_max_change * sigma,
      # measured against sigma as it is before the diversity term is added
      change_sigma = self.sigma_alpha * delta_sigma
      change_sigma = np.minimum(change_sigma, self.sigma_max_change * self.sigma)
      change_sigma = np.maximum(change_sigma, - self.sigma_max_change * self.sigma)

      # Diversity coefficient. An adaptive bound (ratio of the last reward-window
      # difference to the last entropy-window difference) was prototyped here
      # but is disabled, so the constant base value is used.
      self.diversity_best = self.diversity_base
      self.sigma += np.power(self.sigma, self.option)*self.learning_rate*self.diversity_best

      self.sigma += change_sigma
      self.diversityWindow.update(self.diversity_best)

    # keep sigma non-negative after the additive updates
    self.sigma = abs(self.sigma)

    if (self.sigma_decay < 1 ):
      self.sigma[self.sigma > self.sigma_limit] *= self.sigma_decay

    if (self.learning_rate_decay < 1 and self.learning_rate > self.learning_rate_limit):
      self.learning_rate *= self.learning_rate_decay

  def current_param(self):
    '''Return the best candidate of the most recent generation.'''
    return self.curr_best_mu

  def set_mu(self, mu):
    '''Replace the search mean with a copy of `mu`.'''
    self.mu = np.array(mu)

  def best_param(self):
    '''Return the stored best solution (see forget_best).'''
    return self.best_mu

  def result(self): # return best params so far, along with historically best reward, curr reward, sigma
    '''Return (best_mu, best_reward, curr_best_reward, sigma, diversity_best).'''
    return (self.best_mu, self.best_reward, self.curr_best_reward, self.sigma, self.diversity_best)

In [8]:
def test_solver(solver,printLog=True):
	history = []
	for j in range(MAX_ITERATION):
		solutions = solver.ask()
		fitness_list = np.zeros(solver.popsize)
		for i in range(solver.popsize):
			fitness_list[i] = fit_func(solutions[i])
		solver.tell(fitness_list)
		result = solver.result() # first element is the best solution, second element is the best fitness
		history.append(abs(result[1]))
		if (j+1) % 5000 == 0 and printLog:
		  print("fitness at iteration", (j+1), result[1])
	# print("local optimum discovered by solver:\n", result[0])
	print("fitness score at this local optimum:", result[1])
	return history

In [9]:
def batch_debug_solver(solver,printLog=True,evaluation=30):
    """Run debug_solver on `evaluation` independent deep copies of `solver`.

    The solver passed in is never mutated. Returns the per-run metric
    histories stacked into one array whose first axis is the run index.
    """
    histories = [
        debug_solver(copy.deepcopy(solver), printLog=printLog)[1]
        for _ in range(evaluation)
    ]
    return np.array(histories)
        

In [10]:
def debug_solver(solver,printLog=True):
    """Run `solver` for MAX_ITERATION generations, logging populations and metrics.

    Returns (populations, metrics):
      populations -- list with the array returned by solver.ask() per generation.
      metrics     -- array with one row per generation:
                     [abs(best fitness), abs(mean fitness), calEntropy(sigma),
                      abs(std of fitness)], plus the last element of
                     solver.result() when that tuple does not have 4 entries.

    Progress is printed on the first and every 1000th generation when
    printLog is true; the final fitness is always printed.
    """
    population_log = []
    metric_log = []
    # The drawn vector is not used, but the draw is kept because it advances
    # the global numpy RNG and therefore the samples that follow.
    np.random.randn(NPARAMS)
    for generation in range(1, MAX_ITERATION + 1):
        solutions = solver.ask()
        fitness_list = np.zeros(solver.popsize)
        for member in range(solver.popsize):
            fitness_list[member] = fit_func(solutions[member])
        solver.tell(fitness_list)
        result = solver.result()
        if printLog and (generation % 1000 == 0 or generation == 1):
            print("fitness at iteration", generation, fitness_list.mean(), result[1])
        population_log.append(solutions)
        row = [abs(result[1]), abs(fitness_list.mean()), calEntropy(result[3]), abs(fitness_list.std())]
        if len(result) != 4:
            row.append(result[-1])
        metric_log.append(row)
    print("fitness score at this local optimum:", result[1])
    return population_log, np.array(metric_log)

In [11]:
# Sanity check: evaluate the objective at one random standard-normal point.
# Note this draw advances the global numpy RNG seeded in config().
x = np.random.randn(NPARAMS)
print("The fitness of initial guess", fit_func(x)) 
print(NPARAMS)

The fitness of initial guess -16007.2118865
32


In [13]:
import cloudpickle
from collections import namedtuple
def pickle_write(data,method,fname):
    """Serialize `data` with cloudpickle to the file ``<method><fname>.pickle``.

    `method` and `fname` are concatenated as-is (no separator is inserted).
    The file is opened in a ``with`` block so the handle is closed even if
    serialization raises.
    """
    with open(method+fname+".pickle","wb") as pickle_out:
        cloudpickle.dump(data,pickle_out)


def readData(name):
    """Load and return the object stored in ``<name>.pickle``.

    The file is opened in a ``with`` block so the handle is always closed.
    """
    # SECURITY: unpickling executes code embedded in the file; only load
    # pickles produced by this notebook or another trusted source.
    with open(name+".pickle","rb") as pickle_in:
        return cloudpickle.load(pickle_in)

# Hashable key identifying one hyper-parameter setting in the log dictionaries.
# The typename matches the bound name so instances can be pickled and copied
# by the standard library (pickle resolves the class by its typename).
Config=namedtuple('Config',['popsize','learning_rate','sigma_init','diversity_base'])

In [14]:
def curvePlot(historys,labelName,start,end):
    """Plot the across-run mean of `historys` over generations start..end-1.

    historys  -- 2-D array indexed as [run, generation].
    labelName -- legend label for the curve.
    start,end -- half-open generation window; it is truncated to the number
                 of generations actually present in `historys`.

    Returns the Line2D handle of the mean curve (for use in a legend).
    """
    window = historys[:,start:end]
    # Integer generation indices, one per plotted column, so the x axis lines
    # up with the data even when `end` exceeds the recorded length.
    epoch = start + np.arange(window.shape[1])
    avg_line,=plt.plot(epoch,np.mean(window,axis=0),linewidth=1.0,linestyle="-",label=labelName)
    return avg_line

def title_gen(args):
    """Build a plot title "<func>-<dimension> P:<popsize>" from the attributes of `args`."""
    return "{name}-{dim} P:{pop}".format(
        name=args.func, dim=args.dimension, pop=args.popsize)

def label_gen(config,method):
    """Build a legend label "<method>-lr:<learning_rate>-<sigma_init>-<diversity_base>"."""
    fields = (method, config.learning_rate, config.sigma_init, config.diversity_base)
    return "{0}-lr:{1}-{2}-{3}".format(*fields)

def setPlot(logs, method,index):
    """Plot one mean curve per configuration in `logs` and show the figure.

    logs   -- dict mapping a Config to an array indexed [run, generation, metric].
    method -- solver name used as the prefix of every legend label.
    index  -- which metric (last axis) to plot.

    The generation window comes from the notebook-level `start` and `end`.
    """
    legend_handles = []  # one line handle per configuration
    for run_config, runs in logs.items():
        label = label_gen(run_config, method)
        mean_line = curvePlot(runs[:, :, index], label, start, end)
        legend_handles.append(copy.copy(mean_line))

    plt.xlabel('Generation')
    plt.ylabel('Loss')
    plt.yscale('log')
    plt.legend(handles=legend_handles, loc='best')
    plt.show()

In [15]:
from collections import namedtuple
import copy 
# Baseline experiment: 30 independent PEPG runs per (sigma_init, learning_rate)
# setting; the stacked metric histories are stored in pepgLogs keyed by Config.
pepgLogs=dict()
for sigma_init_val in [0.1]:
    for lr in [0.01]:
        pepg = PEPG(NPARAMS,                     # number of model parameters
                    sigma_init=sigma_init_val,   # initial standard deviation
                    learning_rate=lr,            # learning rate under test
                    learning_rate_decay=0.9999,  # per-generation decay factor of the learning rate
                    popsize=NPOPULATION,         # population size (odd, see config())
                    average_baseline=False,      # do not use the batch average as baseline
                    weight_decay=0.00,           # weight decay disabled
                    rank_fitness=False,          # use raw fitness values, not ranks
                    forget_best=False)           # keep the historical best solution

        h= batch_debug_solver(pepg,printLog=False)
        print("=====================================================================")
        # Named run_config so the config() function defined earlier is not rebound.
        run_config=Config(popsize=NPOPULATION, learning_rate=lr, sigma_init=sigma_init_val,diversity_base=0)
        pepgLogs[run_config]=copy.copy(h)

fitness score at this local optimum: -0.0314776891986
fitness score at this local optimum: -0.0580554235824
fitness score at this local optimum: -0.029386457522
fitness score at this local optimum: -0.0366954867632
fitness score at this local optimum: -0.0561364599877
fitness score at this local optimum: -0.0557404042058
fitness score at this local optimum: -0.0142542735837
fitness score at this local optimum: -0.0285388533258
fitness score at this local optimum: -0.0424541330684
fitness score at this local optimum: -0.0559525106168
fitness score at this local optimum: -0.0476571299063
fitness score at this local optimum: -0.0558555369008
fitness score at this local optimum: -0.0513484013023
fitness score at this local optimum: -0.0418536562551
fitness score at this local optimum: -0.0537019514048
fitness score at this local optimum: -0.0386939173303
fitness score at this local optimum: -0.0619894584362
fitness score at this local optimum: -0.0473122466334
fitness score at this local o

In [None]:
# Adaptive experiment: 30 independent PEPGAdaptive runs for every combination
# of learning rate, diversity_base and sigma exponent (`option`). Results are
# stored in pepg_ad_Logs keyed by (option, Config).
pepg_ad_Logs=dict()

for sigma_init_val in [0.1]:
    for lr in [0.01,0.1]:
        for db in [0.01]:
            for opt in [0,1,2,3]:
                pepga = PEPGAdaptive(NPARAMS,                    # number of model parameters
                                     sigma_init=sigma_init_val,  # initial standard deviation
                                     learning_rate=lr,           # learning rate under test
                                     learning_rate_decay=0.9999, # per-generation decay factor of the learning rate
                                     popsize=NPOPULATION,        # population size (odd, see config())
                                     average_baseline=False,     # baseline is the reward at mu, not the batch average
                                     weight_decay=0.00,          # weight decay disabled
                                     rank_fitness=False,         # use raw fitness values, not ranks
                                     forget_best=False,          # keep the historical best solution
                                     option = opt,               # exponent of sigma in the diversity term
                                     diversity_base = db)        # diversity coefficient
                h= batch_debug_solver(pepga,printLog=False)
                print("=====================================================================")
                # Named run_config so the config() function defined earlier is not rebound.
                run_config=Config(popsize=NPOPULATION, learning_rate=lr, sigma_init=sigma_init_val,diversity_base=db)
                # Key on the loop variable `opt` (the name `option` is not defined in the notebook).
                pepg_ad_Logs[(opt,run_config)]=copy.copy(h)

fitness score at this local optimum: -0.540465989144
fitness score at this local optimum: -0.355815878259
fitness score at this local optimum: -0.556282821157
fitness score at this local optimum: -0.671490895206
fitness score at this local optimum: -0.439818498763
fitness score at this local optimum: -0.477098841811
fitness score at this local optimum: -0.428717102554
fitness score at this local optimum: -0.401209779811
fitness score at this local optimum: -0.491513948828
fitness score at this local optimum: -0.65662325529
fitness score at this local optimum: -0.61758677181
fitness score at this local optimum: -0.448386197179
fitness score at this local optimum: -0.565506807341
fitness score at this local optimum: -0.471365277767
fitness score at this local optimum: -0.52029490183
fitness score at this local optimum: -0.57087773271
fitness score at this local optimum: -0.539569526968
fitness score at this local optimum: -0.502511764839
fitness score at this local optimum: -0.3855178101

In [None]:
# List the (option, Config) keys recorded by the adaptive experiment.
# NOTE(review): this used to read `pepg_ad0_Logs`, which no visible cell
# defines; `pepg_ad_Logs` is the dictionary the experiment cell builds.
print(pepg_ad_Logs.keys())

In [None]:
# start= 30000
# end = 60000
# setPlot(pepg_v_Logs,"PEPG-Varaint",1) 

# Generation window read by setPlot() through the notebook-level start/end,
# then plot metric index 1 for every configuration in the given log dict.
# NOTE(review): `pepg_a_Logs` is not defined by any visible cell; setPlot()
# needs a dict keyed by Config (e.g. pepgLogs) -- confirm which one is meant.
start= 0
end = 60000
setPlot(pepg_a_Logs,"PEPG-Aadpative",1) 

In [None]:

# Keys used to look results up in the log dictionaries. All three use
# NPOPULATION so they keep matching the keys written by the experiment cells
# if the population size changes.
# NOTE(review): `config` rebinds the name of the config() function defined
# near the top of the notebook; config() cannot be re-run after this cell.
config=Config(popsize=NPOPULATION, learning_rate=0.01, sigma_init=0.1,diversity_base=0)      # PEPG baseline
aconfig=Config(popsize=NPOPULATION, learning_rate=0.01, sigma_init=0.1,diversity_base=0.01)  # adaptive, lr=0.01
lconfig=Config(popsize=NPOPULATION, learning_rate=0.1, sigma_init=0.1,diversity_base=0.01)   # adaptive, lr=0.1

In [None]:
# Collect the histories to compare, keyed by display name.
# The adaptive logs are stored under (option, Config) keys in pepg_ad_Logs.
# NOTE(review): this used to read `pepg_ad0_Logs` / `pepg_ad1_Logs`, which no
# visible cell defines -- confirm that options 0 and 1 are the intended runs.
result=dict()
# result['PEPG']=pepgLogs[config]
result['PEPG_AD0']=pepg_ad_Logs[(0,aconfig)]
# result['PEPG_AD0L']=pepg_ad_Logs[(0,lconfig)]
result['PEPG_AD1']=pepg_ad_Logs[(1,aconfig)]


In [None]:
# y-axis label for each metric column of the history arrays.
labelDict = {
    1: 'Loss (mean)',
    0: 'Loss (best)',
    2: 'Entropy ',
    3: 'Reward Std',
    4: 'Act',
}

def showBest(logs,index):
    """Plot the across-run mean of one metric for every method in `logs`.

    logs  -- dict mapping a method name to an array indexed [run, generation, metric].
    index -- metric column to plot; also selects the y label from labelDict.

    Metrics 0, 1 and 3 are drawn on a log scale. The generation window comes
    from the notebook-level `start` and `end`.
    """
    legend_handles = []  # one line handle per method
    for method, runs in logs.items():
        mean_line = curvePlot(runs[:, :, index], method, start, end)
        legend_handles.append(copy.copy(mean_line))

    plt.xlabel('Generation')
    if index in (0, 1, 3):
        plt.yscale('log')
    plt.ylabel(labelDict[index])
    plt.legend(handles=legend_handles, loc='best')
    plt.show()

In [None]:
start=0 
end = 60000
# One figure per metric column: 0 best loss, 1 mean loss, 2 entropy,
# 3 reward std. Column 4 ('Act') is intentionally left out.
for metric_index in (0, 1, 2, 3):
    showBest(result, metric_index)





# plotsample(sampleIndex,0)
# plotsample(sampleIndex,1)
# # plotsample(sampleIndex,2)
# plotsample(sampleIndex,3)
# plotsample(sampleIndex,4)




In [None]:
# Pick one run to inspect, and build the x axis shared by the per-sample plots.
sampleIndex=2
# Integer generation indices start..end-1, matching the [start:end] slices
# that are plotted against it.
epoch=np.arange(start,end)
# plt.plot(epoch,pepga_best[sampleIndex,start:end,-1])
# plt.show()

def plotsample(sampleIndex,index):
    """Plot metric `index` of run `sampleIndex` for each solver family.

    Reads the notebook-level arrays pepg_best, pepgv_best and pepga_best
    (indexed [run, generation, metric]) plus `epoch`, `start`, `end` and `args`.
    The plain PEPG curve is drawn only for metric indices up to 3.
    """
    window = slice(start, end)
    if index <= 3:
        plt.plot(epoch, pepg_best[sampleIndex, window, index], label='PEPG')
    plt.plot(epoch, pepgv_best[sampleIndex, window, index], label='PEPG_Variant')
    plt.plot(epoch, pepga_best[sampleIndex, window, index], label='PEPG_Adaptive')
    plt.xlabel('Generation')
    if index in (0, 1, 3):
        plt.yscale('log')
    plt.ylabel(labelDict[index])
    plt.title("Sample "+title_gen(args))
    plt.legend(loc='best')
    plt.show()

In [None]:
# x axis for both figures: integer generation indices start..end-1. It is
# defined before first use so the cell does not depend on an earlier cell.
epoch=np.arange(start,end)

# Figure 1: across-run mean of the last metric column of the adaptive runs.
plt.plot(epoch,np.mean(pepga_best[:,start:end,-1],axis=0))
plt.ylabel('Act')
plt.show()

# Figure 2: across-run mean of metric column 1 for the three solver families.
plt.plot(epoch,np.mean(pepg_best[:,start:end,1],axis=0),label='PEPG')
plt.plot(epoch,np.mean(pepgv_best[:,start:end,1],axis=0),label='PEPG_Variant')
plt.plot(epoch,np.mean(pepga_best[:,start:end,1],axis=0),label='PEPG_Adaptive')
plt.legend(loc='best')
# plt.yscale('log')
plt.ylabel('Loss')
plt.show()

In [None]:
def cal_diff(x):
    """Return the consecutive differences ``x[i+1] - x[i]`` of a sequence.

    The result has one element fewer than `x`; an empty or single-element
    input yields an empty array. Multi-dimensional input is differenced
    along its last axis.
    """
    return np.diff(np.array(x))
    
#     return x-y


In [None]:
# Across-run mean of metric column 2 over all generations.
# NOTE(review): `pepg_vadaptive_historys` is not defined by any visible cell;
# presumably a [run, generation, metric] array from batch_debug_solver -- confirm.
plt.plot(np.mean(pepg_vadaptive_historys[:,:,2],axis=0))
plt.show()

In [None]:
# Last metric column of run 1 over generations 35000..39999, followed by its
# sum (displayed as the cell output).
# NOTE(review): `pepg_vadaptive_historys` is not defined by any visible cell.
plt.plot(pepg_vadaptive_historys[1,35000:40000,-1])
plt.show()
sum(pepg_vadaptive_historys[1,35000:40000,-1])

In [None]:
def cal_meanDiff(historys):
    """Return the consecutive differences of the across-run mean of `historys`.

    `historys` is indexed [run, generation]; the generation window is taken
    from the notebook-level `start` and `end`.
    """
    mean_curve = np.mean(historys[:, start:end], axis=0)
    return cal_diff(mean_curve)

In [None]:
# Generation-to-generation differences of metric columns 1 and 2 for run 1.
# NOTE(review): `pepg_vadaptive_historys` is not defined by any visible cell;
# if it comes from debug_solver, column 1 is abs(mean fitness) and column 2
# is calEntropy(sigma) -- confirm.
FDiff=cal_diff(pepg_vadaptive_historys[1,:,1])

EDiff=cal_diff(pepg_vadaptive_historys[1,:,2])
# plt.plot(FDiff,alpha=0.5,color='b')


# plt.plot()
# Only the column-1 differences are drawn; EDiff is computed but not plotted.
plt.plot(FDiff,alpha=0.5,color='r')
# plt.plot(pepg_vadaptive_historys[1,:,-1],alpha=0.5,color='b')
# plt.plot(FDiff/EDiff)
# plt.ylim(-0.01,0.01)
# plt.yscale('log')
plt.show()

In [None]:
# Plot the column-1 differences computed in the preceding cell (FDiff).
plt.plot(FDiff)
plt.show()

In [None]:
# Mean-loss curves (metric column 1) of a single PEPG run and a single
# PEPG-variant run; `lgs` collects the line handles for the legend.
# NOTE(review): the second plot call had no array name (a syntax error);
# `pepg_variant_historys` is taken from the matching cell further down.
# Neither history array is defined by a visible cell -- confirm both.
plt.figure(figsize=(8,4), dpi=150)
start=0
end=MAX_ITERATION
pepg_line, = plt.plot(pepg_historys[start:end,1], color="blue", linewidth=1.0, linestyle="-", label='PEPG / NES')
pepgv_line,= plt.plot(pepg_variant_historys[start:end,1], color="red", linewidth=1.0, linestyle="-", label='PEPGV-0.01-19')
lgs=[]
lgs.append(pepg_line)
lgs.append(pepgv_line)

In [None]:
# Axis labels, log y scale and legend for pyplot's current figure; `lgs` is
# the handle list built in the preceding cell.
# NOTE(review): these calls act on whatever figure is current. If the
# previous cell's figure has already been rendered (as inline backends
# typically do at the end of a cell), they apply to a new empty figure --
# confirm, and consider merging with the plotting cell.
plt.xlabel('Generation')
plt.ylabel('Loss')
plt.yscale('log')
plt.legend(handles=lgs, loc='best')

In [None]:
# Title pyplot's current figure "<function>_<dimension>d_population" and show it.
funcName="Rosenbrock"
plt.title("{}_{}d_population".format(funcName, NPARAMS))
plt.show()

In [None]:
# Best-loss curves (metric column 0) of three single runs on a linear y axis.
# NOTE(review): pepg_historys, pepg_variant_historys and
# pepg_vadaptive_historys are not defined by any visible cell, and
# pepg_vadaptive_historys is indexed as 2-D here but as 3-D in other cells --
# confirm which shape is current.
plt.figure(figsize=(8,4), dpi=150)
start=0
# end=10000
end=MAX_ITERATION
pepg_line, = plt.plot(pepg_historys[start:end,0], color="blue", linewidth=1.0, linestyle="-.", label='PEPG / NES')
pepgv_line,= plt.plot(pepg_variant_historys[start:end,0], color="red", linewidth=1.0, linestyle="-", label='PEPGV-0.01-')
pepgTA_line,= plt.plot(pepg_vadaptive_historys[start:end,0], color="purple", linewidth=1.0, linestyle="-", label='PEPGV-0.05-101')
# handles passed to the legend below
lgs=[]
lgs.append(pepg_line)
lgs.append(pepgv_line)
lgs.append(pepgTA_line)
plt.xlabel('Generation')
plt.ylabel('Loss')
# plt.yscale('log')
plt.legend(handles=lgs, loc='best')
funcName="Rosenbrock"
plt.title(funcName+"_"+str(NPARAMS)+"d"+"_niche")
plt.show()

In [None]:
# Entropy curves (metric column 2) of one PEPG run and one PEPG-variant run
# over the full generation range, on a log y axis.
# NOTE(review): pepg_history / pepgv_history are not defined by any visible cell.
start,end=0,MAX_ITERATION
plt.figure(figsize=(8,4), dpi=150)
pepg_line, = plt.plot(pepg_history[start:end,2], color="blue", linewidth=1.0, linestyle="-.", label='PEPG')
pepgv_line,= plt.plot(pepgv_history[start:end,2], color="red", linewidth=1.0, linestyle="-", label='PEPGV')
lg=[]
lg.append(pepg_line)
lg.append(pepgv_line)
plt.yscale('log')
plt.xlabel('Generation')
plt.ylabel('Entropy')
# Use the handles built in this cell (`lg`), not the `lgs` list left over
# from an earlier figure.
plt.legend(handles=lg, loc='best')
funcName="Rosenbrock"
plt.title(funcName+"_"+str(NPARAMS)+"d")
plt.show()

In [None]:
# Total and raw values of the last metric column of the variant run.
# NOTE(review): `pepgv_history` is not defined by any visible cell.
print(sum(pepgv_history[:,-1]))
print(pepgv_history[:,-1])

In [None]:
# Open a new 5x4 inch figure at 150 dpi.
# NOTE(review): the plotting calls live in the following cells; whether they
# draw onto this figure depends on the backend -- confirm.
plt.figure(figsize=(5,4), dpi=150)

In [None]:
# Entropy curves (metric column 2) for the first 1000 generations.
# NOTE(review): pepg_history / pepgv_history are not defined by any visible cell.
start=0
end=1000
pepg_line, = plt.plot(pepg_history[start:end,2], color="blue", linewidth=1.0, linestyle="-.", label='PEPG')
pepgv_line,= plt.plot(pepgv_history[start:end,2], color="red", linewidth=1.0, linestyle="-", label='PEPGV')
lg=[]
lg.append(pepg_line)
lg.append(pepgv_line)
# Use the handles built in this cell (`lg`), not the `lgs` list left over
# from an earlier figure.
plt.legend(handles=lg, loc='best')

In [None]:
# Axis labels and title for pyplot's current figure, then show it.
# `funcName` must have been set by an earlier cell.
# NOTE(review): if the preceding cell's figure was already rendered, these
# calls apply to a new empty figure -- confirm.
plt.xlabel('Generation')
plt.ylabel('Entropy')
# plt.yscale('log')
plt.title(funcName+"_"+str(NPARAMS)+"d")

plt.show()

In [None]:
# Last metric column of the variant run: total over all generations, a plot
# of the first 500 generations, then the total over those 500.
# NOTE(review): `pepgv_history` is not defined by any visible cell.
print(sum(pepgv_history[:,-1]))
plt.plot(pepgv_history[:500,-1],color="red", linewidth=1.0, label='PEPGV')
plt.xlabel('Generation')
plt.ylabel('Fool-Act')
# plt.yscale('log')
plt.show()
print(sum(pepgv_history[:500,-1]))

In [None]:
# Metric column 1 of the variant run from `start` up to generation 500, on a
# log y axis. `start` is whatever an earlier cell last assigned.
# NOTE(review): `pepgv_history` is not defined by any visible cell.
plt.plot(pepgv_history[start:500,1],color='red')
plt.yscale('log')
plt.xlabel('Generation')
plt.ylabel('Loss')
plt.show()